Skip to content

Commit 6c0edd7

Browse files
committed
Implement vsim search method params and vsim filtering in HybridQuery
1 parent 527b024 commit 6c0edd7

File tree

3 files changed

+308
-45
lines changed

3 files changed

+308
-45
lines changed

redisvl/query/aggregate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def __init__(
124124
num_results (int, optional): The number of results to return. Defaults to 10.
125125
return_fields (Optional[List[str]], optional): The fields to return. Defaults to None.
126126
stopwords (Optional[Union[str, Set[str]]], optional): The stopwords to remove from the
127-
provided text prior to searchuse. If a string such as "english" "german" is
127+
provided text prior to search-use. If a string such as "english" "german" is
128128
provided then a default set of stopwords for that language will be used. If a list,
129129
set, or tuple of strings is provided then those will be used as stopwords.
130130
Defaults to "english". If set to None, then no stopwords will be removed.

redisvl/query/hybrid.py

Lines changed: 85 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Any, Dict, List, Literal, Optional, Set, Union
22

3+
from redis.commands.search.query import Filter
4+
35
from redisvl.utils.full_text_query_helper import FullTextQueryHelper
46

57
try:
@@ -16,7 +18,9 @@
1618

1719

1820
class HybridQuery(_HybridQuery):
19-
"""TBD"""
21+
"""
22+
A hybrid search query that combines text search and vector similarity, with configurable fusion methods.
23+
"""
2024

2125
def __init__(
2226
self,
@@ -25,43 +29,116 @@ def __init__(
2529
vector: Union[bytes, List[float]],
2630
vector_field_name: str,
2731
text_scorer: str = "BM25STD",
28-
filter_expression: Optional[Union[str, FilterExpression]] = None,
32+
text_filter_expression: Optional[Union[str, FilterExpression]] = None,
33+
yield_text_score_as: Optional[str] = None,
2934
vector_search_method: Optional[Literal["KNN", "RANGE"]] = None,
30-
vector_search_method_params: Optional[Dict[str, Any]] = None,
35+
knn_k: Optional[int] = None,
36+
knn_ef_runtime: Optional[int] = None,
37+
range_radius: Optional[float] = None,
38+
range_epsilon: Optional[float] = None,
39+
yield_vsim_score_as: Optional[str] = None,
40+
vector_filter_expression: Optional[Union[str, FilterExpression]] = None,
3141
stopwords: Optional[Union[str, Set[str]]] = "english",
3242
text_weights: Optional[Dict[str, float]] = None,
3343
):
44+
"""
45+
Instantiates a HybridQuery object.
46+
47+
Args:
48+
text: The text to search for.
49+
text_field_name: The text field name to search in.
50+
vector: The vector to perform vector similarity search.
51+
vector_field_name: The vector field name to search in.
52+
text_scorer: The text scorer to use. Options are {TFIDF, TFIDF.DOCNORM,
53+
BM25, DISMAX, DOCSCORE, BM25STD}. Defaults to "BM25STD".
54+
text_filter_expression: The filter expression to use for the text search. Defaults to None.
55+
yield_text_score_as: The name of the field to yield the text score as.
56+
vector_search_method: The vector search method to use. Options are {KNN, RANGE}. Defaults to None.
57+
knn_k: The number of nearest neighbors to return, required if `vector_search_method` is "KNN".
58+
knn_ef_runtime: The exploration factor parameter for HNSW, optional if `vector_search_method` is "KNN".
59+
range_radius: The search radius to use, required if `vector_search_method` is "RANGE".
60+
range_epsilon: The epsilon value to use, optional if `vector_search_method` is "RANGE"; defines the
61+
accuracy of the search.
62+
yield_vsim_score_as: The name of the field to yield the vector similarity score as.
63+
vector_filter_expression: The filter expression to use for the vector similarity search. Defaults to None.
64+
stopwords (Optional[Union[str, Set[str]]], optional): The stopwords to remove from the
65+
provided text prior to search-use. If a string such as "english" or "german" is
66+
provided then a default set of stopwords for that language will be used. If a list,
67+
set, or tuple of strings is provided then those will be used as stopwords.
68+
Defaults to "english". If set to None, then no stopwords will be removed.
69+
70+
Note: This parameter controls query-time stopword filtering (client-side).
71+
For index-level stopwords configuration (server-side), see IndexInfo.stopwords.
72+
Using query-time stopwords with index-level STOPWORDS 0 is counterproductive.
73+
text_weights (Optional[Dict[str, float]]): The importance weighting of individual words
74+
within the query text. Defaults to None, as no modifications will be made to the
75+
text_scorer score.
76+
77+
Raises:
78+
TypeError: If the stopwords are not a set, list, or tuple of strings.
79+
ValueError: If the text string is empty, or if the text string becomes empty after
80+
stopwords are removed.
81+
ValueError: If `vector_search_method` is not one of {KNN, RANGE} (or None).
82+
ValueError: If `vector_search_method` is "KNN" and `knn_k` is not provided.
83+
ValueError: If `vector_search_method` is "RANGE" and `range_radius` is not provided.
84+
"""
3485
self._ft_helper = FullTextQueryHelper(
3586
stopwords=stopwords,
3687
text_weights=text_weights,
3788
)
3889

90+
# Serialize the full-text search query
3991
search_query = HybridSearchQuery(
4092
query_string=self._ft_helper.build_query_string(
4193
text=text,
4294
text_field_name=text_field_name,
43-
filter_expression=filter_expression,
95+
filter_expression=text_filter_expression,
4496
),
4597
scorer=text_scorer,
98+
yield_score_as=yield_text_score_as,
4699
)
47100

101+
# If the vector isn't already bytes, it needs to be represented as a string
48102
if not isinstance(vector, bytes):
49103
vector_data: Union[str, bytes] = str(vector)
50104
else:
51105
vector_data = vector
52106

107+
# Serialize vector similarity search method and params, if specified
53108
vsim_search_method = None
54-
if vector_search_method:
55-
vsim_search_method = VectorSearchMethods(vector_search_method)
109+
vsim_search_method_params = {}
110+
if vector_search_method == "KNN":
111+
vsim_search_method = VectorSearchMethods.KNN
112+
if not knn_k:
113+
raise ValueError("Must provide K if vector_search_method is KNN")
114+
115+
vsim_search_method_params["K"] = knn_k
116+
if knn_ef_runtime:
117+
vsim_search_method_params["EF_RUNTIME"] = knn_ef_runtime
118+
119+
elif vector_search_method == "RANGE":
120+
vsim_search_method = VectorSearchMethods.RANGE
121+
if not range_radius:
122+
raise ValueError("Must provide RADIUS if vector_search_method is RANGE")
123+
124+
vsim_search_method_params["RADIUS"] = range_radius
125+
if range_epsilon:
126+
vsim_search_method_params["EPSILON"] = range_epsilon
127+
128+
elif vector_search_method is not None:
129+
raise ValueError(f"Unknown vector search method: {vector_search_method}")
56130

131+
# Serialize the vector similarity query
57132
vsim_query = HybridVsimQuery(
58133
vector_field_name=vector_field_name,
59134
vector_data=vector_data,
60135
vsim_search_method=vsim_search_method,
61-
vsim_search_method_params=vector_search_method_params,
62-
# TODO: Implement filter
136+
vsim_search_method_params=vsim_search_method_params,
137+
filter=vector_filter_expression and Filter("FILTER", str(vector_filter_expression)),
138+
yield_score_as=yield_vsim_score_as,
63139
)
64140

141+
# Initialize the base HybridQuery
65142
super().__init__(
66143
search_query=search_query,
67144
vector_similarity_query=vsim_query,

0 commit comments

Comments
 (0)