11from typing import Any , Dict , List , Literal , Optional , Set , Union
22
3+ from redis .commands .search .query import Filter
4+
35from redisvl .utils .full_text_query_helper import FullTextQueryHelper
46
57try :
1618
1719
1820class HybridQuery (_HybridQuery ):
19- """TBD"""
21+ """
22+ A hybrid search query that combines text search and vector similarity, with configurable fusion methods.
23+ """
2024
2125 def __init__ (
2226 self ,
@@ -25,43 +29,116 @@ def __init__(
2529 vector : Union [bytes , List [float ]],
2630 vector_field_name : str ,
2731 text_scorer : str = "BM25STD" ,
28- filter_expression : Optional [Union [str , FilterExpression ]] = None ,
32+ text_filter_expression : Optional [Union [str , FilterExpression ]] = None ,
33+ yield_text_score_as : Optional [str ] = None ,
2934 vector_search_method : Optional [Literal ["KNN" , "RANGE" ]] = None ,
30- vector_search_method_params : Optional [Dict [str , Any ]] = None ,
35+ knn_k : Optional [int ] = None ,
36+ knn_ef_runtime : Optional [int ] = None ,
37+ range_radius : Optional [float ] = None ,
38+ range_epsilon : Optional [float ] = None ,
39+ yield_vsim_score_as : Optional [str ] = None ,
40+ vector_filter_expression : Optional [Union [str , FilterExpression ]] = None ,
3141 stopwords : Optional [Union [str , Set [str ]]] = "english" ,
3242 text_weights : Optional [Dict [str , float ]] = None ,
3343 ):
44+ """
45+ Instantiates a HybridQuery object.
46+
47+ Args:
48+ text: The text to search for.
49+ text_field_name: The text field name to search in.
50+ vector: The vector to perform vector similarity search.
51+ vector_field_name: The vector field name to search in.
52+ text_scorer: The text scorer to use. Options are {TFIDF, TFIDF.DOCNORM,
53+ BM25, DISMAX, DOCSCORE, BM25STD}. Defaults to "BM25STD".
54+ text_filter_expression: The filter expression to use for the text search. Defaults to None.
55+ yield_text_score_as: The name of the field to yield the text score as.
56+ vector_search_method: The vector search method to use. Options are {KNN, RANGE}. Defaults to None.
57+ knn_k: The number of nearest neighbors to return, required if `vector_search_method` is "KNN".
58+ knn_ef_runtime: The exploration factor parameter for HNSW, optional if `vector_search_method` is "KNN".
59+ range_radius: The search radius to use, required if `vector_search_method` is "RANGE".
60+ range_epsilon: The epsilon value to use, optional if `vector_search_method` is "RANGE"; defines the
61+ accuracy of the search.
62+ yield_vsim_score_as: The name of the field to yield the vector similarity score as.
63+ vector_filter_expression: The filter expression to use for the vector similarity search. Defaults to None.
64+ stopwords (Optional[Union[str, Set[str]]], optional): The stopwords to remove from the
65+ provided text prior to search-use. If a string such as "english" or "german" is
66+ provided, then a default set of stopwords for that language will be used. If a list,
67+ set, or tuple of strings is provided, then those will be used as stopwords.
68+ Defaults to "english". If set to None, then no stopwords will be removed.
69+
70+ Note: This parameter controls query-time stopword filtering (client-side).
71+ For index-level stopwords configuration (server-side), see IndexInfo.stopwords.
72+ Using query-time stopwords with index-level STOPWORDS 0 is counterproductive.
73+ text_weights (Optional[Dict[str, float]]): The importance weighting of individual words
74+ within the query text. Defaults to None, as no modifications will be made to the
75+ text_scorer score.
76+
77+ Raises:
78+ TypeError: If the stopwords are not a set, list, or tuple of strings.
79+ ValueError: If the text string is empty, or if the text string becomes empty after
80+ stopwords are removed.
81+ ValueError: If `vector_search_method` is not one of {KNN, RANGE} (or None).
82+ ValueError: If `vector_search_method` is "KNN" and `knn_k` is not provided.
83+ ValueError: If `vector_search_method` is "RANGE" and `range_radius` is not provided.
84+ """
3485 self ._ft_helper = FullTextQueryHelper (
3586 stopwords = stopwords ,
3687 text_weights = text_weights ,
3788 )
3889
90+ # Serialize the full-text search query
3991 search_query = HybridSearchQuery (
4092 query_string = self ._ft_helper .build_query_string (
4193 text = text ,
4294 text_field_name = text_field_name ,
43- filter_expression = filter_expression ,
95+ filter_expression = text_filter_expression ,
4496 ),
4597 scorer = text_scorer ,
98+ yield_score_as = yield_text_score_as ,
4699 )
47100
101+ # If the vector isn't already bytes, it needs to be represented as a string
48102 if not isinstance (vector , bytes ):
49103 vector_data : Union [str , bytes ] = str (vector )
50104 else :
51105 vector_data = vector
52106
107+ # Serialize vector similarity search method and params, if specified
53108 vsim_search_method = None
54- if vector_search_method :
55- vsim_search_method = VectorSearchMethods (vector_search_method )
109+ vsim_search_method_params = {}
110+ if vector_search_method == "KNN" :
111+ vsim_search_method = VectorSearchMethods .KNN
112+ if not knn_k :
113+ raise ValueError ("Must provide K if vector_search_method is KNN" )
114+
115+ vsim_search_method_params ["K" ] = knn_k
116+ if knn_ef_runtime :
117+ vsim_search_method_params ["EF_RUNTIME" ] = knn_ef_runtime
118+
119+ elif vector_search_method == "RANGE" :
120+ vsim_search_method = VectorSearchMethods .RANGE
121+ if not range_radius :
122+ raise ValueError ("Must provide RADIUS if vector_search_method is RANGE" )
123+
124+ vsim_search_method_params ["RADIUS" ] = range_radius
125+ if range_epsilon :
126+ vsim_search_method_params ["EPSILON" ] = range_epsilon
127+
128+ elif vector_search_method is not None :
129+ raise ValueError (f"Unknown vector search method: { vector_search_method } " )
56130
131+ # Serialize the vector similarity query
57132 vsim_query = HybridVsimQuery (
58133 vector_field_name = vector_field_name ,
59134 vector_data = vector_data ,
60135 vsim_search_method = vsim_search_method ,
61- vsim_search_method_params = vector_search_method_params ,
62- # TODO: Implement filter
136+ vsim_search_method_params = vsim_search_method_params ,
137+ filter = vector_filter_expression and Filter ("FILTER" , str (vector_filter_expression )),
138+ yield_score_as = yield_vsim_score_as ,
63139 )
64140
141+ # Initialize the base HybridQuery
65142 super ().__init__ (
66143 search_query = search_query ,
67144 vector_similarity_query = vsim_query ,
0 commit comments