Source code for redisvl.query.hybrid

from typing import Any, Literal

from redis.commands.search.query import Filter

from redisvl.query.filter import FilterExpression
from redisvl.redis.utils import array_to_buffer
from redisvl.utils.full_text_query_helper import FullTextQueryHelper

# Message used for the ImportError that this module raises whenever importing
# from `redis.commands.search.hybrid_query` fails.
_IMPORT_ERROR_MESSAGE = "Hybrid queries require Redis >= 8.4.0 and redis-py>=7.1.0"


class HybridQuery:
    """
    A hybrid search query that combines text search and vector similarity, with
    configurable fusion methods.

    .. code-block:: python

        from redisvl.query import HybridQuery
        from redisvl.index import SearchIndex

        index = SearchIndex.from_yaml("path/to/index.yaml")

        query = HybridQuery(
            text="example text",
            text_field_name="text_field",
            vector=[0.1, 0.2, 0.3],
            vector_field_name="vector_field",
            text_scorer="BM25STD",
            yield_text_score_as="text_score",
            yield_vsim_score_as="vector_similarity",
            combination_method="LINEAR",
            linear_alpha=0.3,
            yield_combined_score_as="hybrid_score",
            num_results=10,
            return_fields=["field1", "field2"],
            stopwords="english",
        )

        results = index.query(query)

    Instances expose four attributes built by ``__init__``:

    - ``query``: the redis-py hybrid query returned by ``build_base_query``.
    - ``params``: mapping of ``vector_param_name`` to the vector blob.
    - ``postprocessing_config``: the ``HybridPostProcessingConfig`` holding the
      LIMIT / LOAD settings.
    - ``combination_method``: the object returned by
      ``build_combination_method``, or ``None`` when no combination method was
      requested.

    See Also:
        - `FT.HYBRID command documentation <https://redis.io/docs/latest/commands/ft.hybrid>`_
        - `redis-py hybrid_search documentation <https://redis.readthedocs.io/en/stable/redismodules.html#redis.commands.search.commands.SearchCommands.hybrid_search>`_
    """

    def __init__(
        self,
        text: str,
        text_field_name: str,
        vector: bytes | list[float],
        vector_field_name: str,
        vector_param_name: str = "vector",
        text_scorer: str = "BM25STD",
        yield_text_score_as: str | None = None,
        vector_search_method: Literal["KNN", "RANGE"] | None = None,
        knn_ef_runtime: int = 10,
        range_radius: float | None = None,
        range_epsilon: float = 0.01,
        yield_vsim_score_as: str | None = None,
        filter_expression: str | FilterExpression | None = None,
        combination_method: Literal["RRF", "LINEAR"] | None = None,
        rrf_window: int = 20,
        rrf_constant: int = 60,
        linear_alpha: float = 0.3,
        yield_combined_score_as: str | None = None,
        dtype: str = "float32",
        num_results: int | None = 10,
        return_fields: list[str] | None = None,
        stopwords: str | set[str] | None = "english",
        text_weights: dict[str, float] | None = None,
    ):
        """
        Instantiates a HybridQuery object.

        Args:
            text: The text to search for.
            text_field_name: The text field name to search in.
            vector: The vector to perform vector similarity search. ``bytes``
                are used as-is; any other value is converted with
                ``array_to_buffer(vector, dtype)``.
            vector_field_name: The vector field name to search in.
            vector_param_name: The name of the parameter substitution containing
                the vector blob.
            text_scorer: The text scorer to use. Options are {TFIDF,
                TFIDF.DOCNORM, BM25STD, BM25STD.NORM, BM25STD.TANH, DISMAX,
                DOCSCORE, HAMMING}. Defaults to "BM25STD". For more information
                about supported scoring algorithms, see
                https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/scoring/
            yield_text_score_as: The name of the field to yield the text score as.
            vector_search_method: The vector search method to use. Options are
                {KNN, RANGE}. Defaults to None.
            knn_ef_runtime: The exploration factor parameter for HNSW, optional
                if `vector_search_method` is "KNN".
            range_radius: The search radius to use, required if
                `vector_search_method` is "RANGE".
            range_epsilon: The epsilon value to use, optional if
                `vector_search_method` is "RANGE"; defines the accuracy of the
                search.
            yield_vsim_score_as: The name of the field to yield the vector
                similarity score as.
            filter_expression: The filter expression to use for both the text
                and vector searches. Defaults to None.
            combination_method: The combination method to use. Options are
                {RRF, LINEAR}. If not specified, no combination configuration is
                built and the server default (RRF) applies. "RRF" is tuned with
                `rrf_window` and `rrf_constant`; "LINEAR" is tuned with
                `linear_alpha` (the vector weight is derived as
                ``1 - linear_alpha``).
            rrf_window: The window size to use for the reciprocal rank fusion
                (RRF) combination method. Limits fusion scope.
            rrf_constant: The constant to use for the reciprocal rank fusion
                (RRF) combination method. Controls decay of rank influence.
            linear_alpha: The weight of the text query for the linear
                combination method (LINEAR).
            yield_combined_score_as: The name of the field to yield the combined
                score as.
            dtype: The data type of the vector. Defaults to "float32".
            num_results: The number of results to return. Also used as K when
                `vector_search_method` is "KNN".
            return_fields: The fields to return. Defaults to None.
            stopwords: The stopwords to remove from the provided text prior to
                search-use. If a string such as "english" or "german" is
                provided then a default set of stopwords for that language will
                be used. If a list, set, or tuple of strings is provided then
                those will be used as stopwords. Defaults to "english". If set
                to None then no stopwords will be removed.

                Note: This parameter controls query-time stopword filtering
                (client-side). For index-level stopwords configuration
                (server-side), see IndexInfo.stopwords. Using query-time
                stopwords with index-level STOPWORDS 0 is counterproductive.
            text_weights: The importance weighting of individual words within
                the query text. Defaults to None, as no modifications will be
                made to the text_scorer score.

        Raises:
            ImportError: If redis-py>=7.1.0 is not installed.
            TypeError: If the stopwords are not a set, list, or tuple of strings.
            ValueError: If the text string is empty, or if the text string
                becomes empty after stopwords are removed.
            ValueError: If `vector_search_method` is defined and isn't one of
                {KNN, RANGE}.
            ValueError: If `vector_search_method` is "KNN" and `num_results` is
                not provided.
            ValueError: If `vector_search_method` is "RANGE" and `range_radius`
                is not provided.
        """
        try:
            from redis.commands.search.hybrid_query import (
                CombineResultsMethod,
                HybridPostProcessingConfig,
            )
        except ImportError as exc:
            # ModuleNotFoundError is a subclass of ImportError, so one clause
            # covers both; chain the cause so the real failure stays visible.
            raise ImportError(_IMPORT_ERROR_MESSAGE) from exc

        # Post-processing: LIMIT and LOAD are only configured when requested.
        self.postprocessing_config = HybridPostProcessingConfig()
        if num_results:
            self.postprocessing_config.limit(offset=0, num=num_results)
        if return_fields:
            self.postprocessing_config.load(*(f"@{f}" for f in return_fields))

        # Full-text side: the helper applies stopwords / weights and folds the
        # filter expression into the query string.
        self._ft_helper = FullTextQueryHelper(
            stopwords=stopwords,
            text_weights=text_weights,
        )
        query_string = self._ft_helper.build_query_string(
            text, text_field_name, filter_expression
        )

        # Vector side: the blob is passed as a query parameter, never inlined.
        if isinstance(vector, bytes):
            vector_data = vector
        else:
            vector_data = array_to_buffer(vector, dtype)
        self.params = {
            vector_param_name: vector_data,
        }

        self.query = build_base_query(
            text_query=query_string,
            vector_param_name=vector_param_name,
            vector_field_name=vector_field_name,
            text_scorer=text_scorer,
            yield_text_score_as=yield_text_score_as,
            vector_search_method=vector_search_method,
            num_results=num_results,
            knn_ef_runtime=knn_ef_runtime,
            range_radius=range_radius,
            range_epsilon=range_epsilon,
            yield_vsim_score_as=yield_vsim_score_as,
            filter_expression=filter_expression,
        )

        # Declared once so the attribute has a single, consistent annotation.
        self.combination_method: CombineResultsMethod | None = None
        if combination_method:
            self.combination_method = build_combination_method(
                combination_method=combination_method,
                rrf_window=rrf_window,
                rrf_constant=rrf_constant,
                linear_alpha=linear_alpha,
                yield_score_as=yield_combined_score_as,
            )
def build_base_query(
    text_query: str,
    vector_param_name: str,
    vector_field_name: str,
    text_scorer: str = "BM25STD",
    yield_text_score_as: str | None = None,
    vector_search_method: Literal["KNN", "RANGE"] | None = None,
    num_results: int | None = None,
    knn_ef_runtime: int | None = None,
    range_radius: float | None = None,
    range_epsilon: float | None = None,
    yield_vsim_score_as: str | None = None,
    filter_expression: str | FilterExpression | None = None,
):
    """Build a Redis HybridQuery for performing hybrid search.

    Args:
        text_query: The query for the text search.
        vector_param_name: The name of the parameter substitution containing
            the vector blob.
        vector_field_name: The vector field name to search in.
        text_scorer: The text scorer to use. Options are {TFIDF, TFIDF.DOCNORM,
            BM25STD, BM25STD.NORM, BM25STD.TANH, DISMAX, DOCSCORE, HAMMING}.
            Defaults to "BM25STD". For more information about supported scoring
            algorithms, see
            https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/scoring/
        yield_text_score_as: The name of the field to yield the text score as.
        vector_search_method: The vector search method to use. Options are
            {KNN, RANGE}. Defaults to None.
        num_results: The number of nearest neighbors to return (K), required if
            `vector_search_method` is "KNN".
        knn_ef_runtime: The exploration factor parameter for HNSW, optional if
            `vector_search_method` is "KNN".
        range_radius: The search radius to use, required if
            `vector_search_method` is "RANGE".
        range_epsilon: The epsilon value to use, optional if
            `vector_search_method` is "RANGE"; defines the accuracy of the
            search.
        yield_vsim_score_as: The name of the field to yield the vector
            similarity score as.
        filter_expression: The filter expression to use for the vector
            similarity search. An empty value or "*" means no filter.
            Defaults to None.

    Raises:
        ImportError: If redis-py>=7.1.0 is not installed.
        ValueError: If `vector_search_method` is defined and isn't one of
            {KNN, RANGE}.
        ValueError: If `vector_search_method` is "KNN" and `num_results` is not
            provided.
        ValueError: If `vector_search_method` is "RANGE" and `range_radius` is
            not provided.

    Returns:
        A Redis HybridQuery object that defines the text and vector searches to
        be performed.
    """
    try:
        from redis.commands.search.hybrid_query import HybridQuery as RedisHybridQuery
        from redis.commands.search.hybrid_query import (
            HybridSearchQuery,
            HybridVsimQuery,
            VectorSearchMethods,
        )
    except ImportError as exc:
        # ModuleNotFoundError is a subclass of ImportError; one clause suffices.
        raise ImportError(_IMPORT_ERROR_MESSAGE) from exc

    # Serialize the full-text search query
    search_query = HybridSearchQuery(
        query_string=text_query,
        scorer=text_scorer,
        yield_score_as=yield_text_score_as,
    )

    # Serialize vector similarity search method and params, if specified
    vsim_search_method: VectorSearchMethods | None = None
    vsim_search_method_params: dict[str, Any] = {}
    if vector_search_method == "KNN":
        vsim_search_method = VectorSearchMethods.KNN
        if not num_results:
            raise ValueError(
                "Must provide `num_results` if vector_search_method is KNN"
            )
        vsim_search_method_params["K"] = num_results
        if knn_ef_runtime:
            vsim_search_method_params["EF_RUNTIME"] = knn_ef_runtime

    elif vector_search_method == "RANGE":
        vsim_search_method = VectorSearchMethods.RANGE
        # Only a missing radius is an error; an explicit 0 is a provided value
        # and must not be reported as "not provided".
        if range_radius is None:
            raise ValueError("Must provide RADIUS if vector_search_method is RANGE")
        vsim_search_method_params["RADIUS"] = range_radius
        if range_epsilon:
            vsim_search_method_params["EPSILON"] = range_epsilon

    elif vector_search_method is not None:
        raise ValueError(f"Unknown vector search method: {vector_search_method}")

    # Normalize the filter to a string once; "*" (match all) and empty values
    # mean that no FILTER clause is attached to the vector query.
    filter_str = str(filter_expression) if filter_expression else ""
    vsim_filter = (
        Filter("FILTER", filter_str) if filter_str and filter_str != "*" else None
    )

    # Serialize the vector similarity query
    vsim_query = HybridVsimQuery(
        vector_field_name="@" + vector_field_name,
        vector_data="$" + vector_param_name,
        vsim_search_method=vsim_search_method,
        vsim_search_method_params=vsim_search_method_params,
        filter=vsim_filter,
        yield_score_as=yield_vsim_score_as,
    )

    return RedisHybridQuery(
        search_query=search_query,
        vector_similarity_query=vsim_query,
    )


def build_combination_method(
    combination_method: Literal["RRF", "LINEAR"],
    rrf_window: int | None = None,
    rrf_constant: float | None = None,
    linear_alpha: float | None = None,
    yield_score_as: str | None = None,
):
    """Build a configuration for combining hybrid search scores.

    Args:
        combination_method: The combination method to use. Options are
            {RRF, LINEAR}.
        rrf_window: The window size to use for the reciprocal rank fusion (RRF)
            combination method. Limits fusion scope.
        rrf_constant: The constant to use for the reciprocal rank fusion (RRF)
            combination method. Controls decay of rank influence.
        linear_alpha: The weight of the first (text) query for the linear
            combination method (LINEAR). The second weight, BETA, is derived
            as ``1 - linear_alpha``. ``0.0`` is a valid value.
        yield_score_as: The name of the field to yield the combined score as.

    Raises:
        ImportError: If redis-py>=7.1.0 is not installed.
        ValueError: If `combination_method` isn't one of {RRF, LINEAR}.
        ValueError: If no parameter at all ends up being set, i.e. none of the
            method-specific parameters (`rrf_window` / `rrf_constant` for RRF,
            `linear_alpha` for LINEAR) nor `yield_score_as` is provided.

    Returns:
        A CombineResultsMethod object that defines how the text and vector
        scores should be combined.
    """
    try:
        from redis.commands.search.hybrid_query import (
            CombinationMethods,
            CombineResultsMethod,
        )
    except ImportError as exc:
        # ModuleNotFoundError is a subclass of ImportError; one clause suffices.
        raise ImportError(_IMPORT_ERROR_MESSAGE) from exc

    method_params: dict[str, Any] = {}
    if combination_method == "RRF":
        method = CombinationMethods.RRF
        if rrf_window:
            method_params["WINDOW"] = rrf_window
        if rrf_constant:
            method_params["CONSTANT"] = rrf_constant

    elif combination_method == "LINEAR":
        method = CombinationMethods.LINEAR
        # Compare against None rather than truthiness: alpha == 0.0 (all weight
        # on the vector score) is a legitimate setting and must be forwarded.
        if linear_alpha is not None:
            method_params["ALPHA"] = linear_alpha
            method_params["BETA"] = 1 - linear_alpha

    else:
        raise ValueError(f"Unknown combination method: {combination_method}")

    if yield_score_as:
        method_params["YIELD_SCORE_AS"] = yield_score_as

    if not method_params:
        raise ValueError(
            "No parameters provided for combination method - must provide at least one parameter."
        )

    return CombineResultsMethod(
        method=method,
        **method_params,
    )