Hybrid search combines the traditional BM25 ranking algorithm with modern vector embeddings, achieving more accurate results than either method alone. Discover how to implement hybrid search for your LLM applications and AI agents.
Hybrid Search: Combining BM25 and Vector Search for Better Results¶
In modern RAG (Retrieval-Augmented Generation) systems, we often face a dilemma: use traditional keyword-based search or semantic vector search? The answer is simple – why choose when we can use both approaches simultaneously? Hybrid search combines the power of BM25 algorithm with vector embeddings, providing the best results from both worlds.
Why Hybrid Search?¶
Each approach has its strengths and weaknesses:
- BM25 – excellent for exact match, specific terms, product names, codes
- Vector search – perfect for semantic similarity, synonyms, contextual search
By combining these methods, we achieve better precision and recall, especially in cases where we need to find documents containing both specific keywords and semantically similar content.
Implementing Hybrid Search¶
Data Preparation and Indexing¶
First, let’s prepare data and create necessary indexes. We’ll need both an inverted index for BM25 and a vector database for embeddings.
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer
import faiss
class HybridSearchEngine:
    """Search engine that indexes documents for both lexical (TF-IDF/BM25)
    and semantic (dense-embedding) retrieval."""

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2'):
        # Dense encoder for semantic similarity.
        self.embedding_model = SentenceTransformer(model_name)
        # Sparse lexical representation; unigrams + bigrams, capped vocabulary.
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            max_features=10000,
            ngram_range=(1, 2)
        )
        # Populated by index_documents().
        self.documents = []
        self.embeddings = None
        self.tfidf_matrix = None
        self.faiss_index = None

    def index_documents(self, documents):
        """Build both retrieval indexes over *documents* (list of strings)."""
        self.documents = documents

        # Encode every document once and keep the matrix around.
        doc_vectors = self.embedding_model.encode(documents)
        self.embeddings = doc_vectors

        # Inner-product FAISS index; with L2-normalized vectors the inner
        # product equals cosine similarity.  Note: normalize_L2 mutates the
        # array in place, so self.embeddings ends up normalized as well.
        self.faiss_index = faiss.IndexFlatIP(doc_vectors.shape[1])
        faiss.normalize_L2(doc_vectors)
        self.faiss_index.add(doc_vectors.astype('float32'))

        # Fit the sparse lexical index on the same corpus.
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(documents)
BM25 Implementation¶
Let’s implement the BM25 algorithm, which is an improved version of TF-IDF with better document length normalization.
import math
from collections import Counter
class BM25:
    """Okapi BM25 ranking over a corpus of plain-text documents.

    Parameters
    ----------
    corpus : list[str]
        Documents; tokenized by whitespace, lowercased for matching.
    k1 : float
        Term-frequency saturation parameter (typical range 1.2-2.0).
    b : float
        Document-length normalization strength (0 = none, 1 = full).
    """

    def __init__(self, corpus, k1=1.5, b=0.75):
        self.k1 = k1
        self.b = b
        self.corpus = corpus
        self.doc_len = [len(doc.split()) for doc in corpus]
        self.avgdl = sum(self.doc_len) / len(self.doc_len)
        self.doc_freqs = []
        self.idf = {}
        # Per-document term frequencies + document frequency per term.
        for doc in corpus:
            frequencies = Counter(doc.lower().split())
            self.doc_freqs.append(frequencies)
            for word in frequencies:
                if word not in self.idf:
                    self.idf[word] = 0
                self.idf[word] += 1
        # Convert document frequencies to IDF.  The classic
        # log((N - df + 0.5) / (df + 0.5)) goes NEGATIVE for terms that
        # appear in more than half the corpus, which can push relevant
        # documents below irrelevant ones.  Use the standard "+1 inside
        # the log" smoothing (as in Lucene) so IDF is always >= 0.
        for word in self.idf:
            df = self.idf[word]
            self.idf[word] = math.log(1 + (len(corpus) - df + 0.5) / (df + 0.5))

    def search(self, query, k=10):
        """Return the top-*k* documents as (doc_index, score) pairs, best first."""
        query_words = query.lower().split()
        scores = []
        for i, doc_freq in enumerate(self.doc_freqs):
            score = 0
            for word in query_words:
                if word in doc_freq:
                    tf = doc_freq[word]
                    idf = self.idf.get(word, 0)
                    # BM25 term score: saturating tf weighted by idf,
                    # with length normalization controlled by b.
                    numerator = tf * (self.k1 + 1)
                    denominator = (tf + self.k1 * (1 - self.b +
                                   self.b * self.doc_len[i] / self.avgdl))
                    score += idf * (numerator / denominator)
            scores.append((i, score))
        # Highest score first.
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores[:k]
Combining Results¶
The key part of hybrid search is combining results from both methods. There are several approaches – we can use weighted sum, rank fusion, or reciprocal rank fusion (RRF).
class HybridSearchEngine:
    # ... previous code ...

    def hybrid_search(self, query, k=10, alpha=0.7):
        """Combine BM25 and vector search with a weighted sum of
        min-max-normalized scores.

        Parameters
        ----------
        query : str
        k : int
            Number of results to return.
        alpha : float
            Weight for vector search; BM25 gets (1 - alpha).

        Returns
        -------
        list of (doc_index, combined_score, document_text), best first.
        """
        # Vector search (inner product on L2-normalized vectors == cosine).
        query_embedding = self.embedding_model.encode([query])
        faiss.normalize_L2(query_embedding)
        vector_scores, vector_indices = self.faiss_index.search(
            query_embedding.astype('float32'), k * 2
        )

        # Build the BM25 index lazily on first use.
        if not hasattr(self, 'bm25'):
            self.bm25 = BM25(self.documents)
        bm25_results = self.bm25.search(query, k * 2)

        def normalize_scores(scores):
            """Min-max normalize to [0, 1]; degenerate lists collapse to 0."""
            if not scores or max(scores) == min(scores):
                return [0.0] * len(scores)
            min_score, max_score = min(scores), max(scores)
            return [(s - min_score) / (max_score - min_score) for s in scores]

        combined_scores = {}

        # Weighted vector contribution.  FAISS pads its result arrays with
        # index -1 when the corpus holds fewer than k*2 documents; skipping
        # those avoids accidentally scoring documents[-1].
        norm_vector_scores = normalize_scores(vector_scores[0].tolist())
        for idx, score in zip(vector_indices[0], norm_vector_scores):
            if idx < 0:
                continue
            combined_scores[int(idx)] = alpha * score

        # Weighted BM25 contribution, merged into the same dictionary.
        norm_bm25_scores = normalize_scores([s for _, s in bm25_results])
        for (idx, _), score in zip(bm25_results, norm_bm25_scores):
            combined_scores[idx] = combined_scores.get(idx, 0.0) + (1 - alpha) * score

        # Rank by combined score, best first.
        sorted_results = sorted(combined_scores.items(),
                                key=lambda x: x[1], reverse=True)
        return [(idx, score, self.documents[idx])
                for idx, score in sorted_results[:k]]
Reciprocal Rank Fusion (RRF)¶
An alternative to the weighted average is RRF, which combines results based on their rankings rather than their absolute scores. This approach is often more robust to differences in score scales between the two methods.
def rrf_hybrid_search(self, query, k=10, rrf_constant=60):
    """Hybrid search with Reciprocal Rank Fusion (RRF).

    Each method contributes 1 / (rrf_constant + rank) per document, using
    ranks instead of raw scores, which makes the fusion robust to the two
    methods' different score scales.

    Returns a list of (doc_index, rrf_score, document_text), best first.
    """
    # Vector search results (ranked by similarity).
    query_embedding = self.embedding_model.encode([query])
    faiss.normalize_L2(query_embedding)
    vector_scores, vector_indices = self.faiss_index.search(
        query_embedding.astype('float32'), k * 2
    )
    # Build the BM25 index lazily (mirrors hybrid_search) so this method
    # also works when called first -- the original assumed self.bm25
    # already existed and raised AttributeError otherwise.
    if not hasattr(self, 'bm25'):
        self.bm25 = BM25(self.documents)
    bm25_results = self.bm25.search(query, k * 2)

    rrf_scores = {}

    # Vector ranking contribution.  FAISS pads with -1 when the corpus
    # holds fewer than k*2 documents; skip those placeholder entries.
    for rank, doc_idx in enumerate(vector_indices[0]):
        if doc_idx < 0:
            continue
        rrf_scores[int(doc_idx)] = 1.0 / (rrf_constant + rank + 1)

    # BM25 ranking contribution, merged into the same dictionary.
    for rank, (doc_idx, _) in enumerate(bm25_results):
        rrf_scores[doc_idx] = rrf_scores.get(doc_idx, 0.0) + \
            1.0 / (rrf_constant + rank + 1)

    # Rank by fused score, best first.
    sorted_results = sorted(rrf_scores.items(),
                            key=lambda x: x[1], reverse=True)
    return [(idx, score, self.documents[idx])
            for idx, score in sorted_results[:k]]
Practical Usage¶
Let’s see how to use the hybrid search engine in practice:
# Initialization and indexing
documents = [
    "Python is a programming language for data science",
    "JavaScript is used for web development",
    "Machine learning algorithms in Python",
    "React framework for frontend development",
    "Data analysis using pandas library"
]

search_engine = HybridSearchEngine()
search_engine.index_documents(documents)

# The BM25 index is otherwise created lazily inside hybrid_search; build
# it explicitly here because we query it directly first (the original
# order raised AttributeError on search_engine.bm25).
search_engine.bm25 = BM25(documents)

# Searching
query = "python programming"

# Classic BM25
print("BM25 results:")
bm25_results = search_engine.bm25.search(query, k=3)
for idx, score in bm25_results:
    print(f"Score: {score:.3f} - {documents[idx]}")

print("\nHybrid search results:")
hybrid_results = search_engine.hybrid_search(query, k=3)
for idx, score, doc in hybrid_results:
    print(f"Score: {score:.3f} - {doc}")

print("\nRRF results:")
rrf_results = search_engine.rrf_hybrid_search(query, k=3)
for idx, score, doc in rrf_results:
    print(f"RRF score: {score:.3f} - {doc}")
Optimization and Tuning¶
To achieve the best results, it’s important to properly set parameters:
- Alpha parameter – weight between vector (alpha) and BM25 (1-alpha) search. Start with value 0.7
- BM25 parameters – k1 (1.2-2.0) and b (0.75) depending on document type
- Vector model – choose model suitable for your domain and language
- RRF constant – typically 60, but may need adjustment based on data
def evaluate_hybrid_search(self, test_queries, relevance_judgments, alphas):
    """Grid-search the alpha weight over *alphas*.

    Parameters
    ----------
    test_queries : list[str]
        Evaluation queries.
    relevance_judgments : dict
        Maps each query to its ground-truth relevant documents,
        in whatever form calculate_precision_at_k expects.
    alphas : iterable of float
        Candidate vector-search weights to try.

    Returns
    -------
    (best_alpha, best_score) -- defaults to (0.7, 0) when no candidate
    improves on a zero score or when test_queries is empty.

    NOTE: relies on an externally defined calculate_precision_at_k().
    """
    best_alpha = 0.7
    best_score = 0
    for alpha in alphas:
        scores = []
        for query in test_queries:
            results = self.hybrid_search(query, alpha=alpha)
            # Per-query quality metric (precision@k or similar).
            score = calculate_precision_at_k(results, relevance_judgments[query])
            scores.append(score)
        # Guard: an empty query set would otherwise divide by zero.
        if not scores:
            continue
        avg_score = sum(scores) / len(scores)
        if avg_score > best_score:
            best_score = avg_score
            best_alpha = alpha
    return best_alpha, best_score
Summary¶
Hybrid search combining BM25 and vector search provides a robust solution for modern search systems. While BM25 excels at exact matching of specific terms, vector search can capture semantic similarity. Their combination using weighted average or RRF delivers better results than either approach alone. The key to success is proper parameter tuning and testing on real data from your domain.