Skip to content

Commit a4e50c9

Browse files
Migrated the testcase to use _vectors instead
1 parent 83f543d commit a4e50c9

File tree

1 file changed

+65
-47
lines changed

1 file changed

+65
-47
lines changed

Diff for: src/search.rs

+65-47
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,12 @@ pub struct SearchQuery<'a, Http: HttpClient> {
350350
#[cfg(feature = "experimental-vector-search")]
351351
#[serde(skip_serializing_if = "Option::is_none")]
352352
pub hybrid: Option<HybridSearch<'a>>,
353+
354+
/// EXPERIMENTAL
355+
/// Defines the vector that a user-provided embedder has produced for semantic searching
356+
#[cfg(feature = "experimental-vector-search")]
357+
#[serde(skip_serializing_if = "Option::is_none")]
358+
pub vector: Option<&'a [f32]>,
353359
}
354360

355361
#[allow(missing_docs)]
@@ -380,6 +386,8 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
380386
index_uid: None,
381387
#[cfg(feature = "experimental-vector-search")]
382388
hybrid: None,
389+
#[cfg(feature = "experimental-vector-search")]
390+
vector: None,
383391
}
384392
}
385393
pub fn with_query<'b>(&'b mut self, query: &'a str) -> &'b mut SearchQuery<'a, Http> {
@@ -563,6 +571,8 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
563571
self.index_uid = Some(&self.index.uid);
564572
self
565573
}
574+
/// EXPERIMENTAL
575+
/// Defines whether to utilise previously defined embedders for semantic searching
566576
#[cfg(feature = "experimental-vector-search")]
567577
pub fn with_hybrid<'b>(
568578
&'b mut self,
@@ -575,6 +585,13 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
575585
});
576586
self
577587
}
588+
/// EXPERIMENTAL
589+
/// Defines the vector that a user-provided embedder has produced for semantic searching
590+
#[cfg(feature = "experimental-vector-search")]
591+
pub fn with_vector<'b>(&'b mut self, vector: &'a [f32]) -> &'b mut SearchQuery<'a, Http> {
592+
self.vector = Some(vector);
593+
self
594+
}
578595

579596
#[must_use]
580597
pub fn build(&mut self) -> SearchQuery<'a, Http> {
@@ -650,7 +667,6 @@ mod tests {
650667
use meilisearch_test_macro::meilisearch_test;
651668
use serde::{Deserialize, Serialize};
652669
use serde_json::{json, Map, Value};
653-
use std::time::Duration;
654670

655671
#[derive(Debug, Serialize, Deserialize, PartialEq)]
656672
struct Nested {
@@ -664,6 +680,7 @@ mod tests {
664680
kind: String,
665681
number: i32,
666682
nested: Nested,
683+
_vectors: HashMap<String, Vec<f32>>,
667684
}
668685

669686
impl PartialEq<Map<String, Value>> for Document {
@@ -677,31 +694,25 @@ mod tests {
677694

678695
async fn setup_test_index(client: &Client, index: &Index) -> Result<(), Error> {
679696
let t0 = index.add_documents(&[
680-
Document { id: 0, kind: "text".into(), number: 0, value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."), nested: Nested { child: S("first") } },
681-
Document { id: 1, kind: "text".into(), number: 10, value: S("dolor sit amet, consectetur adipiscing elit"), nested: Nested { child: S("second") } },
682-
Document { id: 2, kind: "title".into(), number: 20, value: S("The Social Network"), nested: Nested { child: S("third") } },
683-
Document { id: 3, kind: "title".into(), number: 30, value: S("Harry Potter and the Sorcerer's Stone"), nested: Nested { child: S("fourth") } },
684-
Document { id: 4, kind: "title".into(), number: 40, value: S("Harry Potter and the Chamber of Secrets"), nested: Nested { child: S("fift") } },
685-
Document { id: 5, kind: "title".into(), number: 50, value: S("Harry Potter and the Prisoner of Azkaban"), nested: Nested { child: S("sixth") } },
686-
Document { id: 6, kind: "title".into(), number: 60, value: S("Harry Potter and the Goblet of Fire"), nested: Nested { child: S("seventh") } },
687-
Document { id: 7, kind: "title".into(), number: 70, value: S("Harry Potter and the Order of the Phoenix"), nested: Nested { child: S("eighth") } },
688-
Document { id: 8, kind: "title".into(), number: 80, value: S("Harry Potter and the Half-Blood Prince"), nested: Nested { child: S("ninth") } },
689-
Document { id: 9, kind: "title".into(), number: 90, value: S("Harry Potter and the Deathly Hallows"), nested: Nested { child: S("tenth") } },
697+
Document { id: 0, kind: "text".into(), number: 0, value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."), nested: Nested { child: S("first") }, _vectors: HashMap::from([(S("default"), vec![1000.0])])},
698+
Document { id: 1, kind: "text".into(), number: 10, value: S("dolor sit amet, consectetur adipiscing elit"), nested: Nested { child: S("second") }, _vectors: HashMap::from([(S("default"), vec![2000.0])]) },
699+
Document { id: 2, kind: "title".into(), number: 20, value: S("The Social Network"), nested: Nested { child: S("third") }, _vectors: HashMap::from([(S("default"), vec![3000.0])]) },
700+
Document { id: 3, kind: "title".into(), number: 30, value: S("Harry Potter and the Sorcerer's Stone"), nested: Nested { child: S("fourth") }, _vectors: HashMap::from([(S("default"), vec![4000.0])]) },
701+
Document { id: 4, kind: "title".into(), number: 40, value: S("Harry Potter and the Chamber of Secrets"), nested: Nested { child: S("fift") }, _vectors: HashMap::from([(S("default"), vec![5000.0])]) },
702+
Document { id: 5, kind: "title".into(), number: 50, value: S("Harry Potter and the Prisoner of Azkaban"), nested: Nested { child: S("sixth") }, _vectors: HashMap::from([(S("default"), vec![6000.0])]) },
703+
Document { id: 6, kind: "title".into(), number: 60, value: S("Harry Potter and the Goblet of Fire"), nested: Nested { child: S("seventh") }, _vectors: HashMap::from([(S("default"), vec![7000.0])]) },
704+
Document { id: 7, kind: "title".into(), number: 70, value: S("Harry Potter and the Order of the Phoenix"), nested: Nested { child: S("eighth") }, _vectors: HashMap::from([(S("default"), vec![8000.0])]) },
705+
Document { id: 8, kind: "title".into(), number: 80, value: S("Harry Potter and the Half-Blood Prince"), nested: Nested { child: S("ninth") }, _vectors: HashMap::from([(S("default"), vec![9000.0])]) },
706+
Document { id: 9, kind: "title".into(), number: 90, value: S("Harry Potter and the Deathly Hallows"), nested: Nested { child: S("tenth") }, _vectors: HashMap::from([(S("default"), vec![10000.0])]) },
690707
], None).await?;
691708
let t1 = index
692709
.set_filterable_attributes(["kind", "value", "number"])
693710
.await?;
694711
let t2 = index.set_sortable_attributes(["title"]).await?;
695712

696-
// the vector search has longer indexing times leading to the timeout being triggered
697-
let timeout = if cfg!(feature = "experimental-vector-search") {
698-
Some(Duration::from_secs(120))
699-
} else {
700-
None
701-
};
702-
t2.wait_for_completion(client, None, timeout).await?;
703-
t1.wait_for_completion(client, None, timeout).await?;
704-
t0.wait_for_completion(client, None, timeout).await?;
713+
t2.wait_for_completion(client, None, None).await?;
714+
t1.wait_for_completion(client, None, None).await?;
715+
t0.wait_for_completion(client, None, None).await?;
705716

706717
Ok(())
707718
}
@@ -780,7 +791,8 @@ mod tests {
780791
value: S("dolor sit amet, consectetur adipiscing elit"),
781792
kind: S("text"),
782793
number: 10,
783-
nested: Nested { child: S("second") }
794+
nested: Nested { child: S("second") },
795+
_vectors: HashMap::from([(S("default"), vec![2000.0])]),
784796
},
785797
&results.hits[0].result
786798
);
@@ -952,7 +964,8 @@ mod tests {
952964
value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do…"),
953965
kind: S("text"),
954966
number: 0,
955-
nested: Nested { child: S("first") }
967+
nested: Nested { child: S("first") },
968+
_vectors: HashMap::from([(S("default"), vec![1000.0])])
956969
},
957970
results.hits[0].formatted_result.as_ref().unwrap()
958971
);
@@ -967,7 +980,8 @@ mod tests {
967980
value: S("Lorem ipsum dolor sit amet…"),
968981
kind: S("text"),
969982
number: 0,
970-
nested: Nested { child: S("first") }
983+
nested: Nested { child: S("first") },
984+
_vectors: HashMap::from([(S("default"), vec![1000.0])])
971985
},
972986
results.hits[0].formatted_result.as_ref().unwrap()
973987
);
@@ -988,7 +1002,8 @@ mod tests {
9881002
value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."),
9891003
kind: S("text"),
9901004
number: 0,
991-
nested: Nested { child: S("first") }
1005+
nested: Nested { child: S("first") },
1006+
_vectors: HashMap::from([(S("default"), vec![1000.0])])
9921007
},
9931008
results.hits[0].formatted_result.as_ref().unwrap());
9941009

@@ -1003,7 +1018,8 @@ mod tests {
10031018
value: S("Lorem ipsum dolor sit amet…"),
10041019
kind: S("text"),
10051020
number: 0,
1006-
nested: Nested { child: S("first") }
1021+
nested: Nested { child: S("first") },
1022+
_vectors: HashMap::from([(S("default"), vec![1000.0])])
10071023
},
10081024
results.hits[0].formatted_result.as_ref().unwrap()
10091025
);
@@ -1028,7 +1044,8 @@ mod tests {
10281044
value: S("(ꈍᴗꈍ)sed do eiusmod tempor incididunt ut(ꈍᴗꈍ)"),
10291045
kind: S("text"),
10301046
number: 0,
1031-
nested: Nested { child: S("first") }
1047+
nested: Nested { child: S("first") },
1048+
_vectors: HashMap::from([(S("default"), vec![1000.0])]),
10321049
},
10331050
results.hits[0].formatted_result.as_ref().unwrap()
10341051
);
@@ -1055,7 +1072,8 @@ mod tests {
10551072
value: S("The (⊃。•́‿•̀。)⊃ Social ⊂(´• ω •`⊂) Network"),
10561073
kind: S("title"),
10571074
number: 20,
1058-
nested: Nested { child: S("third") }
1075+
nested: Nested { child: S("third") },
1076+
_vectors: HashMap::from([(S("default"), vec![3000.0])])
10591077
},
10601078
results.hits[0].formatted_result.as_ref().unwrap()
10611079
);
@@ -1077,7 +1095,8 @@ mod tests {
10771095
value: S("<em>dolor</em> sit amet, consectetur adipiscing elit"),
10781096
kind: S("<em>text</em>"),
10791097
number: 10,
1080-
nested: Nested { child: S("first") }
1098+
nested: Nested { child: S("second") },
1099+
_vectors: HashMap::from([(S("default"), vec![1000.0])]),
10811100
},
10821101
results.hits[0].formatted_result.as_ref().unwrap(),
10831102
);
@@ -1092,7 +1111,8 @@ mod tests {
10921111
value: S("<em>dolor</em> sit amet, consectetur adipiscing elit"),
10931112
kind: S("text"),
10941113
number: 10,
1095-
nested: Nested { child: S("first") }
1114+
nested: Nested { child: S("second") },
1115+
_vectors: HashMap::from([(S("default"), vec![2000.0])])
10961116
},
10971117
results.hits[0].formatted_result.as_ref().unwrap()
10981118
);
@@ -1223,20 +1243,16 @@ mod tests {
12231243
#[cfg(feature = "experimental-vector-search")]
12241244
#[meilisearch_test]
12251245
async fn test_hybrid(client: Client, index: Index) -> Result<(), Error> {
1226-
use crate::settings::{Embedder, HuggingFaceEmbedderSettings};
1227-
log::warn!("You are executing the vector search test. This WILL take a while and might lead to timeouts in other tests. You can disable this testcase by not enabling the `experimental-vector-search`-feature and running this ");
1246+
use crate::settings::{Embedder, UserProvidedEmbedderSettings};
12281247
// enable vector searching and configure an embedder
12291248
let features = crate::features::ExperimentalFeatures::new(&client)
12301249
.set_vector_store(true)
12311250
.update()
12321251
.await
12331252
.expect("could not enable the vector store");
12341253
assert_eq!(features.vector_store, true);
1235-
let embedder_setting = Embedder::HuggingFace(HuggingFaceEmbedderSettings {
1236-
model: Some("BAAI/bge-base-en-v1.5".into()),
1237-
revision: None,
1238-
document_template: Some("{{ doc.value }}".into()),
1239-
});
1254+
let embedder_setting =
1255+
Embedder::UserProvided(UserProvidedEmbedderSettings { dimensions: 1 });
12401256
let t3 = index
12411257
.set_settings(&crate::settings::Settings {
12421258
embedders: Some(HashMap::from([("default".to_string(), embedder_setting)])),
@@ -1247,11 +1263,16 @@ mod tests {
12471263

12481264
setup_test_index(&client, &index).await?;
12491265

1250-
// "zweite" = "second" in german
1251-
// => an embedding should be able to detect that this is equivalent, but not the regular search
1266+
// "2nd" = "second"
1267+
// no semantic searching => no matches
1268+
let results: SearchResults<Document> = index.search().with_query("2nd").execute().await?;
1269+
assert_eq!(results.hits.len(), 0);
1270+
1271+
// an embedding should be able to detect that this is equivalent, but not the regular search
12521272
let results: SearchResults<Document> = index
12531273
.search()
1254-
.with_query("Facebook")
1274+
.with_query("2nd")
1275+
.with_vector(&[2000.0])
12551276
.with_hybrid("default", 1.0) // entirely rely on semantic searching
12561277
.execute()
12571278
.await?;
@@ -1263,30 +1284,26 @@ mod tests {
12631284
kind: S("text"),
12641285
number: 10,
12651286
nested: Nested { child: S("second") },
1287+
_vectors: HashMap::from([(S("default"), vec![2000.0])])
12661288
},
12671289
&results.hits[0].result
12681290
);
1269-
let results: SearchResults<Document> = index
1270-
.search()
1271-
.with_query("zweite")
1272-
.with_hybrid("default", 0.0) // no semantic searching => no matches
1273-
.execute()
1274-
.await?;
1275-
assert_eq!(results.hits.len(), 0);
12761291

12771292
// word that has a typo => would have been found via traditional means
12781293
// if entirely relying on semantic searching, no result is found
12791294
let results: SearchResults<Document> = index
12801295
.search()
12811296
.with_query("lohrem")
12821297
.with_hybrid("default", 1.0)
1298+
.with_vector(&[1000.0])
12831299
.execute()
12841300
.await?;
12851301
assert_eq!(results.hits.len(), 0);
12861302
let results: SearchResults<Document> = index
12871303
.search()
12881304
.with_query("lohrem")
12891305
.with_hybrid("default", 0.0)
1306+
.with_vector(&[1000.0])
12901307
.execute()
12911308
.await?;
12921309
assert_eq!(results.hits.len(), 1);
@@ -1296,7 +1313,8 @@ mod tests {
12961313
value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."),
12971314
kind: S("text"),
12981315
number: 0,
1299-
nested: Nested { child: S("first") }
1316+
nested: Nested { child: S("first") },
1317+
_vectors: HashMap::from([(S("default"), vec![1000.0])]),
13001318
},
13011319
&results.hits[0].result
13021320
);

0 commit comments

Comments
 (0)