Skip to content

Commit 74ce806

Browse files
committed
Adding cosine support in build_disk_index and ensuring that the dummy map file is written in the correct location
1 parent b2b0942 commit 74ce806

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

apps/build_disk_index.cpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,21 @@ int main(int argc, char **argv)
103103

104104
bool use_filters = (label_file != "") ? true : false;
105105
diskann::Metric metric;
106-
if (dist_fn == std::string("l2"))
106+
if (dist_fn == std::string("l2"))
107+
{
107108
metric = diskann::Metric::L2;
109+
}
108110
else if (dist_fn == std::string("mips"))
111+
{
109112
metric = diskann::Metric::INNER_PRODUCT;
113+
}
114+
else if (dist_fn == std::string("cosine"))
115+
{
116+
metric = diskann::Metric::COSINE;
117+
}
110118
else
111119
{
112-
std::cout << "Error. Only l2 and mips distance functions are supported" << std::endl;
120+
std::cout << "Error. Only l2, cosine, and mips distance functions are supported" << std::endl;
113121
return -1;
114122
}
115123

src/disk_utils.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -1239,7 +1239,9 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
12391239
augmented_labels_file = index_prefix_path + "_augmented_labels.txt";
12401240
if (filter_threshold != 0)
12411241
{
1242-
dummy_remap_file = index_prefix_path + "_dummy_remap.txt";
1242+
//Changing this filename to "_disk.index_dummy_map.txt" from "_dummy_remap.txt" to conform
1243+
//to the convention that index files all share the _disk.index prefix.
1244+
dummy_remap_file = index_prefix_path + "_disk.index_dummy_map.txt";
12431245
breakup_dense_points<T>(data_file_to_use, labels_file_to_use, filter_threshold, augmented_data_file,
12441246
augmented_labels_file,
12451247
dummy_remap_file); // RKNOTE: This has large memory footprint,

src/pq_flash_index.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,8 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
876876
#ifndef EXEC_ENV_OLS
877877
infile.close();
878878
#endif
879+
diskann::cout << "Labels file: " << labels_file << " loaded with " << num_pts_in_label_file << " points"
880+
<< std::endl;
879881

880882
#ifdef EXEC_ENV_OLS
881883
FileContent &content_labels_map = files.getContent(labels_map_file);
@@ -889,6 +891,8 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
889891
map_reader.close();
890892
#endif
891893

894+
diskann::cout << "Labels map file: " << labels_map_file << " loaded." << std::endl;
895+
892896
#ifdef EXEC_ENV_OLS
893897
if (files.fileExists(labels_to_medoids))
894898
{
@@ -902,7 +906,16 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
902906
assert(medoid_stream.is_open());
903907
#endif
904908
load_label_medoid_map(labels_to_medoids, medoid_stream);
909+
diskann::cout << "Loaded labels_to_medoids map from: " << labels_to_medoids << std::endl;
910+
}
911+
else
912+
{
913+
std::stringstream ss;
914+
ss << "Filter support is enabled but " << labels_to_medoids << " file cannot be opened." << std::endl;
915+
diskann::cerr << ss.str();
916+
throw diskann::ANNException(ss.str(), -1);
905917
}
918+
906919
std::string univ_label_file = std ::string(_disk_index_file) + "_universal_label.txt";
907920

908921
#ifdef EXEC_ENV_OLS
@@ -944,6 +957,13 @@ template <typename T, typename LabelT> void PQFlashIndex<T, LabelT>::load_labels
944957
#endif
945958
diskann::cout << "Loaded dummy map" << std::endl;
946959
}
960+
else
961+
{
962+
std::stringstream ss;
963+
ss << "Note: Filter support is enabled but " << dummy_map_file << " file cannot be opened" << std::endl;
964+
diskann::cerr << ss.str();
965+
}
966+
947967
}
948968
else
949969
{

0 commit comments

Comments
 (0)