diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index 653d89f7b..e811c1ff5 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -58,7 +58,7 @@ jobs: - name: build and search disk index with labels using L2 and Cosine metrics (random distributed labels) if: success() || failure() run: | - dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --universal_label 0 --Lf 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50_wlabel -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --universal_label 0 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50_wlabel -R 16 -L 32 -B 0.00003 -M 1 dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --filter_label 10 --fail_if_recall_below 50 --index_path_prefix data/disk_index_l2_rand_uint8_10D_10K_norm50_wlabel --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 - name: build and search in-memory index with labels using L2 and Cosine metrics (zipf distributed labels) if: success() || failure() @@ -70,25 +70,25 @@ jobs: - name: build and search disk index with labels using L2 and Cosine metrics (zipf distributed labels) if: success() || failure() run: | - dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --universal_label 0 --Lf 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --universal_label 0 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel -R 16 -L 32 -B 0.00003 -M 1 dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --filter_label 5 --fail_if_recall_below 50 --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 - name : build and search in-memory and disk index (without universal label, zipf distributed) if: success() || failure() run: | dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal - dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --Lf 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal -R 16 -L 32 -B 0.00003 -M 1 + dist/bin/build_disk_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal -R 16 -L 32 -B 0.00003 -M 1 dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 5 --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel_nouniversal -L 16 32 dist/bin/search_disk_index --data_type uint8 --dist_fn l2 --filter_label 5 --index_path_prefix data/disk_index_l2_zipf_uint8_10D_10K_norm50_wlabel_nouniversal --result_path /tmp/res --query_file data/rand_uint8_10D_1K_norm50.0.bin --gt_file data/l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel_nouniversal --recall_at 5 -L 5 12 -W 2 --num_nodes_to_cache 10 -T 16 - name: Generate combined GT for each query with a separate label and search if: success() || failure() run: | dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/zipf_labels_50_10K.txt --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel - dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --query_filters_file data/query_labels_1K.txt --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/combined_l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --query_filters_file data/query_labels_1K.txt --fail_if_recall_below 70 --index_path_prefix data/index_l2_zipf_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/combined_l2_zipf_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 - name: build and search in-memory index with pq_dist of 5 with 10 dimensions if: success() || failure() run: | dist/bin/build_memory_index --data_type uint8 --dist_fn l2 --FilteredLbuild 90 --universal_label 0 --data_path data/rand_uint8_10D_10K_norm50.0.bin --label_file data/rand_labels_50_10K.txt --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel --build_PQ_bytes 5 - dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 10 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 + dist/bin/search_memory_index --data_type uint8 --dist_fn l2 --filter_label 10 --fail_if_recall_below 70 --index_path_prefix data/index_l2_rand_uint8_10D_10K_norm50_wlabel --query_file data/rand_uint8_10D_1K_norm50.0.bin --recall_at 10 --result_path temp --gt_file data/l2_rand_uint8_10D_10K_norm50.0_10D_1K_norm50.0_gt100_wlabel -L 16 32 - name: Build and search stitched vamana with random and zipf distributed labels if: success() || failure() run: | diff --git a/apps/build_disk_index.cpp b/apps/build_disk_index.cpp index 3d097458b..1edb027da 100644 --- a/apps/build_disk_index.cpp +++ b/apps/build_disk_index.cpp @@ -67,7 +67,7 @@ int main(int argc, char **argv) "can " "assign a special universal filter to the point instead of comma " "separated filters for that point"); - desc.add_options()("FilteredLbuild,Lf", po::value(&Lf)->default_value(0), + desc.add_options()("FilteredLbuild", po::value(&Lf)->default_value(0), "Build complexity for filtered points, higher value " "results in better graphs"); desc.add_options()("filter_threshold,F", po::value(&filter_threshold)->default_value(0), @@ -96,12 +96,7 @@ int main(int argc, char **argv) return -1; } - bool use_filters = false; - if (label_file != "") - { - use_filters = true; - } - + bool use_filters = (label_file != "") ? true : false; diskann::Metric metric; if (dist_fn == std::string("l2")) metric = diskann::Metric::L2; diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index 8d483f5c4..d96ad7f50 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ -107,7 +107,7 @@ int main(int argc, char **argv) desc.add_options()("universal_label", po::value(&universal_label)->default_value(""), "Universal label, if using it, only in conjunction with " "labels_file"); - desc.add_options()("FilteredLbuild,Lf", po::value(&Lf)->default_value(0), + desc.add_options()("FilteredLbuild", po::value(&Lf)->default_value(0), "Build complexity for filtered points, higher value " "results in better graphs"); desc.add_options()("label_type", po::value(&label_type)->default_value("uint"), diff --git a/src/index.cpp b/src/index.cpp index 109d93689..55ba60ac9 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2195,7 +2195,7 @@ std::pair Index::search(const T *query, con } if (pos < K) { - diskann::cerr << "Found fewer than K elements for query" << std::endl; + diskann::cerr << "Found pos: " << pos << "fewer than K elements " << K << " for query" << std::endl; } return retval;