Skip to content

Commit

Permalink
Merge branch 'users/suryangupta/align_latency_timer_to_rust_in_disk_search' into users/suryangupta/remove_restriction_of_caching_at_most_ten_percent_nodes_in_disk_search
Browse files Browse the repository at this point in the history
  • Loading branch information
Suryansh Gupta committed Jan 29, 2025
2 parents f38e379 + 68e7bb6 commit cbc76f8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 18 deletions.
11 changes: 5 additions & 6 deletions python/include/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@ void build_disk_index(diskann::Metric metric, const std::string &data_file_path,

template <typename DT, typename TagT = DynamicIdType, typename LabelT = filterT>
void build_memory_index(diskann::Metric metric, const std::string &vector_bin_path,
const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity,
float alpha, uint32_t num_threads, bool use_pq_build,
size_t num_pq_bytes, bool use_opq, bool use_tags = false,
const std::string& filter_labels_file = "", const std::string& universal_label = "",
uint32_t filter_complexity = 0);
const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity, float alpha,
uint32_t num_threads, bool use_pq_build, size_t num_pq_bytes, bool use_opq,
bool use_tags = false, const std::string &filter_labels_file = "",
const std::string &universal_label = "", uint32_t filter_complexity = 0);

}
} // namespace diskannpy
18 changes: 8 additions & 10 deletions python/include/dynamic_memory_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ namespace py = pybind11;
namespace diskannpy
{

template <typename DT>
class DynamicMemoryIndex
template <typename DT> class DynamicMemoryIndex
{
public:
DynamicMemoryIndex(diskann::Metric m, size_t dimensions, size_t max_vectors, uint32_t complexity,
Expand All @@ -31,19 +30,18 @@ class DynamicMemoryIndex
void load(const std::string &index_path);
int insert(const py::array_t<DT, py::array::c_style | py::array::forcecast> &vector, DynamicIdType id);
py::array_t<int> batch_insert(py::array_t<DT, py::array::c_style | py::array::forcecast> &vectors,
py::array_t<DynamicIdType, py::array::c_style | py::array::forcecast> &ids, int32_t num_inserts,
int num_threads = 0);
py::array_t<DynamicIdType, py::array::c_style | py::array::forcecast> &ids,
int32_t num_inserts, int num_threads = 0);
int mark_deleted(DynamicIdType id);
void save(const std::string &save_path, bool compact_before_save = false);
NeighborsAndDistances<DynamicIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn,
uint64_t complexity);
NeighborsAndDistances<DynamicIdType> batch_search(py::array_t<DT, py::array::c_style | py::array::forcecast> &queries,
uint64_t num_queries, uint64_t knn, uint64_t complexity,
uint32_t num_threads);
NeighborsAndDistances<DynamicIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query,
uint64_t knn, uint64_t complexity);
NeighborsAndDistances<DynamicIdType> batch_search(
py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, uint64_t knn,
uint64_t complexity, uint32_t num_threads);
void consolidate_delete();
size_t num_points();


private:
const uint32_t _initial_search_complexity;
const diskann::IndexWriteParameters _write_parameters;
Expand Down
8 changes: 6 additions & 2 deletions src/pq_flash_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
if (beam_width > num_sector_per_nodes * defaults::MAX_N_SECTOR_READS)
throw ANNException("Beamwidth can not be higher than defaults::MAX_N_SECTOR_READS", -1, __FUNCSIG__, __FILE__,
__LINE__);

Timer query_timer, io_timer, cpu_timer;
ScratchStoreManager<SSDThreadData<T>> manager(this->_thread_data);
auto data = manager.scratch_space();
IOContext &ctx = data->ctx;
Expand Down Expand Up @@ -1333,11 +1333,16 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
const uint64_t num_sectors_per_node =
_nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN);

cpu_timer.reset();
// query <-> PQ chunk centers distances
_pq_table.preprocess_query(query_rotated); // center the query and rotate if
// we have a rotation matrix
float *pq_dists = pq_query_scratch->aligned_pqtable_dist_scratch;
_pq_table.populate_chunk_distances(query_rotated, pq_dists);
if (stats != nullptr)
{
stats->cpu_us += (float)cpu_timer.elapsed();
}

// query <-> neighbor list
float *dist_scratch = pq_query_scratch->aligned_dist_scratch;
Expand All @@ -1349,7 +1354,6 @@ void PQFlashIndex<T, LabelT>::cached_beam_search(const T *query1, const uint64_t
diskann::aggregate_coords(ids, n_ids, this->data, this->_n_chunks, pq_coord_scratch);
diskann::pq_dist_lookup(pq_coord_scratch, n_ids, this->_n_chunks, pq_dists, dists_out);
};
Timer query_timer, io_timer, cpu_timer;

tsl::robin_set<uint64_t> &visited = query_scratch->visited;
NeighborPriorityQueue &retset = query_scratch->retset;
Expand Down

0 comments on commit cbc76f8

Please sign in to comment.