-
Notifications
You must be signed in to change notification settings - Fork 253
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Refactor of diskannpy module code. * 0.5.0.rc1 for python and enabling the build-python portion of the pr-test process. * clang-format changes * In theory this should speed up the python build drastically by only building the wheel for the python version and OS we're attempting to fan out to in our CICD job tree * Missed a dollar sign * Copy/pasting left a CICD step name that implied we were running a code formatting check when instead we were building a wheel. This is now fixed. * In theory, readying the release action too. We won't know if it works until it merges and we cut a release, but at least the paths have been fixed * Designated initializers just happened to work on linux but shouldn't have as they weren't added until cpp20 * Formatting
- Loading branch information
Showing
19 changed files
with
831 additions
and
548 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Composite action: installs cibuildwheel, builds the wheel selected by the | ||
# `cibw-identifier` input, and uploads ./dist/*.whl as the `wheels` artifact. | ||
name: Build Python Wheel | ||
description: Builds a python wheel with cibuildwheel | ||
inputs: | ||
  # Must be spelled exactly as it is referenced below (`inputs.cibw-identifier`); | ||
  # a mismatched key leaves the step name and CIBW_BUILD empty. | ||
  cibw-identifier: | ||
    description: "CI build wheel identifier to build" | ||
    required: true | ||
runs: | ||
  using: "composite" | ||
  steps: | ||
    - uses: actions/setup-python@v3 | ||
    - name: Install cibuildwheel | ||
      run: python -m pip install cibuildwheel==2.11.3 | ||
      shell: bash | ||
    - name: Building Python ${{inputs.cibw-identifier}} Wheel | ||
      run: python -m cibuildwheel --output-dir dist | ||
      env: | ||
        # Restricts cibuildwheel to the single requested build identifier. | ||
        CIBW_BUILD: ${{inputs.cibw-identifier}} | ||
      shell: bash | ||
    - uses: actions/upload-artifact@v3 | ||
      with: | ||
        name: wheels | ||
        path: ./dist/*.whl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. | ||
|
||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <string> | ||
|
||
#include "common.h" | ||
#include "distance.h" | ||
|
||
namespace diskannpy | ||
{ | ||
|
||
// Index-building entry points. Only the declarations live in this header. | ||
|
||
// Builds a disk index; templated on DT (presumably the vector element type). | ||
// NOTE(review): the units of final_index_ram_limit / indexing_ram_budget are not | ||
// visible from this declaration - confirm against the definition. | ||
template <typename DT> | ||
void build_disk_index(diskann::Metric metric, const std::string &data_file_path, | ||
                      const std::string &index_prefix_path, uint32_t complexity, uint32_t graph_degree, | ||
                      double final_index_ram_limit, double indexing_ram_budget, uint32_t num_threads, | ||
                      uint32_t pq_disk_bytes); | ||
|
||
// Builds an in-memory index. TagT and LabelT default to the id and filter types | ||
// (DynamicIdType, filterT); use_tags defaults to false. | ||
template <typename DT, typename TagT = DynamicIdType, typename LabelT = filterT> | ||
void build_memory_index(diskann::Metric metric, const std::string &vector_bin_path, | ||
                        const std::string &index_output_path, uint32_t graph_degree, | ||
                        uint32_t complexity, float alpha, uint32_t num_threads, | ||
                        bool use_pq_build, size_t num_pq_bytes, bool use_opq, | ||
                        uint32_t filter_complexity, bool use_tags = false); | ||
|
||
} // namespace diskannpy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. | ||
|
||
#pragma once | ||
|
||
#include <stdint.h> | ||
#include <utility> | ||
|
||
#include <pybind11/pybind11.h> | ||
#include <pybind11/numpy.h> | ||
|
||
namespace py = pybind11; | ||
|
||
namespace diskannpy | ||
{ | ||
|
||
// Shared type aliases for the diskannpy wrappers. | ||
|
||
// Filter/label type (by name; presumably the label type of filtered indices - confirm). | ||
using filterT = uint32_t; | ||
|
||
// Identifier types for the static and dynamic index wrappers. | ||
using StaticIdType = uint32_t; | ||
using DynamicIdType = uint32_t; | ||
|
||
// Search result: a pair of numpy arrays, the first holding IdType values and the | ||
// second holding floats (by name: neighbor ids and their distances). | ||
template <class IdType> using NeighborsAndDistances = std::pair<py::array_t<IdType>, py::array_t<float>>; | ||
|
||
} // namespace diskannpy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. | ||
|
||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <string> | ||
|
||
#include <pybind11/pybind11.h> | ||
#include <pybind11/numpy.h> | ||
|
||
#include "common.h" | ||
#include "index.h" | ||
#include "parameters.h" | ||
|
||
namespace py = pybind11; | ||
|
||
namespace diskannpy | ||
{ | ||
|
||
// Wrapper class around a diskann::Index<DT, DynamicIdType, filterT> whose methods take | ||
// and return pybind11 numpy arrays. DT is the element type of the vector arrays. | ||
template <typename DT> | ||
class DynamicMemoryIndex | ||
{ | ||
  public: | ||
    DynamicMemoryIndex(diskann::Metric m, size_t dimensions, size_t max_vectors, uint32_t complexity, | ||
                       uint32_t graph_degree, bool saturate_graph, uint32_t max_occlusion_size, float alpha, | ||
                       uint32_t num_threads, uint32_t filter_complexity, uint32_t num_frozen_points, | ||
                       uint32_t initial_search_complexity, uint32_t initial_search_threads, | ||
                       bool concurrent_consolidation); | ||
|
||
    void load(const std::string &index_path); | ||
|
||
    // NOTE(review): the int results of insert / batch_insert / mark_deleted are presumably | ||
    // status codes - confirm against the definitions. | ||
    int insert(const py::array_t<DT, py::array::c_style | py::array::forcecast> &vector, DynamicIdType id); | ||
    py::array_t<int> batch_insert(py::array_t<DT, py::array::c_style | py::array::forcecast> &vectors, | ||
                                  py::array_t<DynamicIdType, py::array::c_style | py::array::forcecast> &ids, | ||
                                  int32_t num_inserts, int num_threads = 0); | ||
    int mark_deleted(DynamicIdType id); | ||
|
||
    void save(const std::string &save_path, bool compact_before_save = false); | ||
|
||
    NeighborsAndDistances<DynamicIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query, | ||
                                                uint64_t knn, uint64_t complexity); | ||
    NeighborsAndDistances<DynamicIdType> batch_search( | ||
        py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, uint64_t knn, | ||
        uint64_t complexity, uint32_t num_threads); | ||
|
||
    void consolidate_delete(); | ||
|
||
  private: | ||
    // Members are initialized in declaration order; keep _index last in case its | ||
    // construction reads the two members above (TODO confirm in the constructor). | ||
    const uint32_t _initial_search_complexity; | ||
    const diskann::IndexWriteParameters _write_parameters; | ||
    diskann::Index<DT, DynamicIdType, filterT> _index; | ||
}; | ||
|
||
} // namespace diskannpy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. | ||
|
||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <memory> | ||
#include <string> | ||
|
||
|
||
#include <pybind11/pybind11.h> | ||
#include <pybind11/numpy.h> | ||
|
||
#ifdef _WINDOWS | ||
#include "windows_aligned_file_reader.h" | ||
#else | ||
#include "linux_aligned_file_reader.h" | ||
#endif | ||
|
||
#include "common.h" | ||
#include "pq_flash_index.h" | ||
|
||
namespace py = pybind11; | ||
|
||
namespace diskannpy { | ||
|
||
#ifdef _WINDOWS | ||
typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader; | ||
#else | ||
typedef LinuxAlignedFileReader PlatformSpecificAlignedFileReader; | ||
#endif | ||
|
||
template <typename DT> | ||
class StaticDiskIndex | ||
{ | ||
public: | ||
StaticDiskIndex(diskann::Metric metric, const std::string &index_path_prefix, uint32_t num_threads, | ||
size_t num_nodes_to_cache, uint32_t cache_mechanism); | ||
|
||
void cache_bfs_levels(size_t num_nodes_to_cache); | ||
|
||
void cache_sample_paths(size_t num_nodes_to_cache, const std::string &warmup_query_file, uint32_t num_threads); | ||
|
||
NeighborsAndDistances<StaticIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn, | ||
uint64_t complexity, uint64_t beam_width); | ||
|
||
NeighborsAndDistances<StaticIdType> batch_search(py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, uint64_t num_queries, | ||
uint64_t knn, uint64_t complexity, uint64_t beam_width, uint32_t num_threads); | ||
private: | ||
std::shared_ptr<AlignedFileReader> _reader; | ||
diskann::PQFlashIndex<DT> _index; | ||
}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT license. | ||
|
||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <string> | ||
|
||
#include <pybind11/pybind11.h> | ||
#include <pybind11/numpy.h> | ||
|
||
#include "common.h" | ||
#include "index.h" | ||
|
||
namespace py = pybind11; | ||
|
||
namespace diskannpy { | ||
|
||
template <typename DT> | ||
class StaticMemoryIndex | ||
{ | ||
public: | ||
StaticMemoryIndex(diskann::Metric m, const std::string &index_prefix, size_t num_points, | ||
size_t dimensions, uint32_t num_threads, uint32_t initial_search_complexity); | ||
|
||
NeighborsAndDistances<StaticIdType> search(py::array_t<DT, py::array::c_style | py::array::forcecast> &query, uint64_t knn, | ||
uint64_t complexity); | ||
|
||
NeighborsAndDistances<StaticIdType> batch_search(py::array_t<DT, py::array::c_style | py::array::forcecast> &queries, | ||
uint64_t num_queries, uint64_t knn, uint64_t complexity, uint32_t num_threads); | ||
private: | ||
diskann::Index<DT, StaticIdType, filterT> _index; | ||
}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.