Loading cpp/CHANGELOG.md +1 −0 Original line number Diff line number Diff line Loading @@ -36,6 +36,7 @@ Please mark all change in change log and use the ticket from JIRA. ## New Feature - MS-627 - Integrate new index: IVFSQHybrid - MS-631 - IVFSQ8H Index support ## Task - MS-554 - Change license to Apache 2.0 Loading cpp/src/cache/CpuCacheMgr.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -32,7 +32,7 @@ CpuCacheMgr::CpuCacheMgr() { server::Config& config = server::Config::GetInstance(); Status s; int32_t cpu_cache_cap; int64_t cpu_cache_cap; s = config.GetCacheConfigCpuCacheCapacity(cpu_cache_cap); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); Loading cpp/src/cache/GpuCacheMgr.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -36,12 +36,12 @@ GpuCacheMgr::GpuCacheMgr() { server::Config& config = server::Config::GetInstance(); Status s; int32_t gpu_cache_cap; int64_t gpu_cache_cap; s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); } int32_t cap = gpu_cache_cap * G_BYTE; int64_t cap = gpu_cache_cap * G_BYTE; cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32); float gpu_mem_threshold; Loading cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +21 −5 Original line number Diff line number Diff line Loading @@ -100,16 +100,20 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { void IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { FaissBaseIndex::LoadImpl(index_binary); // load on cpu auto* ivf_index = dynamic_cast<faiss::IndexIVF*>(index_.get()); ivf_index->backup_quantizer(); } void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { if (gpu_mode) { if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); } else { } else if (gpu_mode == 1) { ResScope rs(res_, gpu_id_); IVF::search_impl(n, data, k, distances, labels, cfg); } else if (gpu_mode == 0) { IVF::search_impl(n, data, k, distances, labels, cfg); } } Loading Loading @@ -137,8 +141,12 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { delete gpu_index; auto q = std::make_shared<FaissIVFQuantizer>(); q->quantizer = index_composition->quantizer; auto& q_ptr = index_composition->quantizer; q->size = q_ptr->d * q_ptr->getNumVecs() * sizeof(float); q->quantizer = q_ptr; res_ = res; gpu_mode = 1; return q; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); Loading @@ -156,7 +164,7 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { faiss::gpu::GpuIndexFlat* is_gpu_flat_index = dynamic_cast<faiss::gpu::GpuIndexFlat*>(ivf_index->quantizer); if (is_gpu_flat_index == nullptr) { delete ivf_index->quantizer; // delete ivf_index->quantizer; ivf_index->quantizer = ivf_quantizer->quantizer; } } Loading Loading @@ -199,10 +207,18 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option); index_.reset(gpu_index); gpu_mode = true; // all in gpu gpu_mode = 2; // all in gpu } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); } } FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; quantizer = nullptr; } // else do nothing } } // namespace knowhere cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +6 −4 Original line number Diff line number Diff line Loading @@ -27,23 +27,25 @@ namespace knowhere { struct FaissIVFQuantizer : public Quantizer { faiss::gpu::GpuIndexFlat* quantizer = nullptr; ~FaissIVFQuantizer() override; }; using FaissIVFQuantizerPtr = std::shared_ptr<FaissIVFQuantizer>; class IVFSQHybrid : public GPUIVFSQ { public: explicit IVFSQHybrid(const int& device_id) : GPUIVFSQ(device_id) { gpu_mode = false; gpu_mode = 0; } explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index) : GPUIVFSQ(-1) { index_ = index; gpu_mode = false; gpu_mode = 0; } explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& resource) : GPUIVFSQ(index, device_id, resource) { gpu_mode = true; gpu_mode = 2; } public: Loading Loading @@ -76,7 +78,7 @@ class IVFSQHybrid : public GPUIVFSQ { LoadImpl(const BinarySet& index_binary) override; protected: bool gpu_mode = false; int64_t gpu_mode = 0; // 0,1,2 }; } // namespace knowhere Loading
cpp/CHANGELOG.md +1 −0 Original line number Diff line number Diff line Loading @@ -36,6 +36,7 @@ Please mark all change in change log and use the ticket from JIRA. ## New Feature - MS-627 - Integrate new index: IVFSQHybrid - MS-631 - IVFSQ8H Index support ## Task - MS-554 - Change license to Apache 2.0 Loading
cpp/src/cache/CpuCacheMgr.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -32,7 +32,7 @@ CpuCacheMgr::CpuCacheMgr() { server::Config& config = server::Config::GetInstance(); Status s; int32_t cpu_cache_cap; int64_t cpu_cache_cap; s = config.GetCacheConfigCpuCacheCapacity(cpu_cache_cap); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); Loading
cpp/src/cache/GpuCacheMgr.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -36,12 +36,12 @@ GpuCacheMgr::GpuCacheMgr() { server::Config& config = server::Config::GetInstance(); Status s; int32_t gpu_cache_cap; int64_t gpu_cache_cap; s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap); if (!s.ok()) { SERVER_LOG_ERROR << s.message(); } int32_t cap = gpu_cache_cap * G_BYTE; int64_t cap = gpu_cache_cap * G_BYTE; cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32); float gpu_mem_threshold; Loading
cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +21 −5 Original line number Diff line number Diff line Loading @@ -100,16 +100,20 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { void IVFSQHybrid::LoadImpl(const BinarySet& index_binary) { FaissBaseIndex::LoadImpl(index_binary); // load on cpu auto* ivf_index = dynamic_cast<faiss::IndexIVF*>(index_.get()); ivf_index->backup_quantizer(); } void IVFSQHybrid::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { if (gpu_mode) { if (gpu_mode == 2) { GPUIVF::search_impl(n, data, k, distances, labels, cfg); } else { } else if (gpu_mode == 1) { ResScope rs(res_, gpu_id_); IVF::search_impl(n, data, k, distances, labels, cfg); } else if (gpu_mode == 0) { IVF::search_impl(n, data, k, distances, labels, cfg); } } Loading Loading @@ -137,8 +141,12 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { delete gpu_index; auto q = std::make_shared<FaissIVFQuantizer>(); q->quantizer = index_composition->quantizer; auto& q_ptr = index_composition->quantizer; q->size = q_ptr->d * q_ptr->getNumVecs() * sizeof(float); q->quantizer = q_ptr; res_ = res; gpu_mode = 1; return q; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); Loading @@ -156,7 +164,7 @@ IVFSQHybrid::SetQuantizer(const QuantizerPtr& q) { faiss::gpu::GpuIndexFlat* is_gpu_flat_index = dynamic_cast<faiss::gpu::GpuIndexFlat*>(ivf_index->quantizer); if (is_gpu_flat_index == nullptr) { delete ivf_index->quantizer; // delete ivf_index->quantizer; ivf_index->quantizer = ivf_quantizer->quantizer; } } Loading Loading @@ -199,10 +207,18 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option); index_.reset(gpu_index); gpu_mode = true; // all in gpu gpu_mode = 2; // all in gpu } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); } } FaissIVFQuantizer::~FaissIVFQuantizer() { if (quantizer != nullptr) { delete quantizer; quantizer = nullptr; } // else do nothing } } // namespace knowhere
cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h +6 −4 Original line number Diff line number Diff line Loading @@ -27,23 +27,25 @@ namespace knowhere { struct FaissIVFQuantizer : public Quantizer { faiss::gpu::GpuIndexFlat* quantizer = nullptr; ~FaissIVFQuantizer() override; }; using FaissIVFQuantizerPtr = std::shared_ptr<FaissIVFQuantizer>; class IVFSQHybrid : public GPUIVFSQ { public: explicit IVFSQHybrid(const int& device_id) : GPUIVFSQ(device_id) { gpu_mode = false; gpu_mode = 0; } explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index) : GPUIVFSQ(-1) { index_ = index; gpu_mode = false; gpu_mode = 0; } explicit IVFSQHybrid(std::shared_ptr<faiss::Index> index, const int64_t& device_id, ResPtr& resource) : GPUIVFSQ(index, device_id, resource) { gpu_mode = true; gpu_mode = 2; } public: Loading Loading @@ -76,7 +78,7 @@ class IVFSQHybrid : public GPUIVFSQ { LoadImpl(const BinarySet& index_binary) override; protected: bool gpu_mode = false; int64_t gpu_mode = 0; // 0,1,2 }; } // namespace knowhere