Loading core/CHANGELOG.md +1 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-644 - Search crashed with index-type: flat - MS-624 - Search vectors failed if time ranges long enough - MS-652 - IVFSQH quantization double free - MS-605 - Server going down during searching vectors - MS-654 - Describe index timeout when building index ## Improvement Loading core/src/db/DBImpl.cpp +7 −1 Original line number Diff line number Diff line Loading @@ -509,7 +509,13 @@ DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total); if (cache_total > 0) { double cache_usage_double = cache_usage; server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total); } else { server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0); } server::Metrics::GetInstance().GpuCacheUsageGaugeSet(); uint64_t size; Size(size); Loading core/src/db/engine/ExecutionEngine.h +1 −2 Original line number Diff line number Diff line Loading @@ -80,8 +80,7 @@ class ExecutionEngine { Merge(const std::string& location) = 0; virtual Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const = 0; Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0; virtual std::shared_ptr<ExecutionEngine> BuildIndex(const std::string& location, EngineType engine_type) = 0; Loading core/src/db/engine/ExecutionEngineImpl.cpp +55 −21 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ #include <utility> #include <vector> //#define ON_SEARCH namespace milvus { namespace engine { Loading Loading @@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { auto key = location_ + ".quantizer"; auto quantizer = std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key)); auto conf = std::make_shared<knowhere::QuantizerCfg>(); conf->gpu_id = device_id; if (quantizer) { // cache hit conf->mode = 2; auto new_index = index_->LoadData(quantizer->Data(), conf); index_ = new_index; } else { auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } return Status::OK(); } Loading Loading @@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t Status ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const { bool hybrid) { if (index_type_ == EngineType::FAISS_IVFSQ8H) { if (!hybrid) { const std::string key = location_ + ".quantizer"; std::vector<uint64_t> gpus = scheduler::get_gpu_pool(); const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; // cache hit { knowhere::QuantizerPtr quantizer = nullptr; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); if (auto cached_quantizer = cache->GetIndex(key)) { device_id = gpu; quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data(); } } if (device_id != NOT_FOUND) { // cache hit auto config = std::make_shared<knowhere::QuantizerCfg>(); config->gpu_id = device_id; config->mode = 2; auto new_index = index_->LoadData(quantizer, config); index_ = new_index; } } if (device_id == NOT_FOUND) { // cache miss std::vector<int64_t> all_free_mem; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); all_free_mem.push_back(free_mem); } auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); auto best_index = std::distance(all_free_mem.begin(), max_e); device_id = gpus[best_index]; auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } } } if (index_ == nullptr) { ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search"; return Status(DB_ERROR, "index is null"); Loading core/src/db/engine/ExecutionEngineImpl.h +1 −1 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine { Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid = false) const override; bool hybrid = false) override; ExecutionEnginePtr BuildIndex(const std::string& location, EngineType engine_type) override; Loading Loading
core/CHANGELOG.md +1 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-644 - Search crashed with index-type: flat - MS-624 - Search vectors failed if time ranges long enough - MS-652 - IVFSQH quantization double free - MS-605 - Server going down during searching vectors - MS-654 - Describe index timeout when building index ## Improvement Loading
core/src/db/DBImpl.cpp +7 −1 Original line number Diff line number Diff line Loading @@ -509,7 +509,13 @@ DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total); if (cache_total > 0) { double cache_usage_double = cache_usage; server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total); } else { server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0); } server::Metrics::GetInstance().GpuCacheUsageGaugeSet(); uint64_t size; Size(size); Loading
core/src/db/engine/ExecutionEngine.h +1 −2 Original line number Diff line number Diff line Loading @@ -80,8 +80,7 @@ class ExecutionEngine { Merge(const std::string& location) = 0; virtual Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const = 0; Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0; virtual std::shared_ptr<ExecutionEngine> BuildIndex(const std::string& location, EngineType engine_type) = 0; Loading
core/src/db/engine/ExecutionEngineImpl.cpp +55 −21 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ #include <utility> #include <vector> //#define ON_SEARCH namespace milvus { namespace engine { Loading Loading @@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { auto key = location_ + ".quantizer"; auto quantizer = std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key)); auto conf = std::make_shared<knowhere::QuantizerCfg>(); conf->gpu_id = device_id; if (quantizer) { // cache hit conf->mode = 2; auto new_index = index_->LoadData(quantizer->Data(), conf); index_ = new_index; } else { auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } return Status::OK(); } Loading Loading @@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t Status ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const { bool hybrid) { if (index_type_ == EngineType::FAISS_IVFSQ8H) { if (!hybrid) { const std::string key = location_ + ".quantizer"; std::vector<uint64_t> gpus = scheduler::get_gpu_pool(); const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; // cache hit { knowhere::QuantizerPtr quantizer = nullptr; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); if (auto cached_quantizer = cache->GetIndex(key)) { device_id = gpu; quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data(); } } if (device_id != NOT_FOUND) { // cache hit auto config = std::make_shared<knowhere::QuantizerCfg>(); config->gpu_id = device_id; config->mode = 2; auto new_index = index_->LoadData(quantizer, config); index_ = new_index; } } if (device_id == NOT_FOUND) { // cache miss std::vector<int64_t> all_free_mem; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); all_free_mem.push_back(free_mem); } auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); auto best_index = std::distance(all_free_mem.begin(), max_e); device_id = gpus[best_index]; auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } } } if (index_ == nullptr) { ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search"; return Status(DB_ERROR, "index is null"); Loading
core/src/db/engine/ExecutionEngineImpl.h +1 −1 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine { Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid = false) const override; bool hybrid = false) override; ExecutionEnginePtr BuildIndex(const std::string& location, EngineType engine_type) override; Loading