Loading core/src/db/DBImpl.cpp +7 −1 Original line number Diff line number Diff line Loading @@ -540,7 +540,13 @@ DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total); if (cache_total > 0) { double cache_usage_double = cache_usage; server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total); } else { server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0); } server::Metrics::GetInstance().GpuCacheUsageGaugeSet(); uint64_t size; Size(size); Loading core/src/db/engine/ExecutionEngine.h +1 −2 Original line number Diff line number Diff line Loading @@ -80,8 +80,7 @@ class ExecutionEngine { Merge(const std::string& location) = 0; virtual Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const = 0; Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0; virtual std::shared_ptr<ExecutionEngine> BuildIndex(const std::string& location, EngineType engine_type) = 0; Loading core/src/db/engine/ExecutionEngineImpl.cpp +55 −21 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ #include <utility> #include <vector> //#define ON_SEARCH namespace milvus { namespace engine { Loading Loading @@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { auto key = location_ + ".quantizer"; auto quantizer = std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key)); auto conf = std::make_shared<knowhere::QuantizerCfg>(); conf->gpu_id = device_id; if (quantizer) { // cache hit conf->mode = 2; auto new_index = index_->LoadData(quantizer->Data(), conf); index_ = new_index; } else { auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } return Status::OK(); } Loading Loading @@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t Status ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const { bool hybrid) { if (index_type_ == EngineType::FAISS_IVFSQ8H) { if (!hybrid) { const std::string key = location_ + ".quantizer"; std::vector<uint64_t> gpus = scheduler::get_gpu_pool(); const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; // cache hit { knowhere::QuantizerPtr quantizer = nullptr; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); if (auto cached_quantizer = cache->GetIndex(key)) { device_id = gpu; quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data(); } } if (device_id != NOT_FOUND) { // cache hit auto config = std::make_shared<knowhere::QuantizerCfg>(); config->gpu_id = device_id; config->mode = 2; auto new_index = index_->LoadData(quantizer, config); index_ = new_index; } } if (device_id == NOT_FOUND) { // cache miss std::vector<int64_t> all_free_mem; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); all_free_mem.push_back(free_mem); } auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); auto best_index = std::distance(all_free_mem.begin(), max_e); device_id = gpus[best_index]; auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } } } if (index_ == nullptr) { ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search"; return Status(DB_ERROR, "index is null"); Loading core/src/db/engine/ExecutionEngineImpl.h +1 −1 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine { Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid = false) const override; bool hybrid = false) override; ExecutionEnginePtr BuildIndex(const std::string& location, EngineType engine_type) override; Loading core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +5 −5 Original line number Diff line number Diff line Loading @@ -91,11 +91,11 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option); std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index);; std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index); auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res); return new_idx; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading Loading @@ -151,7 +151,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { gpu_mode = 1; return q; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading Loading @@ -214,7 +214,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res); return sq_idx; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading @@ -241,7 +241,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); return std::make_pair(new_idx, q); } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading Loading
core/src/db/DBImpl.cpp +7 −1 Original line number Diff line number Diff line Loading @@ -540,7 +540,13 @@ DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total); if (cache_total > 0) { double cache_usage_double = cache_usage; server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total); } else { server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0); } server::Metrics::GetInstance().GpuCacheUsageGaugeSet(); uint64_t size; Size(size); Loading
core/src/db/engine/ExecutionEngine.h +1 −2 Original line number Diff line number Diff line Loading @@ -80,8 +80,7 @@ class ExecutionEngine { Merge(const std::string& location) = 0; virtual Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const = 0; Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0; virtual std::shared_ptr<ExecutionEngine> BuildIndex(const std::string& location, EngineType engine_type) = 0; Loading
core/src/db/engine/ExecutionEngineImpl.cpp +55 −21 Original line number Diff line number Diff line Loading @@ -37,6 +37,7 @@ #include <utility> #include <vector> //#define ON_SEARCH namespace milvus { namespace engine { Loading Loading @@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { auto key = location_ + ".quantizer"; auto quantizer = std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key)); auto conf = std::make_shared<knowhere::QuantizerCfg>(); conf->gpu_id = device_id; if (quantizer) { // cache hit conf->mode = 2; auto new_index = index_->LoadData(quantizer->Data(), conf); index_ = new_index; } else { auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } return Status::OK(); } Loading Loading @@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t Status ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) const { bool hybrid) { if (index_type_ == EngineType::FAISS_IVFSQ8H) { if (!hybrid) { const std::string key = location_ + ".quantizer"; std::vector<uint64_t> gpus = scheduler::get_gpu_pool(); const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; // cache hit { knowhere::QuantizerPtr quantizer = nullptr; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); if (auto cached_quantizer = cache->GetIndex(key)) { device_id = gpu; quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data(); } } if (device_id != NOT_FOUND) { // cache hit auto config = std::make_shared<knowhere::QuantizerCfg>(); config->gpu_id = device_id; config->mode = 2; auto new_index = index_->LoadData(quantizer, config); index_ = new_index; } } if (device_id == NOT_FOUND) { // cache miss std::vector<int64_t> all_free_mem; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); all_free_mem.push_back(free_mem); } auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); auto best_index = std::distance(all_free_mem.begin(), max_e); device_id = gpus[best_index]; auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } } } if (index_ == nullptr) { ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search"; return Status(DB_ERROR, "index is null"); Loading
core/src/db/engine/ExecutionEngineImpl.h +1 −1 Original line number Diff line number Diff line Loading @@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine { Status Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid = false) const override; bool hybrid = false) override; ExecutionEnginePtr BuildIndex(const std::string& location, EngineType engine_type) override; Loading
core/src/index/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp +5 −5 Original line number Diff line number Diff line Loading @@ -91,11 +91,11 @@ IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) { auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option); std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index);; std::shared_ptr<faiss::Index> device_index = std::shared_ptr<faiss::Index>(gpu_index); auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res); return new_idx; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading Loading @@ -151,7 +151,7 @@ IVFSQHybrid::LoadQuantizer(const Config& conf) { gpu_mode = 1; return q; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading Loading @@ -214,7 +214,7 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) { auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res); return sq_idx; } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading @@ -241,7 +241,7 @@ IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& c q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float); return std::make_pair(new_idx, q); } else { KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource"); KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu: " + std::to_string(gpu_id_) + "resource"); } } Loading