Loading core/src/db/engine/ExecutionEngineImpl.cpp +50 −3 Original line number Diff line number Diff line Loading @@ -22,10 +22,7 @@ #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" #include "knowhere/common/Config.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "scheduler/Utils.h" #include "server/Config.h" #include "wrapper/ConfAdapter.h" Loading Loading @@ -249,6 +246,56 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { #if 1 const std::string key = location_ + ".quantizer"; std::vector<uint64_t> gpus = scheduler::get_gpu_pool(); const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; // cache hit { knowhere::QuantizerPtr quantizer = nullptr; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); if (auto cached_quantizer = cache->GetIndex(key)) { device_id = gpu; quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data(); } } if (device_id != NOT_FOUND) { // cache hit auto config = std::make_shared<knowhere::QuantizerCfg>(); config->gpu_id = device_id; config->mode = 2; auto new_index = index_->LoadData(quantizer, config); index_ = new_index; } } if (device_id == NOT_FOUND) { // cache miss std::vector<int64_t> all_free_mem; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); all_free_mem.push_back(free_mem); } auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); auto best_index = std::distance(all_free_mem.begin(), max_e); device_id = gpus[best_index]; auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } #endif return Status::OK(); } Loading core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h +1 −1 Original line number Diff line number Diff line Loading @@ -38,7 +38,7 @@ class FaissBaseIndex { virtual void SealImpl(); protected: public: std::shared_ptr<faiss::Index> index_ = nullptr; }; Loading core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +5 −6 Original line number Diff line number Diff line Loading @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. #include <faiss/gpu/GpuAutoTune.h> #include <faiss/gpu/GpuIndexFlat.h> #include <memory> #include <faiss/gpu/GpuIndexIVF.h> #include <faiss/gpu/GpuIndexIVFFlat.h> #include <faiss/index_io.h> #include <memory> #include <faiss/gpu/GpuCloner.h> #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" Loading Loading @@ -130,13 +130,12 @@ void GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { std::lock_guard<std::mutex> lk(mutex_); // TODO(linxj): gpu index support GenParams if (auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_)) { auto search_cfg = std::dynamic_pointer_cast<IVFCfg>(cfg); device_index->setNumProbes(search_cfg->nprobe); device_index->nprobe = search_cfg->nprobe; // assert(device_index->getNumProbes() == search_cfg->nprobe); { // TODO(linxj): allocate gpu mem ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); } Loading core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -16,8 +16,10 @@ // under the License. #include <faiss/IndexIVFPQ.h> #include <faiss/gpu/GpuAutoTune.h> #include <faiss/gpu/GpuIndexIVFPQ.h> #include <faiss/index_factory.h> #include <faiss/gpu/GpuCloner.h> #include <memory> #include "knowhere/adapter/VectorAdapter.h" Loading core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +3 −11 Original line number Diff line number Diff line Loading @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. #include <faiss/gpu/GpuAutoTune.h> #include <faiss/index_factory.h> #include <faiss/gpu/GpuCloner.h> #include <memory> #include <utility> #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" Loading Loading @@ -71,13 +72,4 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) { return std::make_shared<IVFSQ>(new_index); } void GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { #ifdef CUSTOMIZATION GPUIVF::search_impl(n, data, k, distances, labels, cfg); #else IVF::search_impl(n, data, k, distances, labels, cfg); #endif } } // namespace knowhere Loading
core/src/db/engine/ExecutionEngineImpl.cpp +50 −3 Original line number Diff line number Diff line Loading @@ -22,10 +22,7 @@ #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" #include "knowhere/common/Config.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexIVFSQHybrid.h" #include "scheduler/Utils.h" #include "server/Config.h" #include "wrapper/ConfAdapter.h" Loading Loading @@ -249,6 +246,56 @@ ExecutionEngineImpl::Load(bool to_cache) { Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) { if (hybrid) { #if 1 const std::string key = location_ + ".quantizer"; std::vector<uint64_t> gpus = scheduler::get_gpu_pool(); const int64_t NOT_FOUND = -1; int64_t device_id = NOT_FOUND; // cache hit { knowhere::QuantizerPtr quantizer = nullptr; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); if (auto cached_quantizer = cache->GetIndex(key)) { device_id = gpu; quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data(); } } if (device_id != NOT_FOUND) { // cache hit auto config = std::make_shared<knowhere::QuantizerCfg>(); config->gpu_id = device_id; config->mode = 2; auto new_index = index_->LoadData(quantizer, config); index_ = new_index; } } if (device_id == NOT_FOUND) { // cache miss std::vector<int64_t> all_free_mem; for (auto& gpu : gpus) { auto cache = cache::GpuCacheMgr::GetInstance(gpu); auto free_mem = cache->CacheCapacity() - cache->CacheUsage(); all_free_mem.push_back(free_mem); } auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end()); auto best_index = std::distance(all_free_mem.begin(), max_e); device_id = gpus[best_index]; auto pair = index_->CopyToGpuWithQuantizer(device_id); index_ = pair.first; // cache auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second); cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer); } #endif return Status::OK(); } Loading
core/src/index/knowhere/knowhere/index/vector_index/FaissBaseIndex.h +1 −1 Original line number Diff line number Diff line Loading @@ -38,7 +38,7 @@ class FaissBaseIndex { virtual void SealImpl(); protected: public: std::shared_ptr<faiss::Index> index_ = nullptr; }; Loading
core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +5 −6 Original line number Diff line number Diff line Loading @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. #include <faiss/gpu/GpuAutoTune.h> #include <faiss/gpu/GpuIndexFlat.h> #include <memory> #include <faiss/gpu/GpuIndexIVF.h> #include <faiss/gpu/GpuIndexIVFFlat.h> #include <faiss/index_io.h> #include <memory> #include <faiss/gpu/GpuCloner.h> #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" Loading Loading @@ -130,13 +130,12 @@ void GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { std::lock_guard<std::mutex> lk(mutex_); // TODO(linxj): gpu index support GenParams if (auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_)) { auto search_cfg = std::dynamic_pointer_cast<IVFCfg>(cfg); device_index->setNumProbes(search_cfg->nprobe); device_index->nprobe = search_cfg->nprobe; // assert(device_index->getNumProbes() == search_cfg->nprobe); { // TODO(linxj): allocate gpu mem ResScope rs(res_, gpu_id_); device_index->search(n, (float*)data, k, distances, labels); } Loading
core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -16,8 +16,10 @@ // under the License. #include <faiss/IndexIVFPQ.h> #include <faiss/gpu/GpuAutoTune.h> #include <faiss/gpu/GpuIndexIVFPQ.h> #include <faiss/index_factory.h> #include <faiss/gpu/GpuCloner.h> #include <memory> #include "knowhere/adapter/VectorAdapter.h" Loading
core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +3 −11 Original line number Diff line number Diff line Loading @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. #include <faiss/gpu/GpuAutoTune.h> #include <faiss/index_factory.h> #include <faiss/gpu/GpuCloner.h> #include <memory> #include <utility> #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" Loading Loading @@ -71,13 +72,4 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) { return std::make_shared<IVFSQ>(new_index); } void GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { #ifdef CUSTOMIZATION GPUIVF::search_impl(n, data, k, distances, labels, cfg); #else IVF::search_impl(n, data, k, distances, labels, cfg); #endif } } // namespace knowhere