Loading core/src/scheduler/SchedInst.h +10 −12 Original line number Diff line number Diff line Loading @@ -24,9 +24,10 @@ #include "Utils.h" #include "optimizer/BuildIndexPass.h" #include "optimizer/FallbackPass.h" #include "optimizer/HybridPass.h" #include "optimizer/LargeSQ8HPass.h" #include "optimizer/OnlyCPUPass.h" #include "optimizer/FaissFlatPass.h" #include "optimizer/FaissIVFFlatPass.h" #include "optimizer/FaissIVFSQ8Pass.h" #include "optimizer/FaissIVFSQ8HPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h" Loading Loading @@ -100,15 +101,12 @@ class OptimizerInst { std::lock_guard<std::mutex> lock(mutex_); if (instance == nullptr) { std::vector<PassPtr> pass_list; pass_list.push_back(std::make_shared<LargeSQ8HPass>()); pass_list.push_back(std::make_shared<HybridPass>()); #ifdef MILVUS_CPU_VERSION pass_list.push_back(std::make_shared<OnlyCPUPass>()); #else server::Config& config = server::Config::GetInstance(); std::vector<int32_t> build_resources; config.GetGpuResourceConfigBuildIndexResources(build_resources); pass_list.push_back(std::make_shared<BuildIndexPass>(build_resources)); #ifdef MILVUS_GPU_VERSION pass_list.push_back(std::make_shared<BuildIndexPass>()); pass_list.push_back(std::make_shared<FaissFlatPass>()); pass_list.push_back(std::make_shared<FaissIVFFlatPass>()); pass_list.push_back(std::make_shared<FaissIVFSQ8Pass>()); pass_list.push_back(std::make_shared<FaissIVFSQ8HPass>()); #endif pass_list.push_back(std::make_shared<FallbackPass>()); instance = std::make_shared<Optimizer>(pass_list); Loading core/src/scheduler/optimizer/BuildIndexPass.cpp +6 −3 Original line number Diff line number Diff line Loading @@ -23,11 +23,14 @@ namespace milvus { namespace scheduler { BuildIndexPass::BuildIndexPass(std::vector<int32_t>& build_gpu_ids) : build_gpu_ids_(build_gpu_ids) { } void BuildIndexPass::Init() { server::Config& config = server::Config::GetInstance(); std::vector<int32_t> build_resources; Status s = config.GetGpuResourceConfigBuildIndexResources(build_resources); if (!s.ok()) { throw; } } bool Loading core/src/scheduler/optimizer/BuildIndexPass.h +1 −1 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ namespace scheduler { class BuildIndexPass : public Pass { public: explicit BuildIndexPass(std::vector<int32_t>& build_gpu_id); BuildIndexPass() = default; public: void Loading core/src/scheduler/optimizer/HybridPass.cpp→core/src/scheduler/optimizer/FaissFlatPass.cpp +69 −0 Original line number Diff line number Diff line Loading @@ -15,33 +15,55 @@ // specific language governing permissions and limitations // under the License. #include "scheduler/optimizer/HybridPass.h" #include "scheduler/optimizer/FaissFlatPass.h" #include "cache/GpuCacheMgr.h" #include "scheduler/SchedInst.h" #include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" #include "server/Config.h" #include "utils/Log.h" namespace milvus { namespace scheduler { void HybridPass::Init() { FaissFlatPass::Init() { server::Config& config = server::Config::GetInstance(); Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { threshold_ = std::numeric_limits<int32_t>::max(); } s = config.GetGpuResourceConfigSearchResources(gpus); if (!s.ok()) { throw; } } bool HybridPass::Run(const TaskPtr& task) { // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu if (task->Type() != TaskType::SearchTask) FaissFlatPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) { return false; } auto search_task = std::static_pointer_cast<XSearchTask>(task); if (search_task->file_->engine_type_ == (int)engine::EngineType::FAISS_IVFSQ8H) { // TODO: remove "cpu" hardcode ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); auto label = std::make_shared<SpecResLabel>(std::weak_ptr<Resource>(res_ptr)); if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IDMAP) { return false; } auto search_job = std::static_pointer_cast<SearchJob>(search_task->job_.lock()); ResourcePtr res_ptr; if (search_job->nq() < threshold_) { res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); } else { auto best_device_id = count_ % gpus.size(); count_++; res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); } auto label = std::make_shared<SpecResLabel>(res_ptr); task->label() = label; return true; } return false; } } // namespace scheduler } // namespace milvus core/src/scheduler/optimizer/LargeSQ8HPass.h→core/src/scheduler/optimizer/FaissFlatPass.h +3 −3 Original line number Diff line number Diff line Loading @@ -33,9 +33,9 @@ namespace milvus { namespace scheduler { class LargeSQ8HPass : public Pass { class FaissFlatPass : public Pass { public: LargeSQ8HPass() = default; FaissFlatPass() = default; public: void Loading @@ -50,7 +50,7 @@ class LargeSQ8HPass : public Pass { std::vector<int32_t> gpus; }; using LargeSQ8HPassPtr = std::shared_ptr<LargeSQ8HPass>; using FaissFlatPassPtr = std::shared_ptr<FaissFlatPass>; } // namespace scheduler } // namespace milvus Loading
core/src/scheduler/SchedInst.h +10 −12 Original line number Diff line number Diff line Loading @@ -24,9 +24,10 @@ #include "Utils.h" #include "optimizer/BuildIndexPass.h" #include "optimizer/FallbackPass.h" #include "optimizer/HybridPass.h" #include "optimizer/LargeSQ8HPass.h" #include "optimizer/OnlyCPUPass.h" #include "optimizer/FaissFlatPass.h" #include "optimizer/FaissIVFFlatPass.h" #include "optimizer/FaissIVFSQ8Pass.h" #include "optimizer/FaissIVFSQ8HPass.h" #include "optimizer/Optimizer.h" #include "server/Config.h" Loading Loading @@ -100,15 +101,12 @@ class OptimizerInst { std::lock_guard<std::mutex> lock(mutex_); if (instance == nullptr) { std::vector<PassPtr> pass_list; pass_list.push_back(std::make_shared<LargeSQ8HPass>()); pass_list.push_back(std::make_shared<HybridPass>()); #ifdef MILVUS_CPU_VERSION pass_list.push_back(std::make_shared<OnlyCPUPass>()); #else server::Config& config = server::Config::GetInstance(); std::vector<int32_t> build_resources; config.GetGpuResourceConfigBuildIndexResources(build_resources); pass_list.push_back(std::make_shared<BuildIndexPass>(build_resources)); #ifdef MILVUS_GPU_VERSION pass_list.push_back(std::make_shared<BuildIndexPass>()); pass_list.push_back(std::make_shared<FaissFlatPass>()); pass_list.push_back(std::make_shared<FaissIVFFlatPass>()); pass_list.push_back(std::make_shared<FaissIVFSQ8Pass>()); pass_list.push_back(std::make_shared<FaissIVFSQ8HPass>()); #endif pass_list.push_back(std::make_shared<FallbackPass>()); instance = std::make_shared<Optimizer>(pass_list); Loading
core/src/scheduler/optimizer/BuildIndexPass.cpp +6 −3 Original line number Diff line number Diff line Loading @@ -23,11 +23,14 @@ namespace milvus { namespace scheduler { BuildIndexPass::BuildIndexPass(std::vector<int32_t>& build_gpu_ids) : build_gpu_ids_(build_gpu_ids) { } void BuildIndexPass::Init() { server::Config& config = server::Config::GetInstance(); std::vector<int32_t> build_resources; Status s = config.GetGpuResourceConfigBuildIndexResources(build_resources); if (!s.ok()) { throw; } } bool Loading
core/src/scheduler/optimizer/BuildIndexPass.h +1 −1 Original line number Diff line number Diff line Loading @@ -34,7 +34,7 @@ namespace scheduler { class BuildIndexPass : public Pass { public: explicit BuildIndexPass(std::vector<int32_t>& build_gpu_id); BuildIndexPass() = default; public: void Loading
core/src/scheduler/optimizer/HybridPass.cpp→core/src/scheduler/optimizer/FaissFlatPass.cpp +69 −0 Original line number Diff line number Diff line Loading @@ -15,33 +15,55 @@ // specific language governing permissions and limitations // under the License. #include "scheduler/optimizer/HybridPass.h" #include "scheduler/optimizer/FaissFlatPass.h" #include "cache/GpuCacheMgr.h" #include "scheduler/SchedInst.h" #include "scheduler/Utils.h" #include "scheduler/task/SearchTask.h" #include "scheduler/tasklabel/SpecResLabel.h" #include "server/Config.h" #include "utils/Log.h" namespace milvus { namespace scheduler { void HybridPass::Init() { FaissFlatPass::Init() { server::Config& config = server::Config::GetInstance(); Status s = config.GetEngineConfigGpuSearchThreshold(threshold_); if (!s.ok()) { threshold_ = std::numeric_limits<int32_t>::max(); } s = config.GetGpuResourceConfigSearchResources(gpus); if (!s.ok()) { throw; } } bool HybridPass::Run(const TaskPtr& task) { // TODO: future, Index::IVFSQ8H, if nq < threshold set cpu, else set gpu if (task->Type() != TaskType::SearchTask) FaissFlatPass::Run(const TaskPtr& task) { if (task->Type() != TaskType::SearchTask) { return false; } auto search_task = std::static_pointer_cast<XSearchTask>(task); if (search_task->file_->engine_type_ == (int)engine::EngineType::FAISS_IVFSQ8H) { // TODO: remove "cpu" hardcode ResourcePtr res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); auto label = std::make_shared<SpecResLabel>(std::weak_ptr<Resource>(res_ptr)); if (search_task->file_->engine_type_ != (int)engine::EngineType::FAISS_IDMAP) { return false; } auto search_job = std::static_pointer_cast<SearchJob>(search_task->job_.lock()); ResourcePtr res_ptr; if (search_job->nq() < threshold_) { res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); } else { auto best_device_id = count_ % gpus.size(); count_++; res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, best_device_id); } auto label = std::make_shared<SpecResLabel>(res_ptr); task->label() = label; return true; } return false; } } // namespace scheduler } // namespace milvus
core/src/scheduler/optimizer/LargeSQ8HPass.h→core/src/scheduler/optimizer/FaissFlatPass.h +3 −3 Original line number Diff line number Diff line Loading @@ -33,9 +33,9 @@ namespace milvus { namespace scheduler { class LargeSQ8HPass : public Pass { class FaissFlatPass : public Pass { public: LargeSQ8HPass() = default; FaissFlatPass() = default; public: void Loading @@ -50,7 +50,7 @@ class LargeSQ8HPass : public Pass { std::vector<int32_t> gpus; }; using LargeSQ8HPassPtr = std::shared_ptr<LargeSQ8HPass>; using FaissFlatPassPtr = std::shared_ptr<FaissFlatPass>; } // namespace scheduler } // namespace milvus