core/conf/server_cpu_config.template  +8 −7

@@ -27,8 +27,7 @@ metric_config:
   port: 8080                       # port prometheus uses to fetch metrics, must in range [1025, 65534]

 cache_config:
-  cpu_cache_capacity: 16           # GB, CPU memory used for cache, must be a positive integer
-  cpu_cache_threshold: 0.85        # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
+  cpu_cache_capacity: 16           # GB, size of CPU memory used for cache, must be a positive integer
   cache_insert_data: false         # whether to load inserted data into cache, must be a boolean

 engine_config:
@@ -36,8 +35,10 @@ engine_config:
                                    # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
   gpu_search_threshold: 1000       # threshold beyond which the search computation is executed on GPUs only

-resource_config:
-  search_resources:                # define the devices used for search computation, must be in format: cpu or gpux
-    - cpu
-  index_build_resources:           # define the devices used for index building, must be in format: cpu or gpux
-    - cpu
+gpu_resource_config:
+  enable_gpu: false                # whether to enable GPU resources
+  cache_capacity: 4                # GB, size of GPU memory per card used for cache, must be a positive integer
+  search_resources:                # define the GPU devices used for search computation, must be in format gpux
+    - gpu0
+  build_index_resources:           # define the GPU devices used for index building, must be in format gpux
+    - gpu0
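The search_resources and build_index_resources lists now only accept GPU devices written in the gpux form (for example gpu0), where the old resource_config also allowed a bare cpu. For reference, a device string in that form reduces to a numeric device id with a few lines of standard C++; ParseGpuDevice below is a hypothetical helper for illustration, not a function from the Milvus codebase.

#include <cctype>
#include <cstdint>
#include <optional>
#include <string>

// Parse a device entry such as "gpu0" into its numeric id (0). Returns
// std::nullopt for anything that does not match the gpux form. Hypothetical
// helper for illustration only; not part of the Milvus codebase.
std::optional<int32_t> ParseGpuDevice(const std::string& entry) {
    const std::string prefix = "gpu";
    if (entry.compare(0, prefix.size(), prefix) != 0 || entry.size() == prefix.size()) {
        return std::nullopt;
    }
    int32_t id = 0;
    for (std::string::size_type i = prefix.size(); i < entry.size(); ++i) {
        if (!std::isdigit(static_cast<unsigned char>(entry[i]))) {
            return std::nullopt;
        }
        id = id * 10 + (entry[i] - '0');
    }
    return id;
}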
core/conf/server_gpu_config.template  +6 −8

@@ -27,10 +27,7 @@ metric_config:
   port: 8080                       # port prometheus uses to fetch metrics, must in range [1025, 65534]

 cache_config:
-  cpu_cache_capacity: 16           # GB, CPU memory used for cache, must be a positive integer
-  cpu_cache_threshold: 0.85        # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
-  gpu_cache_capacity: 4            # GB, GPU memory used for cache, must be a positive integer
-  gpu_cache_threshold: 0.85        # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
+  cpu_cache_capacity: 16           # GB, size of CPU memory used for cache, must be a positive integer
   cache_insert_data: false         # whether to load inserted data into cache, must be a boolean

 engine_config:
@@ -38,9 +35,10 @@ engine_config:
                                    # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
   gpu_search_threshold: 1000       # threshold beyond which the search computation is executed on GPUs only

-resource_config:
-  search_resources:                # define the devices used for search computation, must be in format: cpu or gpux
-    - cpu
+gpu_resource_config:
+  enable_gpu: true                 # whether to enable GPU resources
+  cache_capacity: 4                # GB, size of GPU memory per card used for cache, must be a positive integer
+  search_resources:                # define the GPU devices used for search computation, must be in format gpux
     - gpu0
-  index_build_resources:           # define the devices used for index building, must be in format: cpu or gpux
+  build_index_resources:           # define the GPU devices used for index building, must be in format gpux
     - gpu0
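Both templates replace the old resource_config section with gpu_resource_config; the CPU template ships it disabled (enable_gpu: false) while the GPU template enables it. As a rough sketch of how such a section could be consumed, assuming yaml-cpp is available and using a hypothetical LoadGpuResourceConfig helper rather than the actual Milvus config loader:

#include <cstdint>
#include <string>
#include <vector>

#include <yaml-cpp/yaml.h>

struct GpuResourceConfig {
    bool enable_gpu = false;
    int64_t cache_capacity_gb = 4;
    std::vector<std::string> search_resources;
    std::vector<std::string> build_index_resources;
};

// Illustrative reader for the new section; not the actual Milvus config code.
GpuResourceConfig LoadGpuResourceConfig(const std::string& path) {
    GpuResourceConfig cfg;
    YAML::Node root = YAML::LoadFile(path);
    YAML::Node node = root["gpu_resource_config"];
    if (!node) {
        return cfg;  // section missing: keep defaults (GPU disabled)
    }
    cfg.enable_gpu = node["enable_gpu"].as<bool>(false);
    if (!cfg.enable_gpu) {
        return cfg;  // GPU disabled: ignore the remaining keys
    }
    const std::vector<std::string> kNone;
    cfg.cache_capacity_gb = node["cache_capacity"].as<int64_t>(4);
    cfg.search_resources = node["search_resources"].as<std::vector<std::string>>(kNone);
    cfg.build_index_resources = node["build_index_resources"].as<std::vector<std::string>>(kNone);
    return cfg;
}

Returning early when enable_gpu is false mirrors the intent of shipping the section disabled by default in the CPU template.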
core/src/cache/GpuCacheMgr.cpp  +2 −2

@@ -37,7 +37,7 @@ GpuCacheMgr::GpuCacheMgr() {
     Status s;

     int64_t gpu_cache_cap;
-    s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap);
+    s = config.GetGpuResourceConfigCacheCapacity(gpu_cache_cap);
     if (!s.ok()) {
         SERVER_LOG_ERROR << s.message();
     }
@@ -45,7 +45,7 @@ GpuCacheMgr::GpuCacheMgr() {
     cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32);

     float gpu_mem_threshold;
-    s = config.GetCacheConfigGpuCacheThreshold(gpu_mem_threshold);
+    s = config.GetGpuResourceConfigCacheThreshold(gpu_mem_threshold);
     if (!s.ok()) {
         SERVER_LOG_ERROR << s.message();
     }
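GpuCacheMgr now reads its capacity and threshold through the renamed GetGpuResourceConfigCacheCapacity and GetGpuResourceConfigCacheThreshold getters. The cache_capacity value is given in GB in the template, so it has to be scaled to bytes before the cache is sized. The sketch below only illustrates that conversion: ByteCache is a stand-in for the real Cache<DataObjPtr>, and 0.85 is taken from the threshold default in the previous templates.

#include <cstdint>
#include <iostream>

// Stand-in for the real cache type (cache::Cache<DataObjPtr> in Milvus).
struct ByteCache {
    int64_t capacity_bytes;
    float threshold;
};

// Illustrates the GB -> bytes scaling implied by
// "cache_capacity: 4  # GB, size of GPU memory per card used for cache".
ByteCache MakeGpuCache(int64_t cache_capacity_gb, float cache_threshold) {
    constexpr int64_t kGiB = 1LL << 30;  // bytes per GiB
    ByteCache cache{cache_capacity_gb * kGiB, cache_threshold};
    std::cout << "GPU cache: " << cache.capacity_bytes << " bytes, cleanup keeps "
              << cache.threshold * 100 << "% of cached data\n";
    return cache;
}

int main() {
    MakeGpuCache(4, 0.85f);  // template default capacity, previous default threshold
    return 0;
}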
core/src/db/engine/ExecutionEngineImpl.cpp  +11 −2

@@ -144,7 +144,14 @@ ExecutionEngineImpl::HybridLoad() const {
     }

     const std::string key = location_ + ".quantizer";
-    std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
+
+    server::Config& config = server::Config::GetInstance();
+    std::vector<int32_t> gpus;
+    Status s = config.GetGpuResourceConfigSearchResources(gpus);
+    if (!s.ok()) {
+        ENGINE_LOG_ERROR << s.message();
+        return;
+    }

     // cache hit
     {
@@ -578,7 +585,9 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) {
 // TODO(linxj): remove.
 Status
 ExecutionEngineImpl::Init() {
-    auto gpu_ids = scheduler::get_build_resources();
+    server::Config& config = server::Config::GetInstance();
+    std::vector<int32_t> gpu_ids;
+    Status s = config.GetGpuResourceConfigBuildIndexResources(gpu_ids);
     for (auto id : gpu_ids) {
         if (gpu_num_ == id) {
             return Status::OK();
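Both call sites now pull their GPU lists straight from the config singleton instead of the scheduler pools: HybridLoad asks for the search resources, while Init asks for the build-index resources and then checks whether the engine's own gpu_num_ appears in the returned list. A condensed sketch of that membership check, with a plain gpu_num parameter standing in for the member field:

#include <algorithm>
#include <cstdint>
#include <vector>

// Returns true when the given device id appears in the configured resource list,
// mirroring the loop in ExecutionEngineImpl::Init(). gpu_num stands in for the
// gpu_num_ member field.
bool DeviceEnabledForBuild(int32_t gpu_num, const std::vector<int32_t>& build_gpu_ids) {
    return std::find(build_gpu_ids.begin(), build_gpu_ids.end(), gpu_num) != build_gpu_ids.end();
}

int main() {
    const std::vector<int32_t> build_gpu_ids = {0};  // e.g. "build_index_resources: - gpu0"
    return DeviceEnabledForBuild(0, build_gpu_ids) ? 0 : 1;
}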
core/src/scheduler/SchedInst.cpp  +8 −11

@@ -45,17 +45,6 @@ std::mutex BuildMgrInst::mutex_;
 void
 load_simple_config() {
-    server::Config& config = server::Config::GetInstance();
-    std::string mode;
-    config.GetResourceConfigMode(mode);
-
-    std::vector<std::string> pool;
-    config.GetResourceConfigSearchResources(pool);
-
-    // get resources
-    auto gpu_ids = get_gpu_pool();
-    auto build_gpu_ids = get_build_resources();
-
     // create and connect
     ResMgrInst::GetInstance()->Add(ResourceFactory::Create("disk", "DISK", 0, true, false));
@@ -63,6 +52,13 @@ load_simple_config() {
     ResMgrInst::GetInstance()->Add(ResourceFactory::Create("cpu", "CPU", 0, true, true));
     ResMgrInst::GetInstance()->Connect("disk", "cpu", io);

+    // get resources
+#ifdef MILVUS_GPU_VERSION
+    server::Config& config = server::Config::GetInstance();
+    std::vector<int32_t> gpu_ids;
+    config.GetGpuResourceConfigSearchResources(gpu_ids);
+    std::vector<int32_t> build_gpu_ids;
+    config.GetGpuResourceConfigBuildIndexResources(build_gpu_ids);
     auto pcie = Connection("pcie", 12000);

     std::vector<int64_t> not_find_build_ids;
@@ -89,6 +85,7 @@ load_simple_config() {
             ResourceFactory::Create(std::to_string(not_find_id), "GPU", not_find_id, true, true));
         ResMgrInst::GetInstance()->Connect("cpu", std::to_string(not_find_id), pcie);
     }
+#endif
 }

 void
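With the simple-mode branch removed, load_simple_config reads both GPU lists from the config inside the MILVUS_GPU_VERSION guard. Judging from the not_find_build_ids vector and the ResourceFactory::Create(..., "GPU", ...) call further down the hunk, the part of the function not shown here registers any build-index GPU that is not already listed among the search GPUs and wires it to "cpu" over the pcie connection. A rough sketch of that reconciliation step, under the assumption that this is indeed what the unshown code does:

#include <algorithm>
#include <cstdint>
#include <vector>

// For every GPU id requested for index building but absent from the search list,
// record it so it can be added as an extra "GPU" resource and connected to "cpu"
// over pcie. Sketch only; the real logic lives in SchedInst.cpp.
std::vector<int64_t> FindBuildOnlyGpus(const std::vector<int32_t>& search_gpu_ids,
                                       const std::vector<int32_t>& build_gpu_ids) {
    std::vector<int64_t> not_find_build_ids;
    for (int32_t build_id : build_gpu_ids) {
        bool found = std::find(search_gpu_ids.begin(), search_gpu_ids.end(), build_id) !=
                     search_gpu_ids.end();
        if (!found) {
            not_find_build_ids.push_back(build_id);
        }
    }
    return not_find_build_ids;
}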