CHANGELOG.md (+2 −1)

```diff
@@ -17,8 +17,9 @@ Please mark all change in change log and use the ticket from JIRA.
 ## Feature
 - \#12 - Pure CPU version for Milvus
 - \#77 - Support table partition
-- \#226 - Experimental shards middleware for Milvus
 - \#127 - Support new Index type IVFPQ
+- \#226 - Experimental shards middleware for Milvus
+- \#346 - Support build index with multiple gpu

 ## Improvement
 - \#275 - Rename C++ SDK IndexType
```
core/conf/server_cpu_config.template (+8 −7)

```diff
@@ -27,9 +27,7 @@ metric_config:
   port: 8080                  # port prometheus uses to fetch metrics, must in range [1025, 65534]

 cache_config:
-  cpu_cache_capacity: 16      # GB, CPU memory used for cache, must be a positive integer
-  cpu_cache_threshold: 0.85   # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
+  cpu_cache_capacity: 16      # GB, size of CPU memory used for cache, must be a positive integer
   cache_insert_data: false    # whether to load inserted data into cache, must be a boolean

 engine_config:
@@ -37,7 +35,10 @@ engine_config:
                               # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
   gpu_search_threshold: 1000  # threshold beyond which the search computation is executed on GPUs only

-resource_config:
-  search_resources:           # define the device used for search computation
-    - cpu
-  index_build_device: cpu     # CPU used for building index
+gpu_resource_config:
+  enable_gpu: false           # whether to enable GPU resources
+  cache_capacity: 4           # GB, size of GPU memory per card used for cache, must be a positive integer
+  search_resources:           # define the GPU devices used for search computation, must be in format gpux
+    - gpu0
+  build_index_resources:      # define the GPU devices used for index building, must be in format gpux
+    - gpu0
```
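Both templates require GPU devices to be named in the `gpux` form (`gpu0`, `gpu1`, …). As a rough illustration of the validation that format implies, here is a minimal C++ sketch; `ParseGpuDeviceId` is a hypothetical helper invented for this example, not a function from the Milvus code base:

```cpp
#include <cstdint>
#include <stdexcept>
#include <string>

// Hypothetical helper: turn a "gpux" resource name (e.g. "gpu0") into a
// device ordinal, rejecting anything that does not match the format.
int32_t ParseGpuDeviceId(const std::string& resource) {
    const std::string prefix = "gpu";
    if (resource.compare(0, prefix.size(), prefix) != 0) {
        throw std::invalid_argument("resource must be in format gpux: " + resource);
    }
    std::size_t consumed = 0;
    int32_t id = std::stoi(resource.substr(prefix.size()), &consumed);
    if (consumed != resource.size() - prefix.size() || id < 0) {
        throw std::invalid_argument("resource must be in format gpux: " + resource);
    }
    return id;  // "gpu0" -> 0, "gpu1" -> 1, ...
}
```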
core/conf/server_gpu_config.template (+7 −8)

```diff
@@ -27,10 +27,7 @@ metric_config:
   port: 8080                  # port prometheus uses to fetch metrics, must in range [1025, 65534]

 cache_config:
-  cpu_cache_capacity: 16      # GB, CPU memory used for cache, must be a positive integer
-  cpu_cache_threshold: 0.85   # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
-  gpu_cache_capacity: 4       # GB, GPU memory used for cache, must be a positive integer
-  gpu_cache_threshold: 0.85   # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
+  cpu_cache_capacity: 16      # GB, size of CPU memory used for cache, must be a positive integer
   cache_insert_data: false    # whether to load inserted data into cache, must be a boolean

 engine_config:
@@ -38,8 +35,10 @@ engine_config:
                               # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
   gpu_search_threshold: 1000  # threshold beyond which the search computation is executed on GPUs only

-resource_config:
-  search_resources:           # define the devices used for search computation, must be in format: cpu or gpux
-    - cpu
-  index_build_device: gpu0    # CPU / GPU used for building index, must be in format: cpu or gpux
+gpu_resource_config:
+  enable_gpu: true            # whether to enable GPU resources
+  cache_capacity: 4           # GB, size of GPU memory per card used for cache, must be a positive integer
+  search_resources:           # define the GPU devices used for search computation, must be in format gpux
+    - gpu0
+  build_index_resources:      # define the GPU devices used for index building, must be in format gpux
+    - gpu0
```
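For reference, a small sketch of reading the new `gpu_resource_config` section with yaml-cpp. That the Milvus config loader works this way is an assumption here; the file name and key paths simply mirror the template above:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include <yaml-cpp/yaml.h>

int main() {
    // Assumed path; substitute the config file you actually deploy.
    YAML::Node root = YAML::LoadFile("server_gpu_config.template");
    YAML::Node gpu = root["gpu_resource_config"];

    bool enable_gpu = gpu["enable_gpu"].as<bool>();                // true in the GPU template
    int64_t cache_capacity = gpu["cache_capacity"].as<int64_t>();  // GB per card

    // Both resource lists are YAML sequences of "gpux" strings.
    std::vector<std::string> build_devices;
    for (const auto& item : gpu["build_index_resources"]) {
        build_devices.push_back(item.as<std::string>());           // e.g. "gpu0"
    }

    std::cout << "enable_gpu=" << enable_gpu
              << " cache_capacity=" << cache_capacity << "GB"
              << " build devices=" << build_devices.size() << "\n";
}
```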
core/src/cache/GpuCacheMgr.cpp (+2 −2)

```diff
@@ -37,7 +37,7 @@ GpuCacheMgr::GpuCacheMgr() {
     Status s;
     int64_t gpu_cache_cap;
-    s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap);
+    s = config.GetGpuResourceConfigCacheCapacity(gpu_cache_cap);
     if (!s.ok()) {
         SERVER_LOG_ERROR << s.message();
     }
@@ -45,7 +45,7 @@ GpuCacheMgr::GpuCacheMgr() {
     cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32);

     float gpu_mem_threshold;
-    s = config.GetCacheConfigGpuCacheThreshold(gpu_mem_threshold);
+    s = config.GetGpuResourceConfigCacheThreshold(gpu_mem_threshold);
     if (!s.ok()) {
         SERVER_LOG_ERROR << s.message();
     }
```
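The hunk reads the capacity in GB (`gpu_cache_cap`) but constructs the cache from a byte count (`cap`); the conversion itself falls outside the visible lines. A minimal sketch of the presumed GB-to-bytes arithmetic, together with what a 0.85 cleanup threshold (the old default) would mean in bytes — illustrative values, not the Milvus implementation:

```cpp
#include <cstdint>
#include <iostream>

int main() {
    int64_t gpu_cache_cap = 4;       // GB, from gpu_resource_config.cache_capacity
    float gpu_mem_threshold = 0.85f; // fraction of data kept when cache cleanup triggers

    int64_t cap = gpu_cache_cap * (1LL << 30);  // GB -> bytes: 4 * 2^30 = 4294967296
    int64_t kept = static_cast<int64_t>(cap * gpu_mem_threshold);

    std::cout << "capacity: " << cap << " bytes, "
              << "kept after cleanup: " << kept << " bytes\n";
}
```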
core/src/db/engine/ExecutionEngineImpl.cpp (+17 −5)

```diff
@@ -144,7 +144,14 @@ ExecutionEngineImpl::HybridLoad() const {
     }

     const std::string key = location_ + ".quantizer";
-    std::vector<uint64_t> gpus = scheduler::get_gpu_pool();
+
+    server::Config& config = server::Config::GetInstance();
+    std::vector<int32_t> gpus;
+    Status s = config.GetGpuResourceConfigSearchResources(gpus);
+    if (!s.ok()) {
+        ENGINE_LOG_ERROR << s.message();
+        return;
+    }

     // cache hit
     {
@@ -355,6 +362,7 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {

 Status
 ExecutionEngineImpl::CopyToIndexFileToGpu(uint64_t device_id) {
+    gpu_num_ = device_id;
     auto to_index_data = std::make_shared<ToIndexData>(PhysicalSize());
     cache::DataObjPtr obj = std::static_pointer_cast<cache::DataObj>(to_index_data);
     milvus::cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(location_, obj);
@@ -578,12 +586,16 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) {

 Status
 ExecutionEngineImpl::Init() {
     server::Config& config = server::Config::GetInstance();
-    Status s = config.GetResourceConfigIndexBuildDevice(gpu_num_);
-    if (!s.ok()) {
-        return s;
+    std::vector<int32_t> gpu_ids;
+    Status s = config.GetGpuResourceConfigBuildIndexResources(gpu_ids);
+    for (auto id : gpu_ids) {
+        if (gpu_num_ == id) {
+            return Status::OK();
+        }
     }
-    return Status::OK();
+    std::string msg = "Invalid gpu_num";
+    return Status(SERVER_INVALID_ARGUMENT, msg);
 }

 }  // namespace engine
```
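With this change, `Init()` accepts `gpu_num_` only if it appears in the configured `build_index_resources` list; anything else returns `SERVER_INVALID_ARGUMENT`. The same membership test, isolated as a standalone sketch (the names here are illustrative stand-ins, not the Milvus API):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-in for the check in ExecutionEngineImpl::Init():
// a build device is valid only if it was listed in build_index_resources.
bool IsValidBuildDevice(const std::vector<int32_t>& build_index_resources, int32_t gpu_num) {
    return std::find(build_index_resources.begin(), build_index_resources.end(), gpu_num) !=
           build_index_resources.end();
}

int main() {
    std::vector<int32_t> build_index_resources = {0, 1};  // e.g. gpu0, gpu1 from the config
    std::cout << IsValidBuildDevice(build_index_resources, 0) << "\n";  // 1: gpu0 is configured
    std::cout << IsValidBuildDevice(build_index_resources, 2) << "\n";  // 0: gpu2 is rejected
}
```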