Unverified Commit 796750f6 authored by Jin Hai and committed by GitHub
Browse files

Merge pull request #414 from fishpenguin/0.6.0-yk-update-config

Support build index with multiple gpu and update config
parents 6d9ed166 c451f8cb
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -17,8 +17,9 @@ Please mark all change in change log and use the ticket from JIRA.
## Feature
- \#12 - Pure CPU version for Milvus
- \#77 - Support table partition
- \#226 - Experimental shards middleware for Milvus
- \#127 - Support new Index type IVFPQ
- \#226 - Experimental shards middleware for Milvus
- \#346 - Support build index with multiple gpu

## Improvement
- \#275 - Rename C++ SDK IndexType
+8 −7
Original line number Diff line number Diff line
@@ -27,9 +27,7 @@ metric_config:
    port: 8080                      # port prometheus uses to fetch metrics, must be in range [1025, 65534]

cache_config:

  cpu_cache_capacity: 16            # GB, CPU memory used for cache, must be a positive integer
  cpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
  cpu_cache_capacity: 16            # GB, size of CPU memory used for cache, must be a positive integer
  cache_insert_data: false          # whether to load inserted data into cache, must be a boolean

engine_config:
@@ -37,7 +35,10 @@ engine_config:
                                    # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
  gpu_search_threshold: 1000        # threshold beyond which the search computation is executed on GPUs only

resource_config:
  search_resources:                 # define the device used for search computation
    - cpu
  index_build_device: cpu           # CPU used for building index
gpu_resource_config:
  enable_gpu: false                 # whether to enable GPU resources
  cache_capacity: 4                 # GB, size of GPU memory per card used for cache, must be a positive integer
  search_resources:                 # define the GPU devices used for search computation, must be in format gpux
    - gpu0
  build_index_resources:            # define the GPU devices used for index building, must be in format gpux
    - gpu0
+7 −8
Original line number Diff line number Diff line
@@ -27,10 +27,7 @@ metric_config:
    port: 8080                      # port prometheus uses to fetch metrics, must be in range [1025, 65534]

cache_config:
  cpu_cache_capacity: 16            # GB, CPU memory used for cache, must be a positive integer
  cpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
  gpu_cache_capacity: 4             # GB, GPU memory used for cache, must be a positive integer
  gpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
  cpu_cache_capacity: 16            # GB, size of CPU memory used for cache, must be a positive integer
  cache_insert_data: false          # whether to load inserted data into cache, must be a boolean

engine_config:
@@ -38,8 +35,10 @@ engine_config:
                                    # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times
  gpu_search_threshold: 1000        # threshold beyond which the search computation is executed on GPUs only

resource_config:
  search_resources:                 # define the devices used for search computation, must be in format: cpu or gpux
    - cpu
gpu_resource_config:
  enable_gpu: true                  # whether to enable GPU resources
  cache_capacity: 4                 # GB, size of GPU memory per card used for cache, must be a positive integer
  search_resources:                 # define the GPU devices used for search computation, must be in format gpux
    - gpu0
  build_index_resources:            # define the GPU devices used for index building, must be in format gpux
    - gpu0
  index_build_device: gpu0          # CPU / GPU used for building index, must be in format: cpu or gpux
+2 −2
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ GpuCacheMgr::GpuCacheMgr() {
    Status s;

    int64_t gpu_cache_cap;
    s = config.GetCacheConfigGpuCacheCapacity(gpu_cache_cap);
    s = config.GetGpuResourceConfigCacheCapacity(gpu_cache_cap);
    if (!s.ok()) {
        SERVER_LOG_ERROR << s.message();
    }
@@ -45,7 +45,7 @@ GpuCacheMgr::GpuCacheMgr() {
    cache_ = std::make_shared<Cache<DataObjPtr>>(cap, 1UL << 32);

    float gpu_mem_threshold;
    s = config.GetCacheConfigGpuCacheThreshold(gpu_mem_threshold);
    s = config.GetGpuResourceConfigCacheThreshold(gpu_mem_threshold);
    if (!s.ok()) {
        SERVER_LOG_ERROR << s.message();
    }
+17 −5
Original line number Diff line number Diff line
@@ -144,7 +144,14 @@ ExecutionEngineImpl::HybridLoad() const {
    }

    const std::string key = location_ + ".quantizer";
    std::vector<uint64_t> gpus = scheduler::get_gpu_pool();

    server::Config& config = server::Config::GetInstance();
    std::vector<int32_t> gpus;
    Status s = config.GetGpuResourceConfigSearchResources(gpus);
    if (!s.ok()) {
        ENGINE_LOG_ERROR << s.message();
        return;
    }

    // cache hit
    {
@@ -355,6 +362,7 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {

Status
ExecutionEngineImpl::CopyToIndexFileToGpu(uint64_t device_id) {
    gpu_num_ = device_id;
    auto to_index_data = std::make_shared<ToIndexData>(PhysicalSize());
    cache::DataObjPtr obj = std::static_pointer_cast<cache::DataObj>(to_index_data);
    milvus::cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(location_, obj);
@@ -578,12 +586,16 @@ ExecutionEngineImpl::GpuCache(uint64_t gpu_id) {
Status
ExecutionEngineImpl::Init() {
    server::Config& config = server::Config::GetInstance();
    Status s = config.GetResourceConfigIndexBuildDevice(gpu_num_);
    if (!s.ok()) {
        return s;
    std::vector<int32_t> gpu_ids;
    Status s = config.GetGpuResourceConfigBuildIndexResources(gpu_ids);
    for (auto id : gpu_ids) {
        if (gpu_num_ == id) {
            return Status::OK();
        }
    }

    return Status::OK();
    std::string msg = "Invalid gpu_num";
    return Status(SERVER_INVALID_ARGUMENT, msg);
}

}  // namespace engine
Loading