Commit fddfd1eb authored by 蔡宇东's avatar 蔡宇东
Browse files

#346 update config APIs to support build index with multiple GPUs

parent a47b7284
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -27,7 +27,6 @@ metric_config:
    port: 8080                      # port prometheus uses to fetch metrics, must be in range [1025, 65534]

cache_config:

  cpu_cache_capacity: 16            # GB, CPU memory used for cache, must be a positive integer
  cpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
  cache_insert_data: false          # whether to load inserted data into cache, must be a boolean
@@ -38,6 +37,7 @@ engine_config:
  gpu_search_threshold: 1000        # threshold beyond which the search computation is executed on GPUs only

resource_config:
  search_resources:                 # define the device used for search computation
  search_resources:                 # define the devices used for search computation, must be in format: cpu or gpux
    - cpu
  index_build_resources:            # define the devices used for index building, must be in format: cpu or gpux
    - cpu
  index_build_device: cpu           # CPU used for building index
+2 −1
Original line number Diff line number Diff line
@@ -42,4 +42,5 @@ resource_config:
  search_resources:                 # define the devices used for search computation, must be in format: cpu or gpux
    - cpu
    - gpu0
  index_build_device: gpu0          # CPU / GPU used for building index, must be in format: cpu or gpux
  index_build_resources:            # define the devices used for index building, must be in format: cpu or gpux
    - gpu0
 No newline at end of file
+46 −38
Original line number Diff line number Diff line
@@ -215,8 +215,8 @@ Config::ValidateConfig() {
        return s;
    }

    int32_t resource_index_build_device;
    s = GetResourceConfigIndexBuildDevice(resource_index_build_device);
    std::vector<std::string> index_build_resources;
    s = GetResourceConfigIndexBuildResources(index_build_resources);
    if (!s.ok()) {
        return s;
    }
@@ -351,7 +351,7 @@ Config::ResetDefaultConfig() {
        return s;
    }

    s = SetResourceConfigIndexBuildDevice(CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT);
    s = SetResourceConfigIndexBuildResources(CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT);
    if (!s.ok()) {
        return s;
    }
@@ -599,13 +599,18 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) {
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        uint64_t gpu_cache_capacity = std::stoi(value) * GB;
        int device_id;
        Status s = GetResourceConfigIndexBuildDevice(device_id);
        std::vector<std::string> resources;
        Status s = GetResourceConfigIndexBuildResources(resources);
        if (!s.ok()) {
            return s;
        }

        size_t gpu_memory;
        for (auto& resource : resources) {
            if (resource == "cpu") {
                continue;
            }
            int32_t device_id = std::stoi(resource.substr(3));
            if (!ValidationUtil::GetGpuMemory(device_id, gpu_memory).ok()) {
                std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(device_id);
                return Status(SERVER_UNEXPECTED_ERROR, msg);
@@ -617,6 +622,7 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) {
                std::cerr << "Warning: gpu cache capacity value is too big" << std::endl;
            }
        }
    }
    return Status::OK();
}

@@ -745,11 +751,19 @@ Config::CheckResourceConfigSearchResources(const std::vector<std::string>& value
}

Status
Config::CheckResourceConfigIndexBuildDevice(const std::string& value) {
    auto status = CheckResource(value);
Config::CheckResourceConfigIndexBuildResources(const std::vector<std::string>& value) {
    // Validates the device list configured for index building.
    //
    // value: one entry per configured device string.
    // Returns SERVER_INVALID_ARGUMENT when the list is empty or when CheckResource()
    // rejects any entry (its message is forwarded); returns Status::OK() otherwise.
    if (value.empty()) {
        // The message names the key exactly as it is spelled in the config file
        // ("index_build_resources", see CONFIG_RESOURCE_INDEX_BUILD_RESOURCES) so the
        // user can locate the offending setting.
        std::string msg =
            "Invalid build index resource. "
            "Possible reason: resource_config.index_build_resources is empty.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    for (const auto& resource : value) {
        auto status = CheckResource(resource);
        if (!status.ok()) {
            // Normalize the error code to SERVER_INVALID_ARGUMENT while keeping the
            // detailed reason reported by CheckResource().
            return Status(SERVER_INVALID_ARGUMENT, status.message());
        }
    }
    return Status::OK();
}

@@ -1030,27 +1044,18 @@ Status
Config::GetResourceConfigSearchResources(std::vector<std::string>& value) {
    std::string str =
        GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_SEARCH_RESOURCES,
                             CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT);
    server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, value);
                             CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT);
    server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value);
    return CheckResourceConfigSearchResources(value);
}

Status
Config::GetResourceConfigIndexBuildDevice(int32_t& value) {
Config::GetResourceConfigIndexBuildResources(std::vector<std::string>& value) {
    std::string str =
        GetConfigStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT);
    Status s = CheckResourceConfigIndexBuildDevice(str);
    if (!s.ok()) {
        return s;
    }

    if (str == "cpu") {
        value = CPU_DEVICE_ID;
    } else {
        value = std::stoi(str.substr(3));
    }

    return Status::OK();
        GetConfigSequenceStr(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES,
                        CONFIG_RESOURCE_RESOURCES_DELIMITER, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT);
    server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_RESOURCE_RESOURCES_DELIMITER, value);
    return CheckResourceConfigIndexBuildResources(value);
}

///////////////////////////////////////////////////////////////////////////////
@@ -1305,7 +1310,7 @@ Config::SetResourceConfigMode(const std::string& value) {
Status
Config::SetResourceConfigSearchResources(const std::string& value) {
    std::vector<std::string> res_vec;
    server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER, res_vec);
    server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec);

    Status s = CheckResourceConfigSearchResources(res_vec);
    if (!s.ok()) {
@@ -1317,13 +1322,16 @@ Config::SetResourceConfigSearchResources(const std::string& value) {
}

Status
Config::SetResourceConfigIndexBuildDevice(const std::string& value) {
    Status s = CheckResourceConfigIndexBuildDevice(value);
Config::SetResourceConfigIndexBuildResources(const std::string &value) {
    std::vector<std::string> res_vec;
    server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_RESOURCE_RESOURCES_DELIMITER, res_vec);

    Status s = CheckResourceConfigIndexBuildResources(res_vec);
    if (!s.ok()) {
        return s;
    }

    SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_DEVICE, value);
    SetConfigValueInMem(CONFIG_RESOURCE, CONFIG_RESOURCE_INDEX_BUILD_RESOURCES, value);
    return Status::OK();
}

+7 −9
Original line number Diff line number Diff line
@@ -91,20 +91,18 @@ static const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT = "1000";
static const char* CONFIG_RESOURCE = "resource_config";
static const char* CONFIG_RESOURCE_MODE = "mode";
static const char* CONFIG_RESOURCE_MODE_DEFAULT = "simple";
static const char* CONFIG_RESOURCE_RESOURCES_DELIMITER = ",";
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES = "search_resources";
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DELIMITER = ",";

#ifdef MILVUS_CPU_VERSION
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu";
#else
static const char* CONFIG_RESOURCE_SEARCH_RESOURCES_DEFAULT = "cpu,gpu0";
#endif

static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE = "index_build_device";
static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES = "index_build_resources";
#ifdef MILVUS_CPU_VERSION
static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "cpu";
static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "cpu";
#else
static const char* CONFIG_RESOURCE_INDEX_BUILD_DEVICE_DEFAULT = "gpu0";
static const char* CONFIG_RESOURCE_INDEX_BUILD_RESOURCES_DEFAULT = "gpu0";
#endif
const int32_t CPU_DEVICE_ID = -1;

@@ -190,7 +188,7 @@ class Config {
    Status
    CheckResourceConfigSearchResources(const std::vector<std::string>& value);
    Status
    CheckResourceConfigIndexBuildDevice(const std::string& value);
    CheckResourceConfigIndexBuildResources(const std::vector<std::string>& value);

    std::string
    GetConfigStr(const std::string& parent_key, const std::string& child_key, const std::string& default_value = "");
@@ -259,7 +257,7 @@ class Config {
    Status
    GetResourceConfigSearchResources(std::vector<std::string>& value);
    Status
    GetResourceConfigIndexBuildDevice(int32_t& value);
    GetResourceConfigIndexBuildResources(std::vector<std::string>& value);

 public:
    /* server config */
@@ -320,7 +318,7 @@ class Config {
    Status
    SetResourceConfigSearchResources(const std::string& value);
    Status
    SetResourceConfigIndexBuildDevice(const std::string& value);
    SetResourceConfigIndexBuildResources(const std::string& value);

 private:
    std::unordered_map<std::string, std::unordered_map<std::string, std::string>> config_map_;
+2 −2
Original line number Diff line number Diff line
@@ -182,7 +182,7 @@ ValidationUtil::ValidatePartitionTags(const std::vector<std::string>& partition_
}

Status
ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) {
ValidationUtil::ValidateGpuIndex(int32_t gpu_index) {
#ifdef MILVUS_GPU_VERSION
    int num_devices = 0;
    auto cuda_err = cudaGetDeviceCount(&num_devices);
@@ -203,7 +203,7 @@ ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) {
}

Status
ValidationUtil::GetGpuMemory(uint32_t gpu_index, size_t& memory) {
ValidationUtil::GetGpuMemory(int32_t gpu_index, size_t& memory) {
#ifdef MILVUS_GPU_VERSION

    cudaDeviceProp deviceProp;
Loading