Commit ea35f43f authored by Jin Hai's avatar Jin Hai Committed by GitHub
Browse files

Merge pull request #121 from yhmo/0.5.1

#90 The server start error messages could be improved to enhance user experience

Former-commit-id: 5905807e0aa0ae8cc34b5a7c216a0c4ad13a34a4
parents d4fe7607 d807ec6c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ Please mark all change in change log and use the ticket from JIRA.
# Milvus 0.5.1 (TODO)

## Bug
- \#90 - The server start error messages could be improved to enhance user experience
- \#104 - test_scheduler core dump

## Improvement
+12 −12
Original line number Diff line number Diff line
@@ -2,9 +2,9 @@

server_config:
  address: 0.0.0.0                  # milvus server ip address (IPv4)
  port: 19530                       # port range: 1025 ~ 65534
  port: 19530                       # milvus server port, must in range [1025, 65534]
  deploy_mode: single               # deployment type: single, cluster_readonly, cluster_writable
  time_zone: UTC+8
  time_zone: UTC+8                  # time zone, must be in format: UTC+X

db_config:
  primary_path: @MILVUS_DB_PATH@    # path used to store data and meta
@@ -14,30 +14,30 @@ db_config:
                                    # Keep 'dialect://:@:/', and replace other texts with real values
                                    # Replace 'dialect' with 'mysql' or 'sqlite'

  insert_buffer_size: 4             # GB, maximum insert buffer size allowed
  insert_buffer_size: 4             # GB, maximum insert buffer size allowed, must be a positive integer
                                    # sum of insert_buffer_size and cpu_cache_capacity cannot exceed total memory

  preload_table:                    # preload data at startup, '*' means load all tables, empty value means no preload
                                    # you can specify preload tables like this: table1,table2,table3

metric_config:
  enable_monitor: false             # enable monitoring or not
  enable_monitor: false             # enable monitoring or not, must be a boolean
  collector: prometheus             # prometheus
  prometheus_config:
    port: 8080                      # port prometheus uses to fetch metrics
    port: 8080                      # port prometheus uses to fetch metrics, must in range [1025, 65534]

cache_config:
  cpu_cache_capacity: 16            # GB, CPU memory used for cache
  cpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered
  gpu_cache_capacity: 4             # GB, GPU memory used for cache
  gpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered
  cache_insert_data: false          # whether to load inserted data into cache
  cpu_cache_capacity: 16            # GB, CPU memory used for cache, must be a positive integer
  cpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
  gpu_cache_capacity: 4             # GB, GPU memory used for cache, must be a positive integer
  gpu_cache_threshold: 0.85         # percentage of data that will be kept when cache cleanup is triggered, must be in range (0.0, 1.0]
  cache_insert_data: false          # whether to load inserted data into cache, must be a boolean

engine_config:
  use_blas_threshold: 20            # if nq <  use_blas_threshold, use SSE, faster with fluctuated response times
                                    # if nq >= use_blas_threshold, use OpenBlas, slower with stable response times

resource_config:
  search_resources:                 # define the GPUs used for search computation, valid value: gpux
  search_resources:                 # define the GPUs used for search computation, must be in format: gpux
    - gpu0
  index_build_device: gpu0          # GPU used for building index
 No newline at end of file
  index_build_device: gpu0          # GPU used for building index, must be in format: gpux
 No newline at end of file
+1 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "Job.h"
#include "scheduler/job/Job.h"

namespace milvus {
namespace scheduler {
+115 −42
Original line number Diff line number Diff line
@@ -363,7 +363,9 @@ Config::PrintAll() {
Status
Config::CheckServerConfigAddress(const std::string& value) {
    if (!ValidationUtil::ValidateIpAddress(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid server config address: " + value);
        std::string msg =
            "Invalid server IP address: " + value + ". Possible reason: server_config.address is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -371,11 +373,14 @@ Config::CheckServerConfigAddress(const std::string& value) {
Status
Config::CheckServerConfigPort(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid server config port: " + value);
        std::string msg = "Invalid server port: " + value + ". Possible reason: server_config.port is not a number.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        int32_t port = std::stoi(value);
        if (!(port > 1024 && port < 65535)) {
            return Status(SERVER_INVALID_ARGUMENT, "Server config port out of range (1024, 65535): " + value);
            std::string msg = "Invalid server port: " + value +
                              ". Possible reason: server_config.port is not in range [1025, 65534].";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }
    }
    return Status::OK();
@@ -385,7 +390,8 @@ Status
Config::CheckServerConfigDeployMode(const std::string& value) {
    if (value != "single" && value != "cluster_readonly" && value != "cluster_writable") {
        return Status(SERVER_INVALID_ARGUMENT,
                      "Invalid server config mode [single, cluster_readonly, cluster_writable]: " + value);
                      "server_config.deploy_mode is not one of "
                      "single, cluster_readonly, and cluster_writable.");
    }
    return Status::OK();
}
@@ -393,15 +399,15 @@ Config::CheckServerConfigDeployMode(const std::string& value) {
Status
Config::CheckServerConfigTimeZone(const std::string& value) {
    if (value.length() <= 3) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value);
        return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value);
    } else {
        if (value.substr(0, 3) != "UTC") {
            return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value);
            return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value);
        } else {
            try {
                stoi(value.substr(3));
            } catch (...) {
                return Status(SERVER_INVALID_ARGUMENT, "Invalid server config time_zone: " + value);
                return Status(SERVER_INVALID_ARGUMENT, "Invalid server_config.time_zone: " + value);
            }
        }
    }
@@ -411,7 +417,7 @@ Config::CheckServerConfigTimeZone(const std::string& value) {
Status
Config::CheckDBConfigPrimaryPath(const std::string& value) {
    if (value.empty()) {
        return Status(SERVER_INVALID_ARGUMENT, "DB config primary_path empty");
        return Status(SERVER_INVALID_ARGUMENT, "db_config.db_path is empty.");
    }
    return Status::OK();
}
@@ -424,7 +430,10 @@ Config::CheckDBConfigSecondaryPath(const std::string& value) {
Status
Config::CheckDBConfigBackendUrl(const std::string& value) {
    if (!ValidationUtil::ValidateDbURI(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config backend_url: " + value);
        std::string msg =
            "Invalid backend url: " + value + ". Possible reason: db_config.db_backend_url is invalid. " +
            "The correct format should be like sqlite://:@:/ or mysql://root:123456@127.0.0.1:3306/milvus.";
        return Status(SERVER_INVALID_ARGUMENT, "invalid db_backend_url: " + value);
    }
    return Status::OK();
}
@@ -432,7 +441,9 @@ Config::CheckDBConfigBackendUrl(const std::string& value) {
Status
Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_disk_threshold: " + value);
        std::string msg = "Invalid archive disk threshold: " + value +
                          ". Possible reason: db_config.archive_disk_threshold is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -440,7 +451,9 @@ Config::CheckDBConfigArchiveDiskThreshold(const std::string& value) {
Status
Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config archive_days_threshold: " + value);
        std::string msg = "Invalid archive days threshold: " + value +
                          ". Possible reason: db_config.archive_disk_threshold is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -448,13 +461,23 @@ Config::CheckDBConfigArchiveDaysThreshold(const std::string& value) {
Status
Config::CheckDBConfigInsertBufferSize(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid DB config insert_buffer_size: " + value);
        std::string msg = "Invalid insert buffer size: " + value +
                          ". Possible reason: db_config.insert_buffer_size is not a positive integer.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        int64_t buffer_size = std::stoi(value) * GB;
        if (buffer_size <= 0) {
            std::string msg = "Invalid insert buffer size: " + value +
                              ". Possible reason: db_config.insert_buffer_size is not a positive integer.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }

        uint64_t total_mem = 0, free_mem = 0;
        CommonUtil::GetSystemMemInfo(total_mem, free_mem);
        if (buffer_size >= total_mem) {
            return Status(SERVER_INVALID_ARGUMENT, "DB config insert_buffer_size exceed system memory: " + value);
            std::string msg = "Invalid insert buffer size: " + value +
                              ". Possible reason: db_config.insert_buffer_size exceeds system memory.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }
    }
    return Status::OK();
@@ -463,7 +486,9 @@ Config::CheckDBConfigInsertBufferSize(const std::string& value) {
Status
Config::CheckMetricConfigEnableMonitor(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsBool(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config auto_bootup: " + value);
        std::string msg =
            "Invalid metric config: " + value + ". Possible reason: metric_config.enable_monitor is not a boolean.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -471,7 +496,9 @@ Config::CheckMetricConfigEnableMonitor(const std::string& value) {
Status
Config::CheckMetricConfigCollector(const std::string& value) {
    if (value != "prometheus") {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config collector: " + value);
        std::string msg =
            "Invalid metric collector: " + value + ". Possible reason: metric_config.collector is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -479,6 +506,8 @@ Config::CheckMetricConfigCollector(const std::string& value) {
Status
Config::CheckMetricConfigPrometheusPort(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        std::string msg = "Invalid metric port: " + value +
                          ". Possible reason: metric_config.prometheus_config.port is not in range [1025, 65534].";
        return Status(SERVER_INVALID_ARGUMENT, "Invalid metric config prometheus_port: " + value);
    }
    return Status::OK();
@@ -487,15 +516,25 @@ Config::CheckMetricConfigPrometheusPort(const std::string& value) {
Status
Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_capacity: " + value);
        std::string msg = "Invalid cpu cache capacity: " + value +
                          ". Possible reason: cache_config.cpu_cache_capacity is not a positive integer.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        uint64_t cpu_cache_capacity = std::stoi(value) * GB;
        int64_t cpu_cache_capacity = std::stoi(value) * GB;
        if (cpu_cache_capacity <= 0) {
            std::string msg = "Invalid cpu cache capacity: " + value +
                              ". Possible reason: cache_config.cpu_cache_capacity is not a positive integer.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }

        uint64_t total_mem = 0, free_mem = 0;
        CommonUtil::GetSystemMemInfo(total_mem, free_mem);
        if (cpu_cache_capacity >= total_mem) {
            return Status(SERVER_INVALID_ARGUMENT, "Cache config cpu_cache_capacity exceed system memory: " + value);
        } else if (cpu_cache_capacity > static_cast<double>(total_mem * 0.9)) {
            std::cerr << "Warning: cpu_cache_capacity value is too big" << std::endl;
        if (static_cast<uint64_t>(cpu_cache_capacity) >= total_mem) {
            std::string msg = "Invalid cpu cache capacity: " + value +
                              ". Possible reason: cache_config.cpu_cache_capacity exceeds system memory.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        } else if (static_cast<double>(cpu_cache_capacity) > static_cast<double>(total_mem * 0.9)) {
            std::cerr << "WARNING: cpu cache capacity value is too big" << std::endl;
        }

        int32_t buffer_value;
@@ -506,7 +545,10 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) {

        int64_t insert_buffer_size = buffer_value * GB;
        if (insert_buffer_size + cpu_cache_capacity >= total_mem) {
            return Status(SERVER_INVALID_ARGUMENT, "Sum of cpu_cache_capacity and buffer_size exceed system memory");
            std::string msg = "Invalid cpu cache capacity: " + value +
                              ". Possible reason: sum of cache_config.cpu_cache_capacity and "
                              "db_config.insert_buffer_size exceeds system memory.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }
    }
    return Status::OK();
@@ -515,11 +557,15 @@ Config::CheckCacheConfigCpuCacheCapacity(const std::string& value) {
Status
Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsFloat(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value);
        std::string msg = "Invalid cpu cache threshold: " + value +
                          ". Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0].";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        float cpu_cache_threshold = std::stof(value);
        if (cpu_cache_threshold <= 0.0 || cpu_cache_threshold >= 1.0) {
            return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cpu_cache_threshold: " + value);
            std::string msg = "Invalid cpu cache threshold: " + value +
                              ". Possible reason: cache_config.cpu_cache_threshold is not in range (0.0, 1.0].";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }
    }
    return Status::OK();
@@ -528,7 +574,9 @@ Config::CheckCacheConfigCpuCacheThreshold(const std::string& value) {
Status
Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_capacity: " + value);
        std::string msg = "Invalid gpu cache capacity: " + value +
                          ". Possible reason: cache_config.gpu_cache_capacity is not a positive integer.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        uint64_t gpu_cache_capacity = std::stoi(value) * GB;
        int gpu_index;
@@ -539,13 +587,14 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) {

        size_t gpu_memory;
        if (!ValidationUtil::GetGpuMemory(gpu_index, gpu_memory).ok()) {
            return Status(SERVER_UNEXPECTED_ERROR,
                          "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index));
            std::string msg = "Fail to get GPU memory for GPU device: " + std::to_string(gpu_index);
            return Status(SERVER_UNEXPECTED_ERROR, msg);
        } else if (gpu_cache_capacity >= gpu_memory) {
            return Status(SERVER_INVALID_ARGUMENT,
                          "Cache config gpu_cache_capacity exceed GPU memory: " + std::to_string(gpu_memory));
            std::string msg = "Invalid gpu cache capacity: " + value +
                              ". Possible reason: cache_config.gpu_cache_capacity exceeds GPU memory.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        } else if (gpu_cache_capacity > (double)gpu_memory * 0.9) {
            std::cerr << "Warning: gpu_cache_capacity value is too big" << std::endl;
            std::cerr << "Warning: gpu cache capacity value is too big" << std::endl;
        }
    }
    return Status::OK();
@@ -554,11 +603,15 @@ Config::CheckCacheConfigGpuCacheCapacity(const std::string& value) {
Status
Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsFloat(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value);
        std::string msg = "Invalid gpu cache threshold: " + value +
                          ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0].";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    } else {
        float gpu_cache_threshold = std::stof(value);
        if (gpu_cache_threshold <= 0.0 || gpu_cache_threshold >= 1.0) {
            return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config gpu_cache_threshold: " + value);
            std::string msg = "Invalid gpu cache threshold: " + value +
                              ". Possible reason: cache_config.gpu_cache_threshold is not in range (0.0, 1.0].";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }
    }
    return Status::OK();
@@ -567,7 +620,9 @@ Config::CheckCacheConfigGpuCacheThreshold(const std::string& value) {
Status
Config::CheckCacheConfigCacheInsertData(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsBool(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid cache config cache_insert_data: " + value);
        std::string msg = "Invalid cache insert option: " + value +
                          ". Possible reason: cache_config.cache_insert_data is not a boolean.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -575,7 +630,9 @@ Config::CheckCacheConfigCacheInsertData(const std::string& value) {
Status
Config::CheckEngineConfigUseBlasThreshold(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config use_blas_threshold: " + value);
        std::string msg = "Invalid blas threshold: " + value +
                          ". Possible reason: engine_config.use_blas_threshold is not a positive integer.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -583,14 +640,18 @@ Config::CheckEngineConfigUseBlasThreshold(const std::string& value) {
Status
Config::CheckEngineConfigOmpThreadNum(const std::string& value) {
    if (!ValidationUtil::ValidateStringIsNumber(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value);
        std::string msg = "Invalid omp thread number: " + value +
                          ". Possible reason: engine_config.omp_thread_num is not a positive integer.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }

    int32_t omp_thread = std::stoi(value);
    uint32_t sys_thread_cnt = 8;
    CommonUtil::GetSystemAvailableThreads(sys_thread_cnt);
    if (omp_thread > static_cast<int32_t>(sys_thread_cnt)) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid engine config omp_thread_num: " + value);
        std::string msg = "Invalid omp thread number: " + value +
                          ". Possible reason: engine_config.omp_thread_num exceeds system cpu cores.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -598,7 +659,8 @@ Config::CheckEngineConfigOmpThreadNum(const std::string& value) {
Status
Config::CheckResourceConfigMode(const std::string& value) {
    if (value != "simple") {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config mode: " + value);
        std::string msg = "Invalid resource mode: " + value + ". Possible reason: resource_config.mode is invalid.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -608,12 +670,16 @@ CheckGpuDevice(const std::string& value) {
    const std::regex pat("gpu(\\d+)");
    std::cmatch m;
    if (!std::regex_match(value.c_str(), m, pat)) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value);
        std::string msg = "Invalid gpu device: " + value +
                          ". Possible reason: resource_config.search_resources does not match your hardware.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }

    int32_t gpu_index = std::stoi(value.substr(3));
    if (!ValidationUtil::ValidateGpuIndex(gpu_index).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid gpu device: " + value);
        std::string msg = "Invalid gpu device: " + value +
                          ". Possible reason: resource_config.search_resources does not match your hardware.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
@@ -621,12 +687,17 @@ CheckGpuDevice(const std::string& value) {
Status
Config::CheckResourceConfigSearchResources(const std::vector<std::string>& value) {
    if (value.empty()) {
        return Status(SERVER_INVALID_ARGUMENT, "Empty resource config search_resources");
        std::string msg =
            "Invalid search resource. "
            "Possible reason: resource_config.search_resources is empty.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }

    for (auto& gpu_device : value) {
        if (!CheckGpuDevice(gpu_device).ok()) {
            return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config search_resources: " + gpu_device);
            std::string msg = "Invalid search resource: " + gpu_device +
                              ". Possible reason: resource_config.search_resources does not match your hardware.";
            return Status(SERVER_INVALID_ARGUMENT, msg);
        }
    }
    return Status::OK();
@@ -635,7 +706,9 @@ Config::CheckResourceConfigSearchResources(const std::vector<std::string>& value
Status
Config::CheckResourceConfigIndexBuildDevice(const std::string& value) {
    if (!CheckGpuDevice(value).ok()) {
        return Status(SERVER_INVALID_ARGUMENT, "Invalid resource config index_build_device: " + value);
        std::string msg = "Invalid index build device: " + value +
                          ". Possible reason: resource_config.index_build_device does not match your hardware.";
        return Status(SERVER_INVALID_ARGUMENT, msg);
    }
    return Status::OK();
}
+24 −9

File changed.

Preview size limit exceeded, changes collapsed.

Loading