Commit 5469d526 authored by groot's avatar groot
Browse files

Merge remote-tracking branch 'source/branch-0.5.0' into branch-0.5.0


Former-commit-id: 7201afdddcc337ec6bb655e2b771617d86e5cc92
parents d570972e aa8ab2e8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-644 - Search crashed with index-type: flat
- MS-624 - Search vectors failed if time ranges long enough
- MS-652 - IVFSQH quantization double free
- MS-605 - Server going down during searching vectors
- MS-654 - Describe index timeout when building index

## Improvement
+7 −1
Original line number Diff line number Diff line
@@ -509,7 +509,13 @@ DBImpl::StartMetricTask() {
    server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
    int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
    int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
    server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage * 100 / cache_total);
    if (cache_total > 0) {
        double cache_usage_double = cache_usage;
        server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total);
    } else {
        server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0);
    }

    server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
    uint64_t size;
    Size(size);
+1 −2
Original line number Diff line number Diff line
@@ -80,8 +80,7 @@ class ExecutionEngine {
    Merge(const std::string& location) = 0;

    virtual Status
    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
           bool hybrid) const = 0;
    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels, bool hybrid) = 0;

    virtual std::shared_ptr<ExecutionEngine>
    BuildIndex(const std::string& location, EngineType engine_type) = 0;
+55 −21
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include <utility>
#include <vector>

//#define ON_SEARCH
namespace milvus {
namespace engine {

@@ -248,26 +249,6 @@ ExecutionEngineImpl::Load(bool to_cache) {
Status
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
    if (hybrid) {
        auto key = location_ + ".quantizer";
        auto quantizer =
            std::static_pointer_cast<CachedQuantizer>(cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(key));

        auto conf = std::make_shared<knowhere::QuantizerCfg>();
        conf->gpu_id = device_id;

        if (quantizer) {
            // cache hit
            conf->mode = 2;
            auto new_index = index_->LoadData(quantizer->Data(), conf);
            index_ = new_index;
        } else {
            auto pair = index_->CopyToGpuWithQuantizer(device_id);
            index_ = pair.first;

            // cache
            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
            cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
        }
        return Status::OK();
    }

@@ -415,7 +396,60 @@ ExecutionEngineImpl::BuildIndex(const std::string& location, EngineType engine_t

Status
ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
                            bool hybrid) const {
                            bool hybrid) {
    if (index_type_ == EngineType::FAISS_IVFSQ8H) {
        if (!hybrid) {
            const std::string key = location_ + ".quantizer";
            std::vector<uint64_t> gpus = scheduler::get_gpu_pool();

            const int64_t NOT_FOUND = -1;
            int64_t device_id = NOT_FOUND;

            // cache hit
            {
                knowhere::QuantizerPtr quantizer = nullptr;

                for (auto& gpu : gpus) {
                    auto cache = cache::GpuCacheMgr::GetInstance(gpu);
                    if (auto cached_quantizer = cache->GetIndex(key)) {
                        device_id = gpu;
                        quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
                    }
                }

                if (device_id != NOT_FOUND) {
                    // cache hit
                    auto config = std::make_shared<knowhere::QuantizerCfg>();
                    config->gpu_id = device_id;
                    config->mode = 2;
                    auto new_index = index_->LoadData(quantizer, config);
                    index_ = new_index;
                }
            }

            if (device_id == NOT_FOUND) {
                // cache miss
                std::vector<int64_t> all_free_mem;
                for (auto& gpu : gpus) {
                    auto cache = cache::GpuCacheMgr::GetInstance(gpu);
                    auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
                    all_free_mem.push_back(free_mem);
                }

                auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
                auto best_index = std::distance(all_free_mem.begin(), max_e);
                device_id = gpus[best_index];

                auto pair = index_->CopyToGpuWithQuantizer(device_id);
                index_ = pair.first;

                // cache
                auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
                cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
            }
        }
    }

    if (index_ == nullptr) {
        ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to search";
        return Status(DB_ERROR, "index is null");
+1 −1
Original line number Diff line number Diff line
@@ -72,7 +72,7 @@ class ExecutionEngineImpl : public ExecutionEngine {

    Status
    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
           bool hybrid = false) const override;
           bool hybrid = false) override;

    ExecutionEnginePtr
    BuildIndex(const std::string& location, EngineType engine_type) override;
Loading