Unverified Commit 014a0e86 authored by groot's avatar groot Committed by GitHub
Browse files

QPS decrease (#2372)



* optimize merge strategy

Signed-off-by: default avataryhmo <yihua.mo@zilliz.com>

* #2365

Signed-off-by: default avatargroot <yihua.mo@zilliz.com>

* fix typo

Signed-off-by: default avatargroot <yihua.mo@zilliz.com>

* optimize search

Signed-off-by: default avatargroot <yihua.mo@zilliz.com>

* code format

Signed-off-by: default avatargroot <yihua.mo@zilliz.com>
parent b9e8acb5
Loading
Loading
Loading
Loading
+13 −6
Original line number Diff line number Diff line
@@ -1393,9 +1393,10 @@ DBImpl::CreateIndex(const std::shared_ptr<server::Context>& context, const std::
        }
    }

    // step 3: let merge file thread finish
    // to avoid duplicate data bug
    WaitMergeFileFinish();
    // step 3: wait for the merge file thread to finish, to avoid the duplicate data bug
    WaitMergeFileFinish();  // let merge file thread finish
    StartMergeTask(true);   // start force-merge task
    WaitMergeFileFinish();  // let force-merge file thread finish

    // step 4: wait and build index
    status = index_failed_checker_.CleanFailedIndexFileOfCollection(collection_id);
@@ -1897,7 +1898,7 @@ DBImpl::StartMetricTask() {
}

void
DBImpl::StartMergeTask() {
DBImpl::StartMergeTask(bool force_merge_all) {
    // LOG_ENGINE_DEBUG_ << "Begin StartMergeTask";
    // merge task has been finished?
    {
@@ -1927,7 +1928,7 @@ DBImpl::StartMergeTask() {

            // start merge file thread
            merge_thread_results_.push_back(
                merge_thread_pool_.enqueue(&DBImpl::BackgroundMerge, this, merge_collection_ids_));
                merge_thread_pool_.enqueue(&DBImpl::BackgroundMerge, this, merge_collection_ids_, force_merge_all));
            merge_collection_ids_.clear();
        }
    }
@@ -2031,14 +2032,20 @@ DBImpl::MergeHybridFiles(const std::string& collection_id, meta::FilesHolder& fi
}

void
DBImpl::BackgroundMerge(std::set<std::string> collection_ids) {
DBImpl::BackgroundMerge(std::set<std::string> collection_ids, bool force_merge_all) {
    // LOG_ENGINE_TRACE_ << " Background merge thread start";

    Status status;
    for (auto& collection_id : collection_ids) {
        const std::lock_guard<std::mutex> lock(flush_merge_compact_mutex_);

        auto old_strategy = merge_mgr_ptr_->Strategy();
        if (force_merge_all) {
            merge_mgr_ptr_->UseStrategy(MergeStrategyType::ADAPTIVE);
        }

        auto status = merge_mgr_ptr_->MergeFiles(collection_id);
        merge_mgr_ptr_->UseStrategy(old_strategy);
        if (!status.ok()) {
            LOG_ENGINE_ERROR_ << "Failed to get merge files for collection: " << collection_id
                              << " reason:" << status.message();
+2 −2
Original line number Diff line number Diff line
@@ -228,10 +228,10 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi
    StartMetricTask();

    void
    StartMergeTask();
    StartMergeTask(bool force_merge_all = false);

    void
    BackgroundMerge(std::set<std::string> collection_ids);
    BackgroundMerge(std::set<std::string> collection_ids, bool force_merge_all);

    Status
    MergeHybridFiles(const std::string& table_id, meta::FilesHolder& files_holder);
+91 −0
Original line number Diff line number Diff line
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#include "db/merge/MergeAdaptiveStrategy.h"
#include "utils/Log.h"

#include <algorithm>
#include <vector>

namespace milvus {
namespace engine {
namespace {
struct {
    bool
    operator()(meta::SegmentSchema& left, meta::SegmentSchema& right) const {
        return left.file_size_ > right.file_size_;
    }
} CompareSegment;
}  // namespace

// Arrange the files held by `files_holder` into groups that should each be merged together.
//
// Steps:
//   1. Walk the held files in reverse order and keep only the merge candidates; a file whose
//      file_size_ already exceeds its (positive) index_file_size_ is skipped.
//   2. Fewer than two candidates: nothing to merge. Exactly two candidates: they form one group.
//   3. Otherwise sort the candidates by file_size_ in descending order and repeatedly sweep the
//      list, greedily picking every file that still fits within the size budget. The budget is
//      the index_file_size_ of the largest candidate.
//
// @param files_holder  provides the candidate files through HoldFiles()
// @param files_groups  output; one entry is appended per group of files to merge
// @return always Status::OK()
Status
MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) {
    meta::SegmentsSchema sort_files;
    meta::SegmentsSchema& files = files_holder.HoldFiles();
    for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) {
        meta::SegmentSchema& file = *iter;
        if (file.index_file_size_ > 0 && file.file_size_ > file.index_file_size_) {
            // this file is already larger than the index file size, no need to merge it
            continue;
        }
        sort_files.push_back(file);
    }

    // a single file (or none) cannot be merged with anything
    if (sort_files.size() < 2) {
        return Status::OK();
    }

    // two files, simply merge them
    if (sort_files.size() == 2) {
        files_groups.emplace_back(sort_files);
        return Status::OK();
    }

    // arrange files by file size in descending order
    std::sort(sort_files.begin(), sort_files.end(), CompareSegment);

    // pick files to merge; every sweep over the remaining files builds one group
    int64_t index_file_size = sort_files[0].index_file_size_;
    while (!sort_files.empty()) {
        meta::SegmentsSchema temp_group;
        int64_t sum_size = 0;
        for (auto iter = sort_files.begin(); iter != sort_files.end();) {
            meta::SegmentSchema& file = *iter;
            if (sum_size + file.file_size_ <= index_file_size) {
                // the file fits into the remaining budget: move it into the current group
                temp_group.push_back(file);
                sum_size += file.file_size_;
                iter = sort_files.erase(iter);
            } else if ((iter + 1 == sort_files.end()) && sum_size < index_file_size) {
                // the last remaining file does not fit, but the group is not full yet:
                // put it into this group anyway, then end the sweep
                temp_group.push_back(file);
                sort_files.erase(iter);
                break;
            } else {
                ++iter;
            }
        }

        if (temp_group.empty()) {
            // The sweep picked nothing, so another sweep would pick nothing either. This happens
            // when index_file_size is not positive while the remaining files have a positive size.
            // Stop here instead of spinning forever; the remaining files are left ungrouped.
            break;
        }

        files_groups.emplace_back(temp_group);
    }

    return Status::OK();
}

}  // namespace engine
}  // namespace milvus
+29 −0
Original line number Diff line number Diff line
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#pragma once

#include <vector>

#include "db/merge/MergeStrategy.h"
#include "utils/Status.h"

namespace milvus {
namespace engine {

// Merge strategy whose RegroupFiles() skips files that already exceed their index file size,
// sorts the remaining files by size in descending order and greedily packs them into groups
// bounded by the index file size.
class MergeAdaptiveStrategy : public MergeStrategy {
 public:
    // Fill `files_groups` with the groups of files (taken from `files_holder`) to merge together.
    Status
    RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) override;
};  // MergeAdaptiveStrategy

}  // namespace engine
}  // namespace milvus
+1 −2
Original line number Diff line number Diff line
@@ -40,8 +40,7 @@ MergeLayeredStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGr
    for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) {
        meta::SegmentSchema& file = *iter;
        if (file.index_file_size_ > 0 && file.file_size_ > (size_t)(file.index_file_size_)) {
            // release file that no need to merge
            files_holder.UnmarkFile(file);
            // skip files that do not need to be merged
            continue;
        }

Loading