Commit 58c7a8cf authored by groot, committed by JinHai-CN
Browse files

optimize merge process (#2419)



* optimize merge process

Signed-off-by: groot <yihua.mo@zilliz.com>

* typo

Signed-off-by: groot <yihua.mo@zilliz.com>

* refine code

Signed-off-by: yhmo <yihua.mo@zilliz.com>

* drop collection issue

Signed-off-by: yhmo <yihua.mo@zilliz.com>
parent ec342b4b
Loading
Loading
Loading
Loading
+4 −9
Original line number Diff line number Diff line
@@ -17,14 +17,6 @@

namespace milvus {
namespace engine {
namespace {
struct {
    bool
    operator()(meta::SegmentSchema& left, meta::SegmentSchema& right) const {
        return left.file_size_ > right.file_size_;
    }
} CompareSegment;
}  // namespace

Status
MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) {
@@ -54,7 +46,10 @@ MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesG
    }

    // arrange files by file size in descending order
    std::sort(sort_files.begin(), sort_files.end(), CompareSegment);
    std::sort(sort_files.begin(), sort_files.end(),
              [](const meta::SegmentSchema& left, const meta::SegmentSchema& right) {
                  return left.file_size_ > right.file_size_;
              });

    // pick files to merge
    int64_t index_file_size = sort_files[0].index_file_size_;
+30 −3
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include "db/meta/MetaConsts.h"
#include "utils/Log.h"

#include <algorithm>
#include <map>
#include <vector>

@@ -34,13 +35,39 @@ MergeLayeredStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGr
        {1UL << 30, meta::SegmentsSchema()},  // 1GB
    };

    meta::SegmentsSchema& files = files_holder.HoldFiles();
    meta::SegmentsSchema sort_files = files_holder.HoldFiles();
    // no need to merge single file
    if (sort_files.size() < 2) {
        return Status::OK();
    }

    // arrange files by file size in descending order
    std::sort(sort_files.begin(), sort_files.end(),
              [](const meta::SegmentSchema& left, const meta::SegmentSchema& right) {
                  return left.file_size_ > right.file_size_;
              });

    // first, try to pair the largest file with the smallest file that brings the merged size above index_file_size;
    // this avoids overly big merged files, e.g. index_file_size = 1024 but merged file size = 1280
    int64_t index_file_size = sort_files[0].index_file_size_;
    size_t biggest_size = sort_files[0].file_size_;
    for (auto iter = sort_files.end() - 1; iter != sort_files.begin() + 1; --iter) {
        if ((*iter).file_size_ + biggest_size > index_file_size) {
            meta::SegmentsSchema temp_group = {*sort_files.begin(), *iter};
            files_groups.emplace_back(temp_group);
            sort_files.erase(iter);
            sort_files.erase(sort_files.begin());
            break;
        }
    }

    meta::SegmentsSchema huge_files;
    // iterate from the end, because typically the files_holder gets files in order from largest to smallest
    for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) {
    // put files to layers
    for (meta::SegmentsSchema::reverse_iterator iter = sort_files.rbegin(); iter != sort_files.rend(); ++iter) {
        meta::SegmentSchema& file = *iter;
        if (file.index_file_size_ > 0 && file.file_size_ > (size_t)(file.index_file_size_)) {
            // file that no need to merge
            files_holder.UnmarkFile(file);
            continue;
        }

+3 −0
Original line number Diff line number Diff line
@@ -76,6 +76,9 @@ DropCollectionRequest::OnExecute() {
            return status;
        }

        // step 4: flush to trigger CleanUpFilesWithTTL
        status = DBWrapper::DB()->Flush();

        rc.ElapseFromBegin("total cost");
    } catch (std::exception& ex) {
        return Status(SERVER_UNEXPECTED_ERROR, ex.what());