Commit 10ee4d90 authored by groot, committed by JinHai-CN
Browse files

#2366 Reduce SQL execution times (#2383)



* #2366

Signed-off-by: yhmo <yihua.mo@zilliz.com>

* fix ut

Signed-off-by: yhmo <yihua.mo@zilliz.com>

* fix python test

Signed-off-by: yhmo <yihua.mo@zilliz.com>

* fix ut

Signed-off-by: groot <yihua.mo@zilliz.com>

* changelog

Signed-off-by: groot <yihua.mo@zilliz.com>
parent dfca26d3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ Please mark all change in change log and use the issue from GitHub
# Milvus 0.9.1 (TBD)

## Bug
-   \#2366 Reduce the number of SQL executions for a collection that contains a lot of partitions
-   \#2378 Duplicate data after server restart
-   \#2399 The nlist set by the user may not take effect
-   \#2403 MySQL max_idle_time is 10 by default
+56 −0
Original line number Diff line number Diff line
@@ -393,6 +393,7 @@ DBImpl::PreloadCollection(const std::string& collection_id) {

    // step 1: get all collection files from parent collection
    meta::FilesHolder files_holder;
#if 0
    auto status = meta_ptr_->FilesToSearch(collection_id, files_holder);
    if (!status.ok()) {
        return status;
@@ -404,6 +405,25 @@ DBImpl::PreloadCollection(const std::string& collection_id) {
    for (auto& schema : partition_array) {
        status = meta_ptr_->FilesToSearch(schema.collection_id_, files_holder);
    }
#else
    auto status = meta_ptr_->FilesToSearch(collection_id, files_holder);
    if (!status.ok()) {
        return status;
    }

    std::vector<meta::CollectionSchema> partition_array;
    status = meta_ptr_->ShowPartitions(collection_id, partition_array);

    std::set<std::string> partition_ids;
    for (auto& schema : partition_array) {
        partition_ids.insert(schema.collection_id_);
    }

    status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder);
    if (!status.ok()) {
        return status;
    }
#endif

    int64_t size = 0;
    int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
@@ -1642,6 +1662,7 @@ DBImpl::Query(const std::shared_ptr<server::Context>& context, const std::string
    Status status;
    meta::FilesHolder files_holder;
    if (partition_tags.empty()) {
#if 0
        // no partition tag specified, means search in whole collection
        // get all collection files from parent collection
        status = meta_ptr_->FilesToSearch(collection_id, files_holder);
@@ -1654,11 +1675,33 @@ DBImpl::Query(const std::shared_ptr<server::Context>& context, const std::string
        for (auto& schema : partition_array) {
            status = meta_ptr_->FilesToSearch(schema.collection_id_, files_holder);
        }
#else
        // no partition tag specified, means search in whole collection
        // get files from root collection
        status = meta_ptr_->FilesToSearch(collection_id, files_holder);
        if (!status.ok()) {
            return status;
        }

        // get files from partitions
        std::set<std::string> partition_ids;
        std::vector<meta::CollectionSchema> partition_array;
        status = meta_ptr_->ShowPartitions(collection_id, partition_array);
        for (auto& id : partition_array) {
            partition_ids.insert(id.collection_id_);
        }

        status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder);
        if (!status.ok()) {
            return status;
        }
#endif

        if (files_holder.HoldFiles().empty()) {
            return Status::OK();  // no files to search
        }
    } else {
#if 0
        // get files from specified partitions
        std::set<std::string> partition_name_array;
        status = GetPartitionsByTags(collection_id, partition_tags, partition_name_array);
@@ -1669,7 +1712,20 @@ DBImpl::Query(const std::shared_ptr<server::Context>& context, const std::string
        for (auto& partition_name : partition_name_array) {
            status = meta_ptr_->FilesToSearch(partition_name, files_holder);
        }
#else
        std::set<std::string> partition_name_array;
        status = GetPartitionsByTags(collection_id, partition_tags, partition_name_array);
        if (!status.ok()) {
            return status;  // didn't match any partition.
        }

        std::set<std::string> partition_ids;
        for (auto& partition_name : partition_name_array) {
            partition_ids.insert(partition_name);
        }

        status = meta_ptr_->FilesToSearchEx(collection_id, partition_ids, files_holder);
#endif
        if (files_holder.HoldFiles().empty()) {
            return Status::OK();  // no files to search
        }
+4 −1
Original line number Diff line number Diff line
@@ -28,17 +28,20 @@ struct {

Status
MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) {
    meta::SegmentsSchema sort_files;
    meta::SegmentsSchema sort_files, ignore_files;
    meta::SegmentsSchema& files = files_holder.HoldFiles();
    for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) {
        meta::SegmentSchema& file = *iter;
        if (file.index_file_size_ > 0 && (int64_t)file.file_size_ > file.index_file_size_) {
            // file that no need to merge
            ignore_files.push_back(file);
            continue;
        }
        sort_files.push_back(file);
    }

    files_holder.UnmarkFiles(ignore_files);

    // no need to merge single file
    if (sort_files.size() < 2) {
        return Status::OK();
+3 −1
Original line number Diff line number Diff line
@@ -1714,7 +1714,7 @@ MySQLMetaImpl::FilesToSearchEx(const std::string& root_collection, const std::se
        // distribute id array to batches
        const int64_t batch_size = 50;
        std::vector<std::vector<std::string>> id_groups;
        std::vector<std::string> temp_group = {root_collection};
        std::vector<std::string> temp_group;
        int64_t count = 1;
        for (auto& id : partition_id_array) {
            temp_group.push_back(id);
@@ -1739,6 +1739,8 @@ MySQLMetaImpl::FilesToSearchEx(const std::string& root_collection, const std::se
                mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);

                bool is_null_connection = (connectionPtr == nullptr);
                fiu_do_on("MySQLMetaImpl.FilesToSearch.null_connection", is_null_connection = true);
                fiu_do_on("MySQLMetaImpl.FilesToSearch.throw_exception", throw std::exception(););
                if (is_null_connection) {
                    return Status(DB_ERROR, "Failed to connect to meta server(mysql)");
                }
+2 −1
Original line number Diff line number Diff line
@@ -1135,6 +1135,7 @@ SqliteMetaImpl::FilesToSearchEx(const std::string& root_collection,
                                FilesHolder& files_holder) {
    try {
        server::MetricCollector metric;
        fiu_do_on("SqliteMetaImpl.FilesToSearch.throw_exception", throw std::exception());

        // get root collection information
        CollectionSchema collection_schema;
@@ -1147,7 +1148,7 @@ SqliteMetaImpl::FilesToSearchEx(const std::string& root_collection,
        // distribute id array to batches
        const int64_t batch_size = 50;
        std::vector<std::vector<std::string>> id_groups;
        std::vector<std::string> temp_group = {root_collection};
        std::vector<std::string> temp_group;
        int64_t count = 1;
        for (auto& id : partition_id_array) {
            temp_group.push_back(id);