Commit 0895f787 authored by peng.xu's avatar peng.xu
Browse files

Merge branch 'branch-0.5.0' into 'branch-0.5.0'

MS-624 Search vectors failed if the time range is long enough

See merge request megasearch/milvus!712

Former-commit-id: db0a8669faabbabecdc01d6791e9bb708b416757
parents c28a9ebe e1d60965
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -9,14 +9,15 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-572 - Milvus crash when get SIGINT
- MS-577 - Unittest Query randomly hung
- MS-587 - Count get wrong result after adding vectors and index built immediately
- MS-599 - search wrong result when table created with metric_type: IP
- MS-599 - Search wrong result when table created with metric_type: IP
- MS-601 - Docker logs error caused by get CPUTemperature error
- MS-622 - Delete vectors should be failed if date range is invalid
- MS-620 - Get table row counts display wrong error code
- MS-637 - out of memory when load too many tasks
- MS-637 - Out of memory when load too many tasks
- MS-640 - Cache object size calculate incorrect
- MS-641 - Segment fault(signal 11) in PickToLoad
- MS-639 - SQ8H index created failed and server hang
- MS-624 - Search vectors failed if the time range is long enough

## Improvement
- MS-552 - Add and change the easylogging library
+1 −1
Original line number Diff line number Diff line
@@ -81,7 +81,7 @@ class Meta {
    UpdateTableFiles(TableFilesSchema& files) = 0;

    virtual Status
    FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
    FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
                  DatePartionedTableFilesSchema& files) = 0;

    virtual Status
+3 −3
Original line number Diff line number Diff line
@@ -1088,7 +1088,7 @@ MySQLMetaImpl::FilesToIndex(TableFilesSchema& files) {
}

Status
MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
                             DatePartionedTableFilesSchema& files) {
    files.clear();

@@ -1108,9 +1108,9 @@ MySQLMetaImpl::FilesToSearch(const std::string& table_id, const std::vector<size
                << META_TABLEFILES << " "
                << "WHERE table_id = " << mysqlpp::quote << table_id;

            if (!partition.empty()) {
            if (!dates.empty()) {
                std::stringstream partitionListSS;
                for (auto& date : partition) {
                for (auto& date : dates) {
                    partitionListSS << std::to_string(date) << ", ";
                }
                std::string partitionListStr = partitionListSS.str();
+1 −1
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ class MySQLMetaImpl : public Meta {
    UpdateTableFiles(TableFilesSchema& files) override;

    Status
    FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& partition,
    FilesToSearch(const std::string& table_id, const std::vector<size_t>& ids, const DatesT& dates,
                  DatePartionedTableFilesSchema& files) override;

    Status
+70 −20
Original line number Diff line number Diff line
@@ -159,7 +159,7 @@ SqliteMetaImpl::Initialize() {
Status
SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id,
                                      const DatesT &dates) {
    if (dates.size() == 0) {
    if (dates.empty()) {
        return Status::OK();
    }

@@ -171,16 +171,35 @@ SqliteMetaImpl::DropPartitionsByDates(const std::string &table_id,
    }

    try {
        // sqlite_orm has a bug: an 'in' statement cannot handle too many elements,
        // so as a workaround we split the dates into batches and run one query per batch.
        std::vector<DatesT> split_dates;
        split_dates.push_back(DatesT());
        const size_t batch_size = 30;
        for(DateT date : dates) {
            DatesT& last_batch = *split_dates.rbegin();
            last_batch.push_back(date);
            if(last_batch.size() > batch_size) {
                split_dates.push_back(DatesT());
            }
        }

        // Calling sqlite update from multiple threads may throw exceptions ('bad logic', etc.), so we take a lock here
        std::lock_guard<std::mutex> meta_lock(meta_mutex_);

        for(auto& batch_dates : split_dates) {
            if(batch_dates.empty()) {
                continue;
            }

            ConnectorPtr->update_all(
                set(
                    c(&TableFileSchema::file_type_) = (int)TableFileSchema::TO_DELETE,
                    c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()),
                where(
                    c(&TableFileSchema::table_id_) == table_id and
                    in(&TableFileSchema::date_, dates)));
                    in(&TableFileSchema::date_, batch_dates)));
        }

        ENGINE_LOG_DEBUG << "Successfully drop partitions, table id = " << table_schema.table_id_;
    } catch (std::exception &e) {
@@ -673,7 +692,7 @@ SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) {
Status
SqliteMetaImpl::FilesToSearch(const std::string &table_id,
                              const std::vector<size_t> &ids,
                              const DatesT &partition,
                              const DatesT &dates,
                              DatePartionedTableFilesSchema &files) {
    files.clear();
    server::MetricCollector metric;
@@ -702,23 +721,54 @@ SqliteMetaImpl::FilesToSearch(const std::string &table_id,
        auto status = DescribeTable(table_schema);
        if (!status.ok()) { return status; }

        // sqlite_orm has a bug: an 'in' statement cannot handle too many elements,
        // so as a workaround we split the dates into batches and run one query per batch.
        std::vector<DatesT> split_dates;
        split_dates.push_back(DatesT());
        const size_t batch_size = 30;
        for(DateT date : dates) {
            DatesT& last_batch = *split_dates.rbegin();
            last_batch.push_back(date);
            if(last_batch.size() > batch_size) {
                split_dates.push_back(DatesT());
            }
        }

        //perform query
        decltype(ConnectorPtr->select(select_columns)) selected;
        if (partition.empty() && ids.empty()) {
        if (dates.empty() && ids.empty()) {
            auto filter = where(match_tableid and match_type);
            selected = ConnectorPtr->select(select_columns, filter);
        } else if (partition.empty() && !ids.empty()) {
        } else if (dates.empty() && !ids.empty()) {
            auto match_fileid = in(&TableFileSchema::id_, ids);
            auto filter = where(match_tableid and match_fileid and match_type);
            selected = ConnectorPtr->select(select_columns, filter);
        } else if (!partition.empty() && ids.empty()) {
            auto match_date = in(&TableFileSchema::date_, partition);
        } else if (!dates.empty() && ids.empty()) {
            for(auto& batch_dates : split_dates) {
                if(batch_dates.empty()) {
                    continue;
                }
                auto match_date = in(&TableFileSchema::date_, batch_dates);
                auto filter = where(match_tableid and match_date and match_type);
            selected = ConnectorPtr->select(select_columns, filter);
        } else if (!partition.empty() && !ids.empty()) {
                auto batch_selected = ConnectorPtr->select(select_columns, filter);
                for (auto &file : batch_selected) {
                    selected.push_back(file);
                }
            }

        } else if (!dates.empty() && !ids.empty()) {
            for(auto& batch_dates : split_dates) {
                if(batch_dates.empty()) {
                    continue;
                }
                auto match_fileid = in(&TableFileSchema::id_, ids);
            auto match_date = in(&TableFileSchema::date_, partition);
                auto match_date = in(&TableFileSchema::date_, batch_dates);
                auto filter = where(match_tableid and match_fileid and match_date and match_type);
            selected = ConnectorPtr->select(select_columns, filter);
                auto batch_selected = ConnectorPtr->select(select_columns, filter);
                for (auto &file : batch_selected) {
                    selected.push_back(file);
                }
            }
        }

        Status ret;
Loading