Commit 0d6d5908 authored by groot's avatar groot
Browse files

Merge branch 'binary' of github.com:yhmo/milvus into binary

parents 81b0275c c39ea4fd
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -46,8 +46,10 @@ Please mark all change in change log and use the issue from GitHub
-   \#1549 Fix server/wal config setting bug
-   \#1556 Index file not created after table and index created
-   \#1560 Search crashed with Super-high dimensional binary vector
-   \#1564 Too low recall for glove-200-angular, ivf_pq index
-   \#1571 Meta engine type becomes IDMAP after dropping index for BINARY table
-   \#1574 Set all existing bitset in cache when applying deletes
-   \#1577 Row count incorrect if delete vectors then create index

## Feature
-   \#216 Add CLI to get server info
+4 −4
Original line number Diff line number Diff line
@@ -118,12 +118,12 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {

        //    std::stringstream ss_res_id, ss_res_dist;
        //    for (int i = 0; i < 10; ++i) {
        //        printf("%llu", res_ids[i]);
        //        printf("%llu", p_id[i]);
        //        printf("\n");
        //        printf("%.6f", res_dis[i]);
        //        printf("%.6f", p_dist[i]);
        //        printf("\n");
        //        ss_res_id << res_ids[i] << " ";
        //        ss_res_dist << res_dis[i] << " ";
        //        ss_res_id << p_id[i] << " ";
        //        ss_res_dist << p_dist[i] << " ";
        //    }
        //    std::cout << std::endl << "after search: " << std::endl;
        //    std::cout << ss_res_id.str() << std::endl;
+2 −1
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "scheduler/task/BuildIndexTask.h"

#include <fiu-local.h>

#include <memory>
#include <string>
#include <thread>
@@ -207,7 +208,7 @@ XBuildIndexTask::Execute() {
        // step 6: update meta
        table_file.file_type_ = engine::meta::TableFileSchema::INDEX;
        table_file.file_size_ = index->PhysicalSize();
        table_file.row_count_ = index->Count();
        table_file.row_count_ = file_->row_count_;  // index->Count();

        auto origin_file = *file_;
        origin_file.file_type_ = engine::meta::TableFileSchema::BACKUP;
+2 −1
Original line number Diff line number Diff line
@@ -101,7 +101,8 @@ XSearchTask::XSearchTask(const std::shared_ptr<server::Context>& context, TableF
    if (file_) {
        // distance -- value 0 means two vectors equal, ascending reduce, L2/HAMMING/JACCARD/TANIMOTO ...
        // similarity -- infinity value means two vectors equal, descending reduce, IP
        if (file_->metric_type_ == static_cast<int>(MetricType::IP)) {
        if (file_->metric_type_ == static_cast<int>(MetricType::IP) &&
            file_->engine_type_ != static_cast<int>(EngineType::FAISS_PQ)) {
            ascending_reduce = false;
        }

+76 −0
Original line number Diff line number Diff line
@@ -264,6 +264,82 @@ TEST_F(DeleteTest, delete_multiple_times) {
    }
}

// Verifies that vectors deleted *before* an index is built stay deleted afterwards:
//   1. insert `nb` vectors with explicit ids and flush,
//   2. delete a random subset and flush,
//   3. build an IVFSQ8 index on the table,
//   4. check that the table row count excludes the deleted rows,
//   5. check that searching with a deleted vector no longer returns that vector.
// NOTE(review): presumably the regression test for the "row count incorrect if delete
// vectors then create index" issue -- confirm against the change log.
TEST_F(DeleteTest, delete_before_create_index) {
    milvus::engine::meta::TableSchema table_info = BuildTableSchema();
    table_info.engine_type_ = static_cast<int32_t>(milvus::engine::EngineType::FAISS_IVFFLAT);
    auto stat = db_->CreateTable(table_info);
    ASSERT_TRUE(stat.ok());

    milvus::engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = table_info.table_id_;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // Insert nb vectors whose ids are simply 0..nb-1, so id == position in xb.
    int64_t nb = 10000;
    milvus::engine::VectorsData xb;
    BuildVectors(nb, xb);

    for (int64_t i = 0; i < nb; i++) {
        xb.id_array_.push_back(i);
    }

    stat = db_->InsertVectors(table_info.table_id_, "", xb);
    ASSERT_TRUE(stat.ok());

    stat = db_->Flush();
    ASSERT_TRUE(stat.ok());

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int64_t> dis(0, nb - 1);

    // Pick up to num_query random vectors. The map is keyed by id, so if the generator
    // returns the same position twice only one entry is kept; everything below therefore
    // uses the map / ids_to_delete size rather than num_query.
    int64_t num_query = 10;
    std::map<int64_t, milvus::engine::VectorsData> search_vectors;
    for (int64_t i = 0; i < num_query; ++i) {
        int64_t index = dis(gen);
        milvus::engine::VectorsData search;
        search.vector_count_ = 1;
        for (int64_t j = 0; j < TABLE_DIM; j++) {
            search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]);
        }
        search_vectors.insert(std::make_pair(xb.id_array_[index], search));
    }

    // Delete exactly the vectors that will later be used as queries.
    milvus::engine::IDNumbers ids_to_delete;
    for (auto& kv : search_vectors) {
        ids_to_delete.emplace_back(kv.first);
    }
    stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete);
    ASSERT_TRUE(stat.ok());

    stat = db_->Flush();
    ASSERT_TRUE(stat.ok());

    // Build the index only after the deletes have been flushed.
    milvus::engine::TableIndex index;
    index.engine_type_ = static_cast<int>(milvus::engine::EngineType::FAISS_IVFSQ8);
    index.extra_params_ = {{"nlist", 100}};
    stat = db_->CreateIndex(table_info.table_id_, index);
    ASSERT_TRUE(stat.ok());

    // The row count must not include the deleted vectors.
    uint64_t row_count;
    stat = db_->GetTableRowCount(table_info.table_id_, row_count);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(row_count, static_cast<uint64_t>(nb) - ids_to_delete.size());

    int topk = 10, nprobe = 10;
    for (auto& pair : search_vectors) {
        auto& search = pair.second;

        std::vector<std::string> tags;
        milvus::engine::ResultIds result_ids;
        milvus::engine::ResultDistances result_distances;
        stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids,
                          result_distances);
        ASSERT_TRUE(stat.ok());
        // Guard the [0] accesses below: a query that returns nothing must fail the test
        // instead of reading out of bounds.
        ASSERT_FALSE(result_ids.empty());
        ASSERT_FALSE(result_distances.empty());

        // The query vector itself was deleted, so the best hit must be a different vector
        // and must not be an exact (near-zero distance) match.
        // NOTE(review): the threshold of 1 is taken from the original test; presumably it
        // relies on the data distribution produced by BuildVectors -- confirm.
        ASSERT_NE(result_ids[0], pair.first);
        ASSERT_GT(result_distances[0], 1);
    }
}

TEST_F(DeleteTest, delete_with_index) {
    milvus::engine::meta::TableSchema table_info = BuildTableSchema();
    table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT;