Commit 43d2609c authored by Jin Hai's avatar Jin Hai Committed by GitHub
Browse files

Merge pull request #172 from cydrain/caiyd_reduce_opt

#168 improve result reduce

Former-commit-id: 80644a5c84c295b90b5c20b921bd6eada6ea6e3c
parents e35d4621 3f085729
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ Please mark all change in change log and use the ticket from JIRA.
- \#149 - Improve large query optimizer pass
- \#156 - Not return error when search_resources and index_build_device set cpu
- \#159 - Change the configuration name from 'use_gpu_threshold' to 'gpu_search_threshold'
- \#168 - Improve result reduce

## Task

+4 −3
Original line number Diff line number Diff line
@@ -67,15 +67,16 @@ class DB {

    // Pure-virtual query overload taking a table id, k, nq, nprobe and a pointer to the
    // query vectors (presumably nq vectors, top-k results each - confirm with implementers).
    // NOTE(review): this is a diff view. The `QueryResults& results` line is the removed
    // signature tail; the `ResultIds&` / `ResultDistances&` line is its replacement, which
    // reports ids and distances through two separate out-parameters.
    virtual Status
    Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
          QueryResults& results) = 0;
          ResultIds& result_ids, ResultDistances& result_distances) = 0;

    // Pure-virtual query overload that additionally takes a `meta::DatesT` argument
    // (presumably restricting the search to those dates - confirm with implementers).
    // NOTE(review): this is a diff view. The line ending in `QueryResults& results` is the
    // removed signature tail; the line ending in `ResultDistances& result_distances` replaces it.
    virtual Status
    Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
          const meta::DatesT& dates, QueryResults& results) = 0;
          const meta::DatesT& dates, ResultIds& result_ids, ResultDistances& result_distances) = 0;

    // Pure-virtual query overload that additionally takes an explicit list of file ids
    // alongside the dates (presumably limiting the search to those files - confirm).
    // NOTE(review): this is a diff view. The single line ending in `QueryResults& results`
    // is the removed signature tail; the two lines that follow it are the replacement,
    // which splits the output into `result_ids` and `result_distances`.
    virtual Status
    Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
          uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0;
          uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
          ResultDistances& result_distances) = 0;

    // Pure-virtual; reports a size through the out-parameter `result` and returns a Status.
    // What exactly is measured (and in which unit) is not visible in this excerpt.
    virtual Status
    Size(uint64_t& result) = 0;
+10 −8
Original line number Diff line number Diff line
@@ -336,20 +336,20 @@ DBImpl::DropIndex(const std::string& table_id) {

// Query overload without an explicit date list.
// Returns a DB_ERROR status when `shutting_down_` is set. Otherwise it builds a
// single-entry `meta::DatesT` from `utils::GetDate()` and forwards to the overload
// that takes `dates`, returning that overload's Status unchanged.
// NOTE(review): this is a diff view. The `QueryResults& results` parameter line and the
// forwarding call that passes `results` are the removed versions; the lines using
// `result_ids` / `result_distances` are their replacements.
Status
DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
              QueryResults& results) {
              ResultIds& result_ids, ResultDistances& result_distances) {
    // Refuse new queries once shutdown has been flagged.
    if (shutting_down_.load(std::memory_order_acquire)) {
        return Status(DB_ERROR, "Milsvus server is shutdown!");
    }

    // Default date set: only the value returned by utils::GetDate().
    meta::DatesT dates = {utils::GetDate()};
    Status result = Query(table_id, k, nq, nprobe, vectors, dates, results);
    Status result = Query(table_id, k, nq, nprobe, vectors, dates, result_ids, result_distances);

    return result;
}

Status
DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
              const meta::DatesT& dates, QueryResults& results) {
              const meta::DatesT& dates, ResultIds& result_ids, ResultDistances& result_distances) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return Status(DB_ERROR, "Milsvus server is shutdown!");
    }
@@ -372,14 +372,15 @@ DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t npr
    }

    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info before query
    status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results);
    status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, result_ids, result_distances);
    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info after query
    return status;
}

Status
DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
              uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) {
              uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
              ResultDistances& result_distances) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return Status(DB_ERROR, "Milsvus server is shutdown!");
    }
@@ -413,7 +414,7 @@ DBImpl::Query(const std::string& table_id, const std::vector<std::string>& file_
    }

    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info before query
    status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, results);
    status = QueryAsync(table_id, file_id_array, k, nq, nprobe, vectors, result_ids, result_distances);
    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info after query
    return status;
}
@@ -432,7 +433,7 @@ DBImpl::Size(uint64_t& result) {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Status
DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
                   uint64_t nprobe, const float* vectors, QueryResults& results) {
                   uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) {
    server::CollectQueryMetrics metrics(nq);

    TimeRecorder rc("");
@@ -453,7 +454,8 @@ DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& fi
    }

    // step 3: construct results
    results = job->GetResult();
    result_ids = job->GetResultIds();
    result_distances = job->GetResultDistances();
    rc.ElapseFromBegin("Engine query totally cost");

    return Status::OK();
+5 −4
Original line number Diff line number Diff line
@@ -91,15 +91,16 @@ class DBImpl : public DB {

    // Override of the DB query overload that takes no date list.
    // NOTE(review): this is a diff view. The `QueryResults& results` line is the removed
    // signature tail; the `ResultIds&` / `ResultDistances&` line is its replacement.
    Status
    Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
          QueryResults& results) override;
          ResultIds& result_ids, ResultDistances& result_distances) override;

    // Override of the DB query overload that also takes a `meta::DatesT` argument.
    // NOTE(review): this is a diff view. The line ending in `QueryResults& results` is the
    // removed signature tail; the line ending in `ResultDistances& result_distances` replaces it.
    Status
    Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors,
          const meta::DatesT& dates, QueryResults& results) override;
          const meta::DatesT& dates, ResultIds& result_ids, ResultDistances& result_distances) override;

    // Override of the DB query overload that takes an explicit list of file ids plus dates.
    // NOTE(review): this is a diff view. The single line ending in `QueryResults& results`
    // is the removed signature tail; the two lines that follow it are the replacement.
    Status
    Query(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
          uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) override;
          uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
          ResultDistances& result_distances) override;

    // Override of DB::Size; reports a size through the out-parameter `result`.
    // What exactly is measured is not visible in this excerpt.
    Status
    Size(uint64_t& result) override;
@@ -107,7 +108,7 @@ class DBImpl : public DB {
 private:
    // Private helper that runs a query over an already-resolved set of table files
    // (`meta::TableFilesSchema`) and writes the outcome to the output parameter(s).
    // NOTE(review): this is a diff view. The line ending in `QueryResults& results` is the
    // removed signature tail; the line ending in `ResultDistances& result_distances` replaces it.
    Status
    QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
               uint64_t nprobe, const float* vectors, QueryResults& results);
               uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances);

    // Private member with no parameters and no return value; its body is not visible in
    // this excerpt (presumably a periodically executed background routine - confirm).
    void
    BackgroundTimerTask();
+5 −3
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@

#include "db/engine/ExecutionEngine.h"

#include <faiss/Index.h>
#include <stdint.h>
#include <utility>
#include <vector>
@@ -26,12 +27,13 @@
namespace milvus {
namespace engine {

// Identifier type used throughout the engine.
// NOTE(review): this is a diff view. The `typedef int64_t IDNumber;` line is the removed
// definition; the `using` alias to `faiss::Index::idx_t` is its replacement.
typedef int64_t IDNumber;
using IDNumber = faiss::Index::idx_t;

// Raw pointer to, and vector of, IDNumber values.
typedef IDNumber* IDNumberPtr;
typedef std::vector<IDNumber> IDNumbers;

// Removed result representation: each result is a vector of (id, double) pairs, and
// QueryResults is a vector of those.
typedef std::vector<std::pair<IDNumber, double>> QueryResult;
typedef std::vector<QueryResult> QueryResults;
// Replacement result representation: ids and distances are carried in two separate
// vectors, typed with faiss's own index and distance types.
typedef std::vector<faiss::Index::idx_t> ResultIds;
typedef std::vector<faiss::Index::distance_t> ResultDistances;

struct TableIndex {
    int32_t engine_type_ = (int)EngineType::FAISS_IDMAP;
Loading