Commit 6b2afea6 authored by Jin Hai's avatar Jin Hai Committed by GitHub
Browse files

Merge pull request #57 from JinHai-CN/0.5.0

Merge from internal

Former-commit-id: 3355cdc86fa6d40f9c7494906050a691aa2ecf08
parents 4e0781e4 e9bc7218
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ Please mark all change in change log and use the ticket from JIRA.
- \#39 - Create SQ8H index hang if using github server version
- \#30 - Some troubleshooting messages in Milvus do not provide enough information
- \#48 - Config unittest failed
- \#59 - Top-k result is incorrect for small datasets

## Improvement
- MS-552 - Add and change the easylogging library
+2 −1
Original line number Diff line number Diff line
@@ -243,7 +243,8 @@ if(CUSTOMIZATION)
        # set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0
        # set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0
        # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1
        set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
        # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1
        set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1
    endif()
else()
    set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz")
+12 −0
Original line number Diff line number Diff line
@@ -24,17 +24,21 @@
#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/index_io.h>
#include <chrono>
#include <memory>
#include <utility>
#include <vector>

#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/common/Log.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexIVF.h"

namespace knowhere {

using stdclock = std::chrono::high_resolution_clock;

IndexModelPtr
IVF::Train(const DatasetPtr& dataset, const Config& config) {
    auto build_cfg = std::dynamic_pointer_cast<IVFCfg>(config);
@@ -216,7 +220,15 @@ IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const C
void
IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
    auto params = GenParams(cfg);
    stdclock::time_point before = stdclock::now();
    faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get());
    stdclock::time_point after = stdclock::now();
    double search_cost = (std::chrono::duration<double, std::micro>(after - before)).count();
    KNOWHERE_LOG_DEBUG << "IVF search cost: " << search_cost
                       << ", quantization cost: " << faiss::indexIVF_stats.quantization_time
                       << ", data search cost: " << faiss::indexIVF_stats.search_time;
    faiss::indexIVF_stats.quantization_time = 0;
    faiss::indexIVF_stats.search_time = 0;
}

VectorIndexPtr
+2 −2
Original line number Diff line number Diff line
@@ -253,7 +253,7 @@ XSearchTask::MergeTopkToResultSet(const std::vector<int64_t>& input_ids, const s

        if (result[i].empty()) {
            result_buf.resize(input_k, scheduler::IdDistPair(-1, 0.0));
            uint64_t input_k_multi_i = input_k * i;
            uint64_t input_k_multi_i = topk * i;
            for (auto k = 0; k < input_k; ++k) {
                uint64_t idx = input_k_multi_i + k;
                auto& result_buf_item = result_buf[k];
@@ -266,7 +266,7 @@ XSearchTask::MergeTopkToResultSet(const std::vector<int64_t>& input_ids, const s
            result_buf.resize(output_k, scheduler::IdDistPair(-1, 0.0));
            size_t buf_k = 0, src_k = 0, tar_k = 0;
            uint64_t src_idx;
            uint64_t input_k_multi_i = input_k * i;
            uint64_t input_k_multi_i = topk * i;
            while (buf_k < output_k && src_k < input_k && tar_k < tar_size) {
                src_idx = input_k_multi_i + src_k;
                auto& result_buf_item = result_buf[buf_k];