Commit 077d8882 authored by jinhai
Browse files

Merge branch 'branch-0.5.0' into 'branch-0.5.0'

SQ8H in GPU

See merge request megasearch/milvus!701

Former-commit-id: abea98cb2d493696b6eb66d8838e808ae7d6180c
parents f604a61c 8dc37ec4
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -115,6 +115,19 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {

    search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config);

    //    std::stringstream ss_res_id, ss_res_dist;
    //    for (int i = 0; i < 10; ++i) {
    //        printf("%llu", res_ids[i]);
    //        printf("\n");
    //        printf("%.6f", res_dis[i]);
    //        printf("\n");
    //        ss_res_id << res_ids[i] << " ";
    //        ss_res_dist << res_dis[i] << " ";
    //    }
    //    std::cout << std::endl << "after search: " << std::endl;
    //    std::cout << ss_res_id.str() << std::endl;
    //    std::cout << ss_res_dist.str() << std::endl << std::endl;

    auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
    auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);

+39 −20
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
// under the License.

#include "knowhere/index/vector_index/IndexIVFSQHybrid.h"
#include <utility>
#include "faiss/AutoTune.h"
#include "faiss/gpu/GpuAutoTune.h"
#include "faiss/gpu/GpuIndexIVF.h"
@@ -79,20 +80,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) {
VectorIndexPtr
IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
        ResScope rs(res, device_id, false);
        faiss::gpu::GpuClonerOptions option;
        option.allInGpu = true;

        faiss::IndexComposition index_composition;
        index_composition.index = index_.get();
        index_composition.quantizer = nullptr;
        index_composition.mode = 0;  // copy all

        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);

        std::shared_ptr<faiss::Index> device_index;
        device_index.reset(gpu_index);
        return std::make_shared<IVFSQHybrid>(device_index, device_id, res);
        auto p = CopyCpuToGpuWithQuantizer(device_id, config);
        return p.first;
    } else {
        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
    }
@@ -180,7 +169,7 @@ IVFSQHybrid::UnsetQuantizer() {
    ivf_index->quantizer = nullptr;
}

void
VectorIndexPtr
IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
    auto quantizer_conf = std::dynamic_pointer_cast<QuantizerCfg>(conf);
    if (quantizer_conf != nullptr) {
@@ -188,9 +177,10 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
            KNOWHERE_THROW_MSG("mode only support 2 in this func");
        }
    }
    if (quantizer_conf->gpu_id != gpu_id_) {
        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
    }
    //    if (quantizer_conf->gpu_id != gpu_id_) {
    //        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
    //    }
    gpu_id_ = quantizer_conf->gpu_id;

    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
        ResScope rs(res, gpu_id_, false);
@@ -207,8 +197,37 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
        index_composition->mode = quantizer_conf->mode;  // only 2

        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option);
        index_.reset(gpu_index);
        gpu_mode = 2;  // all in gpu
        std::shared_ptr<faiss::Index> new_idx;
        new_idx.reset(gpu_index);
        auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
        return sq_idx;
    } else {
        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
    }
}

// Clones the wrapped index (index_) onto the GPU identified by device_id and
// returns both the new GPU-side IVFSQHybrid and a FaissIVFQuantizer built from
// the quantizer pointer left in the IndexComposition after the clone.
//
// device_id: GPU whose resource is requested from FaissGpuResourceMgr.
// config:    not read anywhere in the visible body.
//
// Returns a pair {new IVFSQHybrid wrapping the GPU index, quantizer holder}.
// Raises via KNOWHERE_THROW_MSG when no GPU resource is available for device_id.
std::pair<VectorIndexPtr, QuantizerPtr>
IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) {
    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
        ResScope rs(res, device_id, false);
        faiss::gpu::GpuClonerOptions option;
        option.allInGpu = true;

        // Describe what to clone: the raw index pointer, no pre-existing quantizer, mode 0.
        faiss::IndexComposition index_composition;
        index_composition.index = index_.get();
        index_composition.quantizer = nullptr;
        index_composition.mode = 0;  // copy all

        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);

        // Hand the raw pointer returned by the clone to a shared_ptr, then wrap it in a new IVFSQHybrid.
        std::shared_ptr<faiss::Index> device_index;
        device_index.reset(gpu_index);
        auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);

        // NOTE(review): index_composition.quantizer was set to nullptr above and is
        // dereferenced below; this relies on index_cpu_to_gpu filling it in for mode 0.
        // Confirm against the faiss side, otherwise this is a null dereference.
        auto q = std::make_shared<FaissIVFQuantizer>();
        q->quantizer = index_composition.quantizer;
        // Presumably the quantizer's size in bytes (d * vector count * sizeof(float)) -- TODO confirm.
        q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
        return std::make_pair(new_idx, q);
    } else {
        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
    }
+5 −1
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@

#include <faiss/index_io.h>
#include <memory>
#include <utility>

#include "IndexGPUIVFSQ.h"
#include "Quantizer.h"
@@ -60,9 +61,12 @@ class IVFSQHybrid : public GPUIVFSQ {
    void
    UnsetQuantizer();

    void
    VectorIndexPtr
    LoadData(const knowhere::QuantizerPtr& q, const Config& conf);

    std::pair<VectorIndexPtr, QuantizerPtr>
    CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config);

    IndexModelPtr
    Train(const DatasetPtr& dataset, const Config& config) override;

+17 −17
Original line number Diff line number Diff line
@@ -243,23 +243,23 @@ TEST_P(IVFTest, hybrid) {
        hybrid_1_idx->UnsetQuantizer();
    }

    //    {
    //        auto hybrid_2_idx = std::make_shared<knowhere::IVFSQHybrid>(device_id);
    //
    //        auto binaryset = index_->Serialize();
    //        hybrid_2_idx->Load(binaryset);
    //
    //        auto quantizer_conf = std::make_shared<knowhere::QuantizerCfg>();
    //        quantizer_conf->mode = 1;
    //        quantizer_conf->gpu_id = device_id;
    //        auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf);
    //        quantizer_conf->mode = 2;
    //        hybrid_2_idx->LoadData(q, quantizer_conf);
    //
    //        auto result = hybrid_2_idx->Search(query_dataset, conf);
    //        AssertAnns(result, nq, conf->k);
    //        PrintResult(result, nq, k);
    //    }
    {
        auto hybrid_2_idx = std::make_shared<knowhere::IVFSQHybrid>(device_id);

        auto binaryset = index_->Serialize();
        hybrid_2_idx->Load(binaryset);

        auto quantizer_conf = std::make_shared<knowhere::QuantizerCfg>();
        quantizer_conf->mode = 1;
        quantizer_conf->gpu_id = device_id;
        auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf);
        quantizer_conf->mode = 2;
        auto gpu_idx = hybrid_2_idx->LoadData(q, quantizer_conf);

        auto result = gpu_idx->Search(query_dataset, conf);
        AssertAnns(result, nq, conf->k);
        PrintResult(result, nq, k);
    }
}

// TEST_P(IVFTest, gpu_to_cpu) {
+3 −2
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@ class ExecutionEngine {
    Load(bool to_cache = true) = 0;

    virtual Status
    CopyToGpu(uint64_t device_id) = 0;
    CopyToGpu(uint64_t device_id, bool hybrid) = 0;

    virtual Status
    CopyToIndexFileToGpu(uint64_t device_id) = 0;
@@ -80,7 +80,8 @@ class ExecutionEngine {
    Merge(const std::string& location) = 0;

    virtual Status
    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels) const = 0;
    Search(int64_t n, const float* data, int64_t k, int64_t nprobe, float* distances, int64_t* labels,
           bool hybrid) const = 0;

    virtual std::shared_ptr<ExecutionEngine>
    BuildIndex(const std::string& location, EngineType engine_type) = 0;
Loading