Commit 7338a044 authored by 王翔宇's avatar 王翔宇
Browse files

SQ8H in GPU part3


Former-commit-id: bd95d08bede45255fa10f4d8fdeb8674e435860b
parent e1e9ffeb
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -115,6 +115,20 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {

    search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config);

//    std::stringstream ss_res_id, ss_res_dist;
//    for (int i = 0; i < 10; ++i) {
//        printf("%llu", res_ids[i]);
//        printf("\n");
//        printf("%.6f", res_dis[i]);
//        printf("\n");
//        ss_res_id << res_ids[i] << " ";
//        ss_res_dist << res_dis[i] << " ";
//    }
//    std::cout << std::endl << "after search: " << std::endl;
//    std::cout << ss_res_id.str() << std::endl;
//    std::cout << ss_res_dist.str() << std::endl << std::endl;


    auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
    auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);

+34 −17
Original line number Diff line number Diff line
@@ -79,20 +79,8 @@ IVFSQHybrid::CopyGpuToCpu(const Config& config) {
VectorIndexPtr
IVFSQHybrid::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {
        ResScope rs(res, device_id, false);
        faiss::gpu::GpuClonerOptions option;
        option.allInGpu = true;

        faiss::IndexComposition index_composition;
        index_composition.index = index_.get();
        index_composition.quantizer = nullptr;
        index_composition.mode = 0;  // copy all

        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);

        std::shared_ptr<faiss::Index> device_index;
        device_index.reset(gpu_index);
        return std::make_shared<IVFSQHybrid>(device_index, device_id, res);
        auto p = CopyCpuToGpuWithQuantizer(device_id, config);
        return p.first;
    } else {
        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
    }
@@ -188,9 +176,10 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
            KNOWHERE_THROW_MSG("mode only support 2 in this func");
        }
    }
    if (quantizer_conf->gpu_id != gpu_id_) {
        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
    }
//    if (quantizer_conf->gpu_id != gpu_id_) {
//        KNOWHERE_THROW_MSG("quantizer and data must on the same gpu card");
//    }
    gpu_id_ = quantizer_conf->gpu_id;

    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) {
        ResScope rs(res, gpu_id_, false);
@@ -216,6 +205,34 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
    }
}

std::pair<VectorIndexPtr, QuantizerPtr>
IVFSQHybrid::CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config) {
    if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) {

        ResScope rs(res, device_id, false);
        faiss::gpu::GpuClonerOptions option;
        option.allInGpu = true;

        faiss::IndexComposition index_composition;
        index_composition.index = index_.get();
        index_composition.quantizer = nullptr;
        index_composition.mode = 0;  // copy all

        auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, &index_composition, &option);

        std::shared_ptr<faiss::Index> device_index;
        device_index.reset(gpu_index);
                auto new_idx = std::make_shared<IVFSQHybrid>(device_index, device_id, res);

        auto q = std::make_shared<FaissIVFQuantizer>();
        q->quantizer = index_composition.quantizer;
        q->size = index_composition.quantizer->d * index_composition.quantizer->getNumVecs() * sizeof(float);
        return std::make_pair(new_idx, q);
    } else {
        KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
    }
}

FaissIVFQuantizer::~FaissIVFQuantizer() {
    if (quantizer != nullptr) {
        delete quantizer;
+3 −0
Original line number Diff line number Diff line
@@ -63,6 +63,9 @@ class IVFSQHybrid : public GPUIVFSQ {
    // Takes a quantizer `q` plus a config and returns a VectorIndexPtr.
    // NOTE(review): only the tail of the definition is visible in this diff; it
    // appears to reject modes other than 2 and to adopt the gpu id carried by
    // the config — confirm against the full implementation.
    VectorIndexPtr
    LoadData(const knowhere::QuantizerPtr& q, const Config& conf);

    // Clones the CPU index onto GPU `device_id` and returns the pair
    // (GPU-side index, quantizer obtained from the clone). The definition
    // raises via KNOWHERE_THROW_MSG when no GPU resource is available for
    // `device_id`; `config` is not read there.
    std::pair<VectorIndexPtr, QuantizerPtr>
    CopyCpuToGpuWithQuantizer(const int64_t& device_id, const Config& config);

    // Override of the inherited Train: takes a dataset and a config and
    // returns an IndexModelPtr. (Implementation is not part of this view.)
    IndexModelPtr
    Train(const DatasetPtr& dataset, const Config& config) override;

+3 −14
Original line number Diff line number Diff line
@@ -256,27 +256,16 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
        conf->gpu_id = device_id;

        if (quantizer) {
            std::cout << "cache hit" << std::endl;
            // cache hit
            conf->mode = 2;
            auto new_index = index_->LoadData(quantizer->Data(), conf);
            index_ = new_index;
        } else {
            std::cout << "cache miss" << std::endl;
            // cache hit
            // cache miss
            if (index_ == nullptr) {
                ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to copy to gpu";
                return Status(DB_ERROR, "index is null");
            }
            conf->mode = 1;
            auto q = index_->LoadQuantizer(conf);
            conf->mode = 2;
            auto new_index = index_->LoadData(q, conf);
            index_ = new_index;
            auto pair = index_->CopyToGpuWithQuantizer(device_id);
            index_ = pair.first;

            // cache
            auto cached_quantizer = std::make_shared<CachedQuantizer>(q);
            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
            cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
        }
        return Status::OK();
+19 −0
Original line number Diff line number Diff line
@@ -332,5 +332,24 @@ IVFHybridIndex::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
    return nullptr;
}

std::pair<VecIndexPtr, knowhere::QuantizerPtr>
IVFHybridIndex::CopyToGpuWithQuantizer(const int64_t& device_id, const Config& cfg) {
    try {
        // TODO(linxj): Hardcode here
        if (auto hybrid_idx = std::dynamic_pointer_cast<knowhere::IVFSQHybrid>(index_)) {
            auto pair = hybrid_idx->CopyCpuToGpuWithQuantizer(device_id, cfg);
            auto new_idx = std::make_shared<IVFHybridIndex>(pair.first, type);
            return std::make_pair(new_idx, pair.second);
        } else {
            WRAPPER_LOG_ERROR << "Hybrid mode not support for index type: " << int(type);
        }
    } catch (knowhere::KnowhereException& e) {
        WRAPPER_LOG_ERROR << e.what();
    } catch (std::exception& e) {
        WRAPPER_LOG_ERROR << e.what();
    }
    return std::make_pair(nullptr, nullptr);
}

}  // namespace engine
}  // namespace milvus
Loading