Commit d2c7ab5b authored by jinhai
Browse files

Merge branch 'fix_hybrid_and_upgrade_faiss' into 'branch-0.5.1'

MS-671 MS-670 Fix hybrid and upgrade faiss

See merge request megasearch/milvus!772

Former-commit-id: 084f80379d6de681484e0f1946025ec2565a57fd
parents 1044e3fe b98159ab
Loading
Loading
Loading
Loading
+53 −6
Original line number Diff line number Diff line
@@ -18,16 +18,13 @@
#include "db/engine/ExecutionEngineImpl.h"
#include "cache/CpuCacheMgr.h"
#include "cache/GpuCacheMgr.h"
#include "knowhere/common/Config.h"
#include "metrics/Metrics.h"
#include "scheduler/Utils.h"
#include "server/Config.h"
#include "utils/CommonUtil.h"
#include "utils/Exception.h"
#include "utils/Log.h"

#include "knowhere/common/Config.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIVFSQHybrid.h"
#include "scheduler/Utils.h"
#include "server/Config.h"
#include "wrapper/ConfAdapter.h"
#include "wrapper/ConfAdapterMgr.h"
#include "wrapper/VecImpl.h"
@@ -249,6 +246,56 @@ ExecutionEngineImpl::Load(bool to_cache) {
Status
ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
    if (hybrid) {
#if 1
        const std::string key = location_ + ".quantizer";
        std::vector<uint64_t> gpus = scheduler::get_gpu_pool();

        const int64_t NOT_FOUND = -1;
        int64_t device_id = NOT_FOUND;

        // cache hit
        {
            knowhere::QuantizerPtr quantizer = nullptr;

            for (auto& gpu : gpus) {
                auto cache = cache::GpuCacheMgr::GetInstance(gpu);
                if (auto cached_quantizer = cache->GetIndex(key)) {
                    device_id = gpu;
                    quantizer = std::static_pointer_cast<CachedQuantizer>(cached_quantizer)->Data();
                }
            }

            if (device_id != NOT_FOUND) {
                // cache hit
                auto config = std::make_shared<knowhere::QuantizerCfg>();
                config->gpu_id = device_id;
                config->mode = 2;
                auto new_index = index_->LoadData(quantizer, config);
                index_ = new_index;
            }
        }

        if (device_id == NOT_FOUND) {
            // cache miss
            std::vector<int64_t> all_free_mem;
            for (auto& gpu : gpus) {
                auto cache = cache::GpuCacheMgr::GetInstance(gpu);
                auto free_mem = cache->CacheCapacity() - cache->CacheUsage();
                all_free_mem.push_back(free_mem);
            }

            auto max_e = std::max_element(all_free_mem.begin(), all_free_mem.end());
            auto best_index = std::distance(all_free_mem.begin(), max_e);
            device_id = gpus[best_index];

            auto pair = index_->CopyToGpuWithQuantizer(device_id);
            index_ = pair.first;

            // cache
            auto cached_quantizer = std::make_shared<CachedQuantizer>(pair.second);
            cache::GpuCacheMgr::GetInstance(device_id)->InsertItem(key, cached_quantizer);
        }
#endif
        return Status::OK();
    }

+1 −1
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ class FaissBaseIndex {
    virtual void
    SealImpl();

 protected:
 public:
    std::shared_ptr<faiss::Index> index_ = nullptr;
};

+5 −6
Original line number Diff line number Diff line
@@ -15,12 +15,12 @@
// specific language governing permissions and limitations
// under the License.

#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <memory>

#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVF.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/index_io.h>
#include <memory>

#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
@@ -130,13 +130,12 @@ void
GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
    std::lock_guard<std::mutex> lk(mutex_);

    // TODO(linxj): gpu index support GenParams
    if (auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_)) {
        auto search_cfg = std::dynamic_pointer_cast<IVFCfg>(cfg);
        device_index->setNumProbes(search_cfg->nprobe);
        device_index->nprobe = search_cfg->nprobe;
        //        assert(device_index->getNumProbes() == search_cfg->nprobe);

        {
            // TODO(linxj): allocate gpu mem
            ResScope rs(res_, gpu_id_);
            device_index->search(n, (float*)data, k, distances, labels);
        }
+3 −1
Original line number Diff line number Diff line
@@ -16,8 +16,10 @@
// under the License.

#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/index_factory.h>

#include <memory>

#include "knowhere/adapter/VectorAdapter.h"
+3 −11
Original line number Diff line number Diff line
@@ -15,9 +15,10 @@
// specific language governing permissions and limitations
// under the License.

#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/index_factory.h>

#include <memory>
#include <utility>

#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
@@ -71,13 +72,4 @@ GPUIVFSQ::CopyGpuToCpu(const Config& config) {
    return std::make_shared<IVFSQ>(new_index);
}

// Runs a search by delegating to one of the base-class implementations,
// selected at compile time:
//   - CUSTOMIZATION defined     -> GPUIVF::search_impl
//   - CUSTOMIZATION not defined -> IVF::search_impl
// All arguments are forwarded unchanged to the selected implementation.
void
GPUIVFSQ::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) {
#ifndef CUSTOMIZATION
    // Build without CUSTOMIZATION: use the IVF base implementation.
    IVF::search_impl(n, data, k, distances, labels, cfg);
#else
    // Build with CUSTOMIZATION: use the GPUIVF implementation.
    GPUIVF::search_impl(n, data, k, distances, labels, cfg);
#endif
}

}  // namespace knowhere
Loading