Unverified Commit 8c7ef47f authored by Jin Hai's avatar Jin Hai Committed by GitHub
Browse files

Fix duplicate vector ID issue (#1508)



* #1499 Fix duplicated ID number issue

Signed-off-by: default avatarjinhai <hai.jin@zilliz.com>

* #1499 Fix duplicated ID number issue - part 1

Signed-off-by: default avatarJinHai-CN <hai.jin@zilliz.com>

* #1499 Fix duplicated ID number issue - part 2

Signed-off-by: default avatarJinHai-CN <hai.jin@zilliz.com>

* #1499 Fix duplicated ID number issue - part 3

Signed-off-by: default avatarjinhai <hai.jin@zilliz.com>

* #1499 Fix duplicated ID number issue - part 4

Signed-off-by: default avatarjinhai <hai.jin@zilliz.com>

* Fix format issue

Signed-off-by: default avatarJinHai-CN <hai.jin@zilliz.com>

* Update CHANGELOG

Signed-off-by: default avatarJinHai-CN <hai.jin@zilliz.com>

* Add return value check

Signed-off-by: default avatarJinHai-CN <hai.jin@zilliz.com>
parent 9899cfef
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ Please mark all change in change log and use the issue from GitHub
-   \#1359 Negative distance value returned when searching with HNSW index type
-   \#1429 Server crashed when searching vectors using GPU
-   \#1484 Index type changed to IDMAP after compacted 
-   \#1499 Fix duplicated ID number issue
-   \#1491 Server crashed during adding vectors  
-   \#1504 Avoid possible race condition between delete and search 

+5 −2
Original line number Diff line number Diff line
@@ -484,8 +484,11 @@ DBImpl::InsertVectors(const std::string& table_id, const std::string& partition_
    // insert vectors into target table
    // (zhiru): generate ids
    if (vectors.id_array_.empty()) {
        auto id_generator = std::make_shared<SimpleIDGenerator>();
        id_generator->GetNextIDNumbers(vectors.vector_count_, vectors.id_array_);
        SafeIDGenerator& id_generator = SafeIDGenerator::GetInstance();
        Status status = id_generator.GetNextIDNumbers(vectors.vector_count_, vectors.id_array_);
        if (!status.ok()) {
            return status;
        }
    }

    Status status;
+69 −4
Original line number Diff line number Diff line
@@ -10,11 +10,13 @@
// or implied. See the License for the specific language governing permissions and limitations under the License.

#include "db/IDGenerator.h"
#include "utils/Log.h"

#include <assert.h>
#include <fiu-local.h>
#include <chrono>
#include <iostream>
#include <string>

namespace milvus {
namespace engine {
@@ -30,15 +32,15 @@ SimpleIDGenerator::GetNextIDNumber() {
    return micros * MAX_IDS_PER_MICRO;
}

void
Status
SimpleIDGenerator::NextIDNumbers(size_t n, IDNumbers& ids) {
    if (n > MAX_IDS_PER_MICRO) {
        NextIDNumbers(n - MAX_IDS_PER_MICRO, ids);
        NextIDNumbers(MAX_IDS_PER_MICRO, ids);
        return;
        return Status::OK();
    }
    if (n <= 0) {
        return;
        return Status::OK();
    }

    auto now = std::chrono::system_clock::now();
@@ -48,12 +50,75 @@ SimpleIDGenerator::NextIDNumbers(size_t n, IDNumbers& ids) {
    for (int pos = 0; pos < n; ++pos) {
        ids.push_back(micros + pos);
    }
    return Status::OK();
}

void
Status
SimpleIDGenerator::GetNextIDNumbers(size_t n, IDNumbers& ids) {
    ids.clear();
    NextIDNumbers(n, ids);

    return Status::OK();
}

IDNumber
SafeIDGenerator::GetNextIDNumber() {
    auto now = std::chrono::system_clock::now();
    auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
    std::lock_guard<std::mutex> lock(mtx_);
    if (micros <= time_stamp_ms_) {
        time_stamp_ms_ += 1;
    } else {
        time_stamp_ms_ = micros;
    }
    return time_stamp_ms_ * MAX_IDS_PER_MICRO;
}

Status
SafeIDGenerator::GetNextIDNumbers(size_t n, IDNumbers& ids) {
    ids.clear();
    std::lock_guard<std::mutex> lock(mtx_);
    while (n > 0) {
        if (n > MAX_IDS_PER_MICRO) {
            Status status = NextIDNumbers(MAX_IDS_PER_MICRO, ids);
            if (!status.ok()) {
                return status;
            }
            n -= MAX_IDS_PER_MICRO;
        } else {
            Status status = NextIDNumbers(n, ids);
            if (!status.ok()) {
                return status;
            }
            break;
        }
    }
    return Status::OK();
}

Status
SafeIDGenerator::NextIDNumbers(size_t n, IDNumbers& ids) {
    if (n <= 0 || n > MAX_IDS_PER_MICRO) {
        std::string msg = "Invalid ID number: " + std::to_string(n);
        ENGINE_LOG_ERROR << msg;
        return Status(SERVER_UNEXPECTED_ERROR, msg);
    }

    auto now = std::chrono::system_clock::now();
    int64_t micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
    if (micros <= time_stamp_ms_) {
        time_stamp_ms_ += 1;
    } else {
        time_stamp_ms_ = micros;
    }

    int64_t ID_high_part = time_stamp_ms_ * MAX_IDS_PER_MICRO;

    for (int pos = 0; pos < n; ++pos) {
        ids.push_back(ID_high_part + pos);
    }

    return Status::OK();
}

}  // namespace engine
+32 −3
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#pragma once

#include "Types.h"
#include "utils/Status.h"

#include <cstddef>
#include <vector>
@@ -24,7 +25,7 @@ class IDGenerator {
    virtual IDNumber
    GetNextIDNumber() = 0;

    virtual void
    virtual Status
    GetNextIDNumbers(size_t n, IDNumbers& ids) = 0;

    virtual ~IDGenerator() = 0;
@@ -37,15 +38,43 @@ class SimpleIDGenerator : public IDGenerator {
    IDNumber
    GetNextIDNumber() override;

    void
    Status
    GetNextIDNumbers(size_t n, IDNumbers& ids) override;

 private:
    void
    Status
    NextIDNumbers(size_t n, IDNumbers& ids);

    static constexpr size_t MAX_IDS_PER_MICRO = 1000;
};  // SimpleIDGenerator

class SafeIDGenerator : public IDGenerator {
 public:
    static SafeIDGenerator&
    GetInstance() {
        static SafeIDGenerator instance;
        return instance;
    }

    ~SafeIDGenerator() override = default;

    IDNumber
    GetNextIDNumber() override;

    Status
    GetNextIDNumbers(size_t n, IDNumbers& ids) override;

 private:
    SafeIDGenerator() = default;

    Status
    NextIDNumbers(size_t n, IDNumbers& ids);

    static constexpr size_t MAX_IDS_PER_MICRO = 1000;

    std::mutex mtx_;
    int64_t time_stamp_ms_ = 0;
};

}  // namespace engine
}  // namespace milvus
+6 −3
Original line number Diff line number Diff line
@@ -22,8 +22,7 @@
namespace milvus {
namespace engine {

VectorSource::VectorSource(VectorsData vectors)
    : vectors_(std::move(vectors)), id_generator_(std::make_shared<SimpleIDGenerator>()) {
VectorSource::VectorSource(VectorsData vectors) : vectors_(std::move(vectors)) {
    current_num_vectors_added = 0;
}

@@ -38,7 +37,11 @@ VectorSource::Add(/*const ExecutionEnginePtr& execution_engine,*/ const segment:
        current_num_vectors_added + num_vectors_to_add <= n ? num_vectors_to_add : n - current_num_vectors_added;
    IDNumbers vector_ids_to_add;
    if (vectors_.id_array_.empty()) {
        id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add);
        SafeIDGenerator& id_generator = SafeIDGenerator::GetInstance();
        Status status = id_generator.GetNextIDNumbers(num_vectors_added, vector_ids_to_add);
        if (!status.ok()) {
            return status;
        }
    } else {
        vector_ids_to_add.resize(num_vectors_added);
        for (size_t pos = current_num_vectors_added; pos < current_num_vectors_added + num_vectors_added; pos++) {
Loading