Loading CHANGELOG.md +1 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ Please mark all change in change log and use the issue from GitHub - \#1885 Optimize knowhere unittest - \#1886 Refactor log on search and insert request - \#1897 Heap pop and push can be realized by heap_swap_top - \#1928 Fix too many data and uid copies when loading files - \#1930 Upgrade mishards to 0.8.0 ## Task Loading core/src/codecs/default/DefaultVectorsFormat.cpp +3 −6 Original line number Diff line number Diff line Loading @@ -90,15 +90,12 @@ DefaultVectorsFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::Vectors for (; it != it_end; ++it) { const auto& path = it->path(); if (path.extension().string() == raw_vector_extension_) { std::vector<uint8_t> vector_list; auto& vector_list = vectors_read->GetMutableData(); read_vectors_internal(fs_ptr, path.string(), 0, INT64_MAX, vector_list); vectors_read->AddData(vector_list); vectors_read->SetName(path.stem().string()); } if (path.extension().string() == user_id_extension_) { std::vector<segment::doc_id_t> uids; } else if (path.extension().string() == user_id_extension_) { auto& uids = vectors_read->GetMutableUids(); read_uids_internal(fs_ptr, path.string(), uids); vectors_read->AddUids(uids); } } } Loading core/src/db/engine/ExecutionEngineImpl.cpp +6 −10 Original line number Diff line number Diff line Loading @@ -375,8 +375,6 @@ ExecutionEngineImpl::Serialize() { Status ExecutionEngineImpl::Load(bool to_cache) { // TODO(zhiru): refactor index_ = std::static_pointer_cast<knowhere::VecIndex>(cache::CpuCacheMgr::GetInstance()->GetIndex(location_)); bool already_in_cache = (index_ != nullptr); if (!already_in_cache) { Loading Loading @@ -411,21 +409,19 @@ ExecutionEngineImpl::Load(bool to_cache) { auto& vectors = segment_ptr->vectors_ptr_; auto& deleted_docs = segment_ptr->deleted_docs_ptr_->GetDeletedDocs(); auto vectors_uids = vectors->GetUids(); auto& vectors_uids = vectors->GetMutableUids(); auto count = vectors_uids.size(); index_->SetUids(vectors_uids); ENGINE_LOG_DEBUG << "set uids " << index_->GetUids().size() << " for index " << location_; auto vectors_data = vectors->GetData(); auto& vectors_data = vectors->GetData(); faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(vectors->GetCount()); faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(count); for (auto& offset : deleted_docs) { if (!concurrent_bitset_ptr->test(offset)) { concurrent_bitset_ptr->set(offset); } } auto dataset = knowhere::GenDataset(vectors->GetCount(), this->dim_, vectors_data.data()); auto dataset = knowhere::GenDataset(count, this->dim_, vectors_data.data()); if (index_type_ == EngineType::FAISS_IDMAP) { auto bf_index = std::static_pointer_cast<knowhere::IDMAP>(index_); bf_index->Train(knowhere::DatasetPtr(), conf); Loading core/src/segment/Vectors.cpp +10 −4 Original line number Diff line number Diff line Loading @@ -28,10 +28,6 @@ namespace milvus { namespace segment { Vectors::Vectors(std::vector<uint8_t> data, std::vector<doc_id_t> uids, const std::string& name) : data_(std::move(data)), uids_(std::move(uids)), name_(name) { } void Vectors::AddData(const std::vector<uint8_t>& data) { data_.reserve(data_.size() + data.size()); Loading Loading @@ -120,6 +116,16 @@ Vectors::Erase(std::vector<int32_t>& offsets) { << diff.count() << " s"; } std::vector<uint8_t>& Vectors::GetMutableData() { return data_; } std::vector<doc_id_t>& Vectors::GetMutableUids() { return uids_; } const std::vector<uint8_t>& Vectors::GetData() const { return data_; Loading core/src/segment/Vectors.h +6 −2 Original line number Diff line number Diff line Loading @@ -28,8 +28,6 @@ using doc_id_t = int64_t; class Vectors { public: Vectors(std::vector<uint8_t> data, std::vector<doc_id_t> uids, const std::string& name); Vectors() = default; void Loading @@ -41,6 +39,12 @@ class Vectors { void SetName(const std::string& name); std::vector<uint8_t>& GetMutableData(); std::vector<doc_id_t>& GetMutableUids(); const std::vector<uint8_t>& GetData() const; Loading Loading
CHANGELOG.md +1 −0 Original line number Diff line number Diff line Loading @@ -33,6 +33,7 @@ Please mark all change in change log and use the issue from GitHub - \#1885 Optimize knowhere unittest - \#1886 Refactor log on search and insert request - \#1897 Heap pop and push can be realized by heap_swap_top - \#1928 Fix too many data and uid copies when loading files - \#1930 Upgrade mishards to 0.8.0 ## Task Loading
core/src/codecs/default/DefaultVectorsFormat.cpp +3 −6 Original line number Diff line number Diff line Loading @@ -90,15 +90,12 @@ DefaultVectorsFormat::read(const storage::FSHandlerPtr& fs_ptr, segment::Vectors for (; it != it_end; ++it) { const auto& path = it->path(); if (path.extension().string() == raw_vector_extension_) { std::vector<uint8_t> vector_list; auto& vector_list = vectors_read->GetMutableData(); read_vectors_internal(fs_ptr, path.string(), 0, INT64_MAX, vector_list); vectors_read->AddData(vector_list); vectors_read->SetName(path.stem().string()); } if (path.extension().string() == user_id_extension_) { std::vector<segment::doc_id_t> uids; } else if (path.extension().string() == user_id_extension_) { auto& uids = vectors_read->GetMutableUids(); read_uids_internal(fs_ptr, path.string(), uids); vectors_read->AddUids(uids); } } } Loading
core/src/db/engine/ExecutionEngineImpl.cpp +6 −10 Original line number Diff line number Diff line Loading @@ -375,8 +375,6 @@ ExecutionEngineImpl::Serialize() { Status ExecutionEngineImpl::Load(bool to_cache) { // TODO(zhiru): refactor index_ = std::static_pointer_cast<knowhere::VecIndex>(cache::CpuCacheMgr::GetInstance()->GetIndex(location_)); bool already_in_cache = (index_ != nullptr); if (!already_in_cache) { Loading Loading @@ -411,21 +409,19 @@ ExecutionEngineImpl::Load(bool to_cache) { auto& vectors = segment_ptr->vectors_ptr_; auto& deleted_docs = segment_ptr->deleted_docs_ptr_->GetDeletedDocs(); auto vectors_uids = vectors->GetUids(); auto& vectors_uids = vectors->GetMutableUids(); auto count = vectors_uids.size(); index_->SetUids(vectors_uids); ENGINE_LOG_DEBUG << "set uids " << index_->GetUids().size() << " for index " << location_; auto vectors_data = vectors->GetData(); auto& vectors_data = vectors->GetData(); faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(vectors->GetCount()); faiss::ConcurrentBitsetPtr concurrent_bitset_ptr = std::make_shared<faiss::ConcurrentBitset>(count); for (auto& offset : deleted_docs) { if (!concurrent_bitset_ptr->test(offset)) { concurrent_bitset_ptr->set(offset); } } auto dataset = knowhere::GenDataset(vectors->GetCount(), this->dim_, vectors_data.data()); auto dataset = knowhere::GenDataset(count, this->dim_, vectors_data.data()); if (index_type_ == EngineType::FAISS_IDMAP) { auto bf_index = std::static_pointer_cast<knowhere::IDMAP>(index_); bf_index->Train(knowhere::DatasetPtr(), conf); Loading
core/src/segment/Vectors.cpp +10 −4 Original line number Diff line number Diff line Loading @@ -28,10 +28,6 @@ namespace milvus { namespace segment { Vectors::Vectors(std::vector<uint8_t> data, std::vector<doc_id_t> uids, const std::string& name) : data_(std::move(data)), uids_(std::move(uids)), name_(name) { } void Vectors::AddData(const std::vector<uint8_t>& data) { data_.reserve(data_.size() + data.size()); Loading Loading @@ -120,6 +116,16 @@ Vectors::Erase(std::vector<int32_t>& offsets) { << diff.count() << " s"; } std::vector<uint8_t>& Vectors::GetMutableData() { return data_; } std::vector<doc_id_t>& Vectors::GetMutableUids() { return uids_; } const std::vector<uint8_t>& Vectors::GetData() const { return data_; Loading
core/src/segment/Vectors.h +6 −2 Original line number Diff line number Diff line Loading @@ -28,8 +28,6 @@ using doc_id_t = int64_t; class Vectors { public: Vectors(std::vector<uint8_t> data, std::vector<doc_id_t> uids, const std::string& name); Vectors() = default; void Loading @@ -41,6 +39,12 @@ class Vectors { void SetName(const std::string& name); std::vector<uint8_t>& GetMutableData(); std::vector<doc_id_t>& GetMutableUids(); const std::vector<uint8_t>& GetData() const; Loading