Merge remote-tracking branch 'source/0.6.0' into clean_cache (87a36ebd) · Commits · SUMMER2020 / students / proj-2018085

CHANGELOG.md

+1 −0

Original line number	Diff line number	Diff line
		@@ -27,6 +27,7 @@ Please mark all change in change log and use the ticket from JIRA.
		- \#523 - Erase file data from cache once the file is marked as deleted
		- \#527 - faiss benchmark not compatible with faiss 1.6.0
		- \#530 - BuildIndex stop when do build index and search simultaneously
		- \#533 - NSG build failed with MetricType Inner Product

		## Feature
		- \#12 - Pure CPU version for Milvus

ci/jenkins/step/build.groovy

+4 −2

Original line number	Diff line number	Diff line
		timeout(time: 60, unit: 'MINUTES') {
		dir ("ci/scripts") {
		withCredentials([usernamePassword(credentialsId: "${params.JFROG_CREDENTIALS_ID}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
		def checkResult = sh(script: "./check_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache", returnStatus: true)
		if ("${env.BINRARY_VERSION}" == "gpu") {
		sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -g -j -u -c"
		sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -g -u -c"
		} else {
		sh "export JFROG_ARTFACTORY_URL='${params.JFROG_ARTFACTORY_URL}' && export JFROG_USER_NAME='${USERNAME}' && export JFROG_PASSWORD='${PASSWORD}' && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -m -j -u -c"
		sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -m -u -c"
		}
		sh "./update_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache -u ${USERNAME} -p ${PASSWORD}"
		}
		}
		}

core/src/index/knowhere/CMakeLists.txt

+1 −0

Original line number	Diff line number	Diff line
		@@ -38,6 +38,7 @@ set(index_srcs
		knowhere/index/vector_index/nsg/NSG.cpp
		knowhere/index/vector_index/nsg/NSGIO.cpp
		knowhere/index/vector_index/nsg/NSGHelper.cpp
		knowhere/index/vector_index/nsg/Distance.cpp
		knowhere/index/vector_index/IndexIVFSQ.cpp
		knowhere/index/vector_index/IndexIVFPQ.cpp
		knowhere/index/vector_index/FaissBaseIndex.cpp

core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp

+0 −4

Original line number	Diff line number	Diff line
		@@ -115,10 +115,6 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
		build_cfg->CheckValid(); // throw exception
		}

		if (build_cfg->metric_type != METRICTYPE::L2) {
		KNOWHERE_THROW_MSG("NSG not support this kind of metric type");
		}

		// TODO(linxj): dev IndexFactory, support more IndexType
		#ifdef MILVUS_GPU_VERSION
		auto preprocess_index = std::make_shared<GPUIVF>(build_cfg->gpu_id);

core/src/index/knowhere/knowhere/index/vector_index/nsg/Distance.cpp

0 → 100644

+247 −0

Original line number	Diff line number	Diff line
		// Licensed to the Apache Software Foundation (ASF) under one
		// or more contributor license agreements. See the NOTICE file
		// distributed with this work for additional information
		// regarding copyright ownership. The ASF licenses this file
		// to you under the Apache License, Version 2.0 (the
		// "License"); you may not use this file except in compliance
		// with the License. You may obtain a copy of the License at
		//
		// http://www.apache.org/licenses/LICENSE-2.0
		//
		// Unless required by applicable law or agreed to in writing,
		// software distributed under the License is distributed on an
		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
		// KIND, either express or implied. See the License for the
		// specific language governing permissions and limitations
		// under the License.

		#pragma once

		#include <immintrin.h>

		#include "knowhere/index/vector_index/nsg/Distance.h"

		namespace knowhere {
		namespace algo {

		float
		DistanceL2::Compare(const float* a, const float* b, unsigned size) const {
		float result = 0;

		#ifdef __GNUC__
		#ifdef __AVX__

		#define AVX_L2SQR(addr1, addr2, dest, tmp1, tmp2) \
		tmp1 = _mm256_loadu_ps(addr1); \
		tmp2 = _mm256_loadu_ps(addr2); \
		tmp1 = _mm256_sub_ps(tmp1, tmp2); \
		tmp1 = _mm256_mul_ps(tmp1, tmp1); \
		dest = _mm256_add_ps(dest, tmp1);

		__m256 sum;
		__m256 l0, l1;
		__m256 r0, r1;
		unsigned D = (size + 7) & ~7U;
		unsigned DR = D % 16;
		unsigned DD = D - DR;
		const float* l = a;
		const float* r = b;
		const float* e_l = l + DD;
		const float* e_r = r + DD;
		float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0};

		sum = _mm256_loadu_ps(unpack);
		if (DR) {
		AVX_L2SQR(e_l, e_r, sum, l0, r0);
		}

		for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) {
		AVX_L2SQR(l, r, sum, l0, r0);
		AVX_L2SQR(l + 8, r + 8, sum, l1, r1);
		}
		_mm256_storeu_ps(unpack, sum);
		result = unpack[0] + unpack[1] + unpack[2] + unpack[3] + unpack[4] + unpack[5] + unpack[6] + unpack[7];

		#else
		#ifdef __SSE2__
		#define SSE_L2SQR(addr1, addr2, dest, tmp1, tmp2) \
		tmp1 = _mm_load_ps(addr1); \
		tmp2 = _mm_load_ps(addr2); \
		tmp1 = _mm_sub_ps(tmp1, tmp2); \
		tmp1 = _mm_mul_ps(tmp1, tmp1); \
		dest = _mm_add_ps(dest, tmp1);

		__m128 sum;
		__m128 l0, l1, l2, l3;
		__m128 r0, r1, r2, r3;
		unsigned D = (size + 3) & ~3U;
		unsigned DR = D % 16;
		unsigned DD = D - DR;
		const float* l = a;
		const float* r = b;
		const float* e_l = l + DD;
		const float* e_r = r + DD;
		float unpack[4] __attribute__((aligned(16))) = {0, 0, 0, 0};

		sum = _mm_load_ps(unpack);
		switch (DR) {
		case 12:
		SSE_L2SQR(e_l + 8, e_r + 8, sum, l2, r2);
		case 8:
		SSE_L2SQR(e_l + 4, e_r + 4, sum, l1, r1);
		case 4:
		SSE_L2SQR(e_l, e_r, sum, l0, r0);
		default:
		break;
		}
		for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) {
		SSE_L2SQR(l, r, sum, l0, r0);
		SSE_L2SQR(l + 4, r + 4, sum, l1, r1);
		SSE_L2SQR(l + 8, r + 8, sum, l2, r2);
		SSE_L2SQR(l + 12, r + 12, sum, l3, r3);
		}
		_mm_storeu_ps(unpack, sum);
		result += unpack[0] + unpack[1] + unpack[2] + unpack[3];

		// nomal distance
		#else

		float diff0, diff1, diff2, diff3;
		const float* last = a + size;
		const float* unroll_group = last - 3;

		/* Process 4 items with each loop for efficiency. */
		while (a < unroll_group) {
		diff0 = a[0] - b[0];
		diff1 = a[1] - b[1];
		diff2 = a[2] - b[2];
		diff3 = a[3] - b[3];
		result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
		a += 4;
		b += 4;
		}
		/* Process last 0-3 pixels. Not needed for standard vector lengths. */
		while (a < last) {
		diff0 = a++ - b++;
		result += diff0 * diff0;
		}
		#endif
		#endif
		#endif

		return result;
		}

		float
		DistanceIP::Compare(const float* a, const float* b, unsigned size) const {
		float result = 0;

		#ifdef __GNUC__
		#ifdef __AVX__
		#define AVX_DOT(addr1, addr2, dest, tmp1, tmp2) \
		tmp1 = _mm256_loadu_ps(addr1); \
		tmp2 = _mm256_loadu_ps(addr2); \
		tmp1 = _mm256_mul_ps(tmp1, tmp2); \
		dest = _mm256_add_ps(dest, tmp1);

		__m256 sum;
		__m256 l0, l1;
		__m256 r0, r1;
		unsigned D = (size + 7) & ~7U;
		unsigned DR = D % 16;
		unsigned DD = D - DR;
		const float* l = a;
		const float* r = b;
		const float* e_l = l + DD;
		const float* e_r = r + DD;
		float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0};

		sum = _mm256_loadu_ps(unpack);
		if (DR) {
		AVX_DOT(e_l, e_r, sum, l0, r0);
		}

		for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) {
		AVX_DOT(l, r, sum, l0, r0);
		AVX_DOT(l + 8, r + 8, sum, l1, r1);
		}
		_mm256_storeu_ps(unpack, sum);
		result = unpack[0] + unpack[1] + unpack[2] + unpack[3] + unpack[4] + unpack[5] + unpack[6] + unpack[7];

		#else
		#ifdef __SSE2__
		#define SSE_DOT(addr1, addr2, dest, tmp1, tmp2) \
		tmp1 = _mm128_loadu_ps(addr1); \
		tmp2 = _mm128_loadu_ps(addr2); \
		tmp1 = _mm128_mul_ps(tmp1, tmp2); \
		dest = _mm128_add_ps(dest, tmp1);
		__m128 sum;
		__m128 l0, l1, l2, l3;
		__m128 r0, r1, r2, r3;
		unsigned D = (size + 3) & ~3U;
		unsigned DR = D % 16;
		unsigned DD = D - DR;
		const float* l = a;
		const float* r = b;
		const float* e_l = l + DD;
		const float* e_r = r + DD;
		float unpack[4] __attribute__((aligned(16))) = {0, 0, 0, 0};

		sum = _mm_load_ps(unpack);
		switch (DR) {
		case 12:
		SSE_DOT(e_l + 8, e_r + 8, sum, l2, r2);
		case 8:
		SSE_DOT(e_l + 4, e_r + 4, sum, l1, r1);
		case 4:
		SSE_DOT(e_l, e_r, sum, l0, r0);
		default:
		break;
		}
		for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) {
		SSE_DOT(l, r, sum, l0, r0);
		SSE_DOT(l + 4, r + 4, sum, l1, r1);
		SSE_DOT(l + 8, r + 8, sum, l2, r2);
		SSE_DOT(l + 12, r + 12, sum, l3, r3);
		}
		_mm_storeu_ps(unpack, sum);
		result += unpack[0] + unpack[1] + unpack[2] + unpack[3];
		#else

		float dot0, dot1, dot2, dot3;
		const float* last = a + size;
		const float* unroll_group = last - 3;

		/* Process 4 items with each loop for efficiency. */
		while (a < unroll_group) {
		dot0 = a[0] * b[0];
		dot1 = a[1] * b[1];
		dot2 = a[2] * b[2];
		dot3 = a[3] * b[3];
		result += dot0 + dot1 + dot2 + dot3;
		a += 4;
		b += 4;
		}
		/* Process last 0-3 pixels. Not needed for standard vector lengths. */
		while (a < last) {
		result += a++ *b++;
		}
		#endif
		#endif
		#endif
		return result;
		}

		//#include <faiss/utils/distances.h>
		// float
		// DistanceL2::Compare(const float* a, const float* b, unsigned size) const {
		// return faiss::fvec_L2sqr(a,b,size);
		//}
		//
		// float
		// DistanceIP::Compare(const float* a, const float* b, unsigned size) const {
		// return faiss::fvec_inner_product(a,b,size);
		//}

		} // namespace algo
		} // namespace knowhere