Commit 86444f08 authored by Paolo Bonzini
Browse files

cutils: Add SSE4 version

parent efad6682
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -113,6 +113,13 @@ ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO)
#endif

#ifdef CONFIG_AVX2_OPT
/*
 * SSE4 variant of the accelerated buffer-is-zero check.
 *
 * The compiler's current target options are saved, SSE4 code generation
 * is enabled only for the definitions below, and the saved options are
 * restored afterwards, so the rest of the file is still built with the
 * default target flags.
 */
#pragma GCC push_options
#pragma GCC target("sse4")
/* SSE4.1 intrinsics header; declares _mm_testz_si128(). */
#include <smmintrin.h>
/*
 * _mm_testz_si128(a, b) yields 1 when (a & b) has no bits set.  Passing
 * the same vector twice tests X itself, so the negation is true exactly
 * when X contains at least one non-zero bit.
 */
#define SSE4_NONZERO(X)  !_mm_testz_si128((X), (X))
/*
 * Instantiate buffer_zero_sse4 from the shared ACCEL_BUFFER_ZERO template
 * using 128-bit __m128i vectors and the SSE4 non-zero test above.
 * NOTE(review): the template is defined outside this section; 64 is
 * presumably the number of bytes handled per iteration -- confirm against
 * the ACCEL_BUFFER_ZERO definition.
 */
ACCEL_BUFFER_ZERO(buffer_zero_sse4, 64, __m128i, SSE4_NONZERO)
#pragma GCC pop_options

#pragma GCC push_options
#pragma GCC target("avx2")
#include <immintrin.h>
@@ -182,6 +189,9 @@ static bool select_accel_fn(const void *buf, size_t len)
    if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
        return buffer_zero_avx2(buf, len);
    }
    if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE4)) {
        return buffer_zero_sse4(buf, len);
    }
#endif
    if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
        return buffer_zero_sse2(buf, len);