Commit 8519c8e0 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/amit-migration/tags/migration-for-2.6-6' into staging



migration:
* add avx2 instruction optimization, speeds up zero-page checking on
  compatible architectures and compilers (gcc 4.9+)
* add additional postcopy stats to 'info migrate' output

# gpg: Signature made Tue 08 Mar 2016 11:29:48 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-migration/tags/migration-for-2.6-6:
  cutils: add avx2 instruction optimization
  configure: detect ifunc and avx2 attribute
  Postcopy: Fix sync count in info migrate

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 3293680d 28b90d9c
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -280,6 +280,7 @@ libusb=""
usb_redir=""
opengl=""
opengl_dmabuf="no"
avx2_opt="no"
zlib="yes"
lzo=""
snappy=""
@@ -1773,6 +1774,21 @@ EOF
fi

##########################################
# avx2 optimization requirement check

cat > $TMPC << EOF
static void bar(void) {}
static void *bar_ifunc(void) {return (void*) bar;}
static void foo(void) __attribute__((ifunc("bar_ifunc")));
int main(void) { foo(); return 0; }
EOF
if compile_prog "-mavx2" "" ; then
    if readelf --syms $TMPE |grep "IFUNC.*foo" >/dev/null 2>&1; then
        avx2_opt="yes"
    fi
fi

#########################################
# zlib check

if test "$zlib" != "no" ; then
@@ -4790,6 +4806,7 @@ echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
echo "tcmalloc support  $tcmalloc"
echo "jemalloc support  $jemalloc"
echo "avx2 optimization $avx2_opt"

if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5178,6 +5195,10 @@ if test "$opengl" = "yes" ; then
  fi
fi

if test "$avx2_opt" = "yes" ; then
  echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
fi

if test "$lzo" = "yes" ; then
  echo "CONFIG_LZO=y" >> $config_host_mak
fi
+1 −7
Original line number Diff line number Diff line
@@ -476,13 +476,7 @@ void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
#endif

#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
static inline bool
can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
{
    return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
                   * sizeof(VECTYPE)) == 0
            && ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
}
bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len);
size_t buffer_find_nonzero_offset(const void *buf, size_t len);

/*
+1 −0
Original line number Diff line number Diff line
@@ -635,6 +635,7 @@ MigrationInfo *qmp_query_migrate(Error **errp)
        info->ram->normal_bytes = norm_mig_bytes_transferred();
        info->ram->dirty_pages_rate = s->dirty_pages_rate;
        info->ram->mbps = s->mbps;
        info->ram->dirty_sync_count = s->dirty_sync_count;

        if (blk_mig_active()) {
            info->has_disk = true;
+120 −4
Original line number Diff line number Diff line
@@ -160,6 +160,14 @@ int qemu_fdatasync(int fd)
#endif
}

static bool
can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len)
{
    return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
                   * sizeof(VECTYPE)) == 0
            && ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
}

/*
 * Searches for an area with non-zero content in a buffer
 *
@@ -168,8 +176,8 @@ int qemu_fdatasync(int fd)
 * and addr must be a multiple of sizeof(VECTYPE) due to
 * restriction of optimizations in this function.
 *
 * can_use_buffer_find_nonzero_offset() can be used to check
 * these requirements.
 * can_use_buffer_find_nonzero_offset_inner() can be used to
 * check these requirements.
 *
 * The return value is the offset of the non-zero area rounded
 * down to a multiple of sizeof(VECTYPE) for the first
@@ -180,13 +188,13 @@ int qemu_fdatasync(int fd)
 * If the buffer is all zero the return value is equal to len.
 */

size_t buffer_find_nonzero_offset(const void *buf, size_t len)
static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len)
{
    const VECTYPE *p = buf;
    const VECTYPE zero = (VECTYPE){0};
    size_t i;

    assert(can_use_buffer_find_nonzero_offset(buf, len));
    assert(can_use_buffer_find_nonzero_offset_inner(buf, len));

    if (!len) {
        return 0;
@@ -215,6 +223,114 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len)
    return i * sizeof(VECTYPE);
}

/*
 * GCC before version 4.9 has a bug which will cause the target
 * attribute work incorrectly and failed to compile in some case,
 * restrict the gcc version to 4.9+ to prevent the failure.
 */

#if defined CONFIG_AVX2_OPT && QEMU_GNUC_PREREQ(4, 9)
#pragma GCC push_options
#pragma GCC target("avx2")
#include <cpuid.h>
#include <immintrin.h>

#define AVX2_VECTYPE        __m256i
#define AVX2_SPLAT(p)       _mm256_set1_epi8(*(p))
#define AVX2_ALL_EQ(v1, v2) \
    (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF)
#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2))

static bool
can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len)
{
    return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
                   * sizeof(AVX2_VECTYPE)) == 0
            && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0);
}

static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len)
{
    const AVX2_VECTYPE *p = buf;
    const AVX2_VECTYPE zero = (AVX2_VECTYPE){0};
    size_t i;

    assert(can_use_buffer_find_nonzero_offset_avx2(buf, len));

    if (!len) {
        return 0;
    }

    for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
        if (!AVX2_ALL_EQ(p[i], zero)) {
            return i * sizeof(AVX2_VECTYPE);
        }
    }

    for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
         i < len / sizeof(AVX2_VECTYPE);
         i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
        AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]);
        AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]);
        AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]);
        AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]);
        AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1);
        AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3);
        if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) {
            break;
        }
    }

    return i * sizeof(AVX2_VECTYPE);
}

static bool avx2_support(void)
{
    int a, b, c, d;

    if (__get_cpuid_max(0, NULL) < 7) {
        return false;
    }

    __cpuid_count(7, 0, a, b, c, d);

    return b & bit_AVX2;
}

bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \
         __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc")));
size_t buffer_find_nonzero_offset(const void *buf, size_t len) \
         __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc")));

static void *buffer_find_nonzero_offset_ifunc(void)
{
    typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ?
        buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner;

    return func;
}

static void *can_use_buffer_find_nonzero_offset_ifunc(void)
{
    typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ?
        can_use_buffer_find_nonzero_offset_avx2 :
        can_use_buffer_find_nonzero_offset_inner;

    return func;
}
#pragma GCC pop_options
#else
bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
{
    return can_use_buffer_find_nonzero_offset_inner(buf, len);
}

size_t buffer_find_nonzero_offset(const void *buf, size_t len)
{
    return buffer_find_nonzero_offset_inner(buf, len);
}
#endif

/*
 * Checks if a buffer is all zeroes
 *