Intrinsics

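Test source that checks which x86 intrinsics levels (SSE4.2, AVX, AVX2, AVX-512) can be compiled and, if the CPU supports them, executed correctly.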

/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved.                      *
* This file is part of the LIBXS library.                                     *
*                                                                             *
* For information on the license, see the LICENSE file.                       *
* Further information: https://github.com/hfp/libxs/                          *
* SPDX-License-Identifier: BSD-3-Clause                                       *
******************************************************************************/
#include <libxs_utils.h>

/* Diagnostic printing helper used for error messages in this test.
 * The "|| 1" makes the condition always true, so the printing variant
 * is selected regardless of _DEBUG; the no-op variant is kept as the
 * alternative if "|| 1" is removed and _DEBUG is not defined. */
#if defined(_DEBUG) || 1
# define FPRINTF(STREAM, ...) do { fprintf(STREAM, __VA_ARGS__); } while(0)
#else
# define FPRINTF(STREAM, ...) do {} while(0)
#endif

/* Compile-time sanity check: abort the build if the maximum static target
 * level is lower than the static target level. */
#if (LIBXS_MAX_STATIC_TARGET_ARCH < LIBXS_STATIC_TARGET_ARCH)
# error "LIBXS_MAX_STATIC_TARGET_ARCH < LIBXS_STATIC_TARGET_ARCH"
#endif


/* SSE4.2 kernel: issues _mm_crc32_u32 (the function is decorated with
 * LIBXS_INTRINSICS(LIBXS_X86_SSE42) so it can be built when the baseline
 * is lower). Returns 0 on success and EXIT_FAILURE otherwise. */
#if defined(LIBXS_INTRINSICS_SSE42)
LIBXS_INLINE LIBXS_INTRINSICS(LIBXS_X86_SSE42)
int test_intrinsics_sse42(void)
{
  /* deterministic input: seed 0 combined with the word 0x12345678 */
  const unsigned int seed = 0;
  const unsigned int word = 0x12345678u;
  const unsigned int checksum = _mm_crc32_u32(seed, word);
  int result = 0;
  /* the checksum is expected to be non-zero */
  if (0 == checksum) {
    result = EXIT_FAILURE;
  }
  return result;
}
#endif


/* AVX kernel: adds two 256-bit vectors of eight floats (1.5 + 2.5) and
 * verifies every lane equals 4.0. Returns 0 on success and EXIT_FAILURE
 * if any lane mismatches. */
#if defined(LIBXS_INTRINSICS_AVX)
LIBXS_INLINE LIBXS_INTRINSICS(LIBXS_X86_AVX)
int test_intrinsics_avx(void)
{
  LIBXS_ALIGNED(float lanes[8], 32);
  const __m256 sum = _mm256_add_ps(_mm256_set1_ps(1.5f), _mm256_set1_ps(2.5f));
  int result = 0, lane = 0;
  /* aligned store: lanes is declared with 32-byte alignment */
  _mm256_store_ps(lanes, sum);
  while (lane < 8 && 0 == result) {
    if (4.0f != lanes[lane]) {
      result = EXIT_FAILURE;
    }
    ++lane;
  }
  return result;
}
#endif


/* AVX2 kernel: adds two 256-bit vectors of eight 32-bit integers (3 + 4)
 * and verifies every lane equals 7. Returns 0 on success and EXIT_FAILURE
 * if any lane mismatches. */
#if defined(LIBXS_INTRINSICS_AVX2)
LIBXS_INLINE LIBXS_INTRINSICS(LIBXS_X86_AVX2)
int test_intrinsics_avx2(void)
{
  LIBXS_ALIGNED(int out[8], 32);
  const __m256i sum = _mm256_add_epi32(_mm256_set1_epi32(3), _mm256_set1_epi32(4));
  int mismatches = 0, k;
  /* aligned store: out is declared with 32-byte alignment */
  _mm256_store_si256((__m256i*)out, sum);
  for (k = 0; k < 8; ++k) {
    if (7 != out[k]) ++mismatches;
  }
  return (0 == mismatches) ? 0 : EXIT_FAILURE;
}
#endif


/* AVX-512 kernel: adds two 512-bit vectors of sixteen 32-bit integers
 * (11 + 22) and verifies every lane equals 33. Returns 0 on success and
 * EXIT_FAILURE if any lane mismatches. */
#if defined(LIBXS_INTRINSICS_AVX512)
LIBXS_INLINE LIBXS_INTRINSICS(LIBXS_X86_AVX512)
int test_intrinsics_avx512(void)
{
  LIBXS_ALIGNED(int out[16], 64);
  const __m512i lhs = _mm512_set1_epi32(11), rhs = _mm512_set1_epi32(22);
  const __m512i sum = _mm512_add_epi32(lhs, rhs);
  int status = 0, k = 16;
  /* aligned store: out is declared with 64-byte alignment */
  _mm512_store_si512(out, sum);
  while (0 < k--) {
    if (33 != out[k]) {
      status = EXIT_FAILURE;
      break;
    }
  }
  return status;
}
#endif


/**
 * Test driver: runs every ISA kernel that was compiled in (LIBXS_INTRINSICS_*
 * defined) and whose level does not exceed the value returned by
 * libxs_cpuid(NULL), then checks that the highest exercised level reaches
 * LIBXS_MAX_STATIC_TARGET_ARCH when that level does not exceed the cpuid value.
 *
 * Command line arguments are unused.
 * Returns EXIT_SUCCESS if no error was counted, EXIT_FAILURE otherwise.
 */
int main(int argc, char* argv[])
{
  const int cpuid = libxs_cpuid(NULL);
  /* highest level whose kernel was eligible to run; starts at the static
   * baseline and is raised by each eligible level in ascending order */
  int reached = LIBXS_STATIC_TARGET_ARCH;
  int highest = LIBXS_TARGET_ARCH_UNKNOWN;
  int nerrors = 0;
  LIBXS_UNUSED(argc); LIBXS_UNUSED(argv);

  /* macro sanity (also enforced at compile time by the #error check) */
  if (LIBXS_MAX_STATIC_TARGET_ARCH < LIBXS_STATIC_TARGET_ARCH) {
    FPRINTF(stderr, "ERROR: MAX_STATIC_TARGET_ARCH < STATIC_TARGET_ARCH\n");
    ++nerrors;
  }
#if !defined(LIBXS_INTRINSICS_X86) && defined(LIBXS_PLATFORM_X86)
# if !defined(__NO_INTRINSICS)
  FPRINTF(stderr, "ERROR: x86 platform but LIBXS_INTRINSICS_X86 not defined\n");
  ++nerrors;
# endif
#endif

  /* ISA kernels: call only when compiler can generate AND CPU supports.
   * Each eligible level is recorded in "reached" so that a single ladder
   * serves both running the kernels and determining the highest level. */
#if defined(LIBXS_INTRINSICS_SSE42)
  if (LIBXS_X86_SSE42 <= cpuid) {
    reached = LIBXS_X86_SSE42;
    if (0 != test_intrinsics_sse42()) {
      FPRINTF(stderr, "ERROR: test_intrinsics_sse42\n");
      ++nerrors;
    }
  }
#endif
#if defined(LIBXS_INTRINSICS_AVX)
  if (LIBXS_X86_AVX <= cpuid) {
    reached = LIBXS_X86_AVX;
    if (0 != test_intrinsics_avx()) {
      FPRINTF(stderr, "ERROR: test_intrinsics_avx\n");
      ++nerrors;
    }
  }
#endif
#if defined(LIBXS_INTRINSICS_AVX2)
  if (LIBXS_X86_AVX2 <= cpuid) {
    reached = LIBXS_X86_AVX2;
    if (0 != test_intrinsics_avx2()) {
      FPRINTF(stderr, "ERROR: test_intrinsics_avx2\n");
      ++nerrors;
    }
  }
#endif
#if defined(LIBXS_INTRINSICS_AVX512)
  if (LIBXS_X86_AVX512 <= cpuid) {
    reached = LIBXS_X86_AVX512;
    if (0 != test_intrinsics_avx512()) {
      FPRINTF(stderr, "ERROR: test_intrinsics_avx512\n");
      ++nerrors;
    }
  }
#endif

  /* highest ISA level that compiled AND ran successfully; remains
   * LIBXS_TARGET_ARCH_UNKNOWN if any error was counted above */
  if (0 == nerrors) {
    highest = reached;
  }

  if (highest < LIBXS_MAX_STATIC_TARGET_ARCH && LIBXS_MAX_STATIC_TARGET_ARCH <= cpuid) {
    FPRINTF(stderr, "ERROR: cannot reach LIBXS_MAX_STATIC_TARGET_ARCH (%i < %i)\n",
      highest, LIBXS_MAX_STATIC_TARGET_ARCH);
    ++nerrors;
  }

  /* summary is printed unconditionally (plain fprintf rather than FPRINTF);
   * report the same cpuid value that gated the kernels above */
  fprintf(stderr, "static=%s cpuid=%s target=%s\n",
    libxs_cpuid_name(LIBXS_STATIC_TARGET_ARCH),
    libxs_cpuid_name(cpuid),
    libxs_cpuid_name(highest));

  return (0 != nerrors) ? EXIT_FAILURE : EXIT_SUCCESS;
}