CommonDefX86.cpp 7.07 KB
Newer Older
Alberto Gonzalez's avatar
Alberto Gonzalez committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
 * Copyright (c) 2010-2023, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * \ingroup CommonLib
 * \file    CommonDefX86.cpp
 * \brief   This file contains commonly used SIMD x86 functions.
 */

#include <sstream>
#include <map>
#include <iostream>
#include <stdint.h>
#include <string>
#include "CommonLib/CommonDef.h"


#ifdef TARGET_SIMD_X86


// NO_USE_SIMD is placed in front of the CPU feature detection functions in this file.
// When __GNUC__ is defined it expands to an attribute that switches off the
// "tree-vectorize" optimization for the annotated function; otherwise it expands
// to nothing. Presumably this keeps the detection code itself free of
// auto-generated SIMD instructions -- TODO confirm the original intent.
#if __GNUC__ // valid for GCC and clang
#define NO_USE_SIMD __attribute__((optimize("no-tree-vectorize")))
#else
#define NO_USE_SIMD
#endif


// SIMD_UP_TO_SSE42 is 1 for MinGW builds where __MINGW32__ is defined but
// __MINGW64__ is not, and 0 everywhere else. When it is 1, the do_cpuidex helper
// and the AVX / AVX2 / AVX-512 probing further down are compiled out, so the
// detected level never exceeds SSE4.2.
#if defined ( __MINGW32__ ) && !defined (  __MINGW64__ )
# define SIMD_UP_TO_SSE42 1
#else
# define SIMD_UP_TO_SSE42 0
#endif

/* CPUID access: use the __cpuid/__cpuidex intrinsics from <intrin.h> on Windows
 * (non-MinGW), and the helpers from <cpuid.h> for gcc and clang otherwise. */
#if defined( _WIN32 ) && !defined( __MINGW32__ )
#include <intrin.h>
#define do_cpuid    __cpuid
#define do_cpuidex  __cpuidex
#else
#include <cpuid.h>
/**
 * \brief Query one CPUID leaf.
 *
 * \param CPUInfo   receives EAX, EBX, ECX and EDX (in that order) of the requested leaf
 * \param InfoType  CPUID leaf number
 *
 * __get_cpuid() returns 0 and leaves its outputs untouched when the requested
 * leaf is not supported. In that case all four entries of CPUInfo are set to 0,
 * so callers never evaluate stale data left over from a previous query.
 */
void do_cpuid(int CPUInfo[4], int InfoType){
    unsigned eax = 0;
    unsigned ebx = 0;
    unsigned ecx = 0;
    unsigned edx = 0;

    if( !__get_cpuid( static_cast<unsigned>( InfoType ), &eax, &ebx, &ecx, &edx ) )
    {
        // unsupported leaf: report "no feature bits set"
        eax = ebx = ecx = edx = 0;
    }

    CPUInfo[0] = static_cast<int>( eax );
    CPUInfo[1] = static_cast<int>( ebx );
    CPUInfo[2] = static_cast<int>( ecx );
    CPUInfo[3] = static_cast<int>( edx );
}
#if !SIMD_UP_TO_SSE42
/* query leaf v0, sub-leaf v1; note that, unlike do_cpuid, no range check on the leaf is done here */
#define do_cpuidex(cd, v0, v1) __cpuid_count(v0, v1, cd[0], cd[1], cd[2], cd[3])
#endif
#endif

/**
 * \brief Execute the XGETBV instruction and return its 64-bit result.
 *
 * \param ctr  value loaded into ECX before XGETBV is executed (selects the
 *             extended control register to read)
 * \return     EDX:EAX as produced by XGETBV, combined as (EDX << 32) | EAX.
 *             The value is assembled as uint64_t and returned as int64_t.
 *
 * Three implementations are selected at compile time: the _xgetbv intrinsic,
 * GNU-style inline assembly, or MS-style inline assembly.
 */
static inline int64_t xgetbv (int ctr) {
#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic

    return _xgetbv(ctr);                                   // intrinsic function for XGETBV

#elif defined(__GNUC__)                                    // use inline assembly, Gnu/AT&T syntax

   uint32_t a, d;
   // Newer assemblers know the mnemonic; older ones get the raw opcode bytes
   // 0x0f 0x01 0xd0, which encode the same XGETBV instruction.
#if GCC_VERSION_AT_LEAST(4,4) || CLANG_VERSION_AT_LEAST(3,3)
   __asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : );
#else
   __asm(".byte 0x0f, 0x01, 0xd0" : "=a"(a),"=d"(d) : "c"(ctr) : );
#endif
   // low half comes from EAX, high half from EDX
   return a | (uint64_t(d) << 32);

#else  // #elif defined (_MSC_FULL_VER) || (defined (__INTEL_COMPILER)...) // other compiler. try inline assembly with masm/intel/MS syntax

   uint32_t a, d;
    __asm {
        mov ecx, ctr
        _emit 0x0f
        _emit 0x01
        _emit 0xd0 ; // xgetbv
        mov a, eax
        mov d, edx
    }
   // low half comes from EAX, high half from EDX
   return a | (uint64_t(d) << 32);

#endif
}


// Bit masks for the feature flags returned by CPUID.
// In this file only the following masks are evaluated:
//  - BIT_HAS_SSE41, BIT_HAS_SSE42, BIT_HAS_AVX       against regs[2] of CPUID leaf 1
//  - BIT_HAS_AVX2, BIT_HAS_AVX512F/DQ/BW             against regs[1] of CPUID leaf 7, sub-leaf 0
// The remaining masks are not referenced here; which leaf/register each of them
// belongs to must be taken from the vendor CPUID documentation (several share
// the same bit position, e.g. BIT_HAS_AVX512F and BIT_HAS_FMA4, so they cannot
// refer to the same register).
#define BIT_HAS_MMX                    (1 << 23)
#define BIT_HAS_SSE                    (1 << 25)
#define BIT_HAS_SSE2                   (1 << 26)
#define BIT_HAS_SSE3                   (1 <<  0)
#define BIT_HAS_SSSE3                  (1 <<  9)
#define BIT_HAS_SSE41                  (1 << 19)
#define BIT_HAS_SSE42                  (1 << 20)
#define BIT_HAS_SSE4a                  (1 <<  6)
#define BIT_HAS_OSXSAVE                (1 << 27)
// Combined mask: the AVX bit together with the OSXSAVE bit; callers must compare
// the masked value for equality with BIT_HAS_AVX so that both bits are required.
#define BIT_HAS_AVX                   ((1 << 28)|BIT_HAS_OSXSAVE)
#define BIT_HAS_AVX2                   (1 <<  5)
#define BIT_HAS_AVX512F                (1 << 16)
#define BIT_HAS_AVX512DQ               (1 << 17)
#define BIT_HAS_AVX512BW               (1 << 30)
#define BIT_HAS_FMA3                   (1 << 12)
#define BIT_HAS_FMA4                   (1 << 16)
#define BIT_HAS_X64                    (1 << 29)
#define BIT_HAS_XOP                    (1 << 11)


/**
 * \brief Read instruction set extension support flags from the CPU.
 *
 * Probes the CPU step by step and returns the highest level for which every
 * check up to and including that level succeeded:
 * SCALAR -> SSE41 -> SSE42 -> AVX -> AVX2 -> AVX512 (the latter only when
 * USE_AVX512 is defined). When SIMD_UP_TO_SSE42 is non-zero, probing stops
 * after SSE42.
 *
 * \return the detected X86_VEXT level
 */
NO_USE_SIMD
X86_VEXT _get_x86_extensions()
{
    int regs[4] = {0, 0, 0, 0};
    X86_VEXT ext = SCALAR;

    // Leaf 0: regs[0] (EAX) is the highest basic CPUID leaf the CPU supports.
    do_cpuid( regs, 0 );
    const unsigned maxBasicLeaf = static_cast<unsigned>( regs[0] );
    if( maxBasicLeaf == 0 ) return ext;

    // Leaf 1: feature flags, regs[2] is ECX.
    do_cpuid( regs, 1 );
    if (!(regs[2] & BIT_HAS_SSE41)) return ext;
    ext = SSE41;
    if (!(regs[2] & BIT_HAS_SSE42)) return ext;
    ext = SSE42;
#if !SIMD_UP_TO_SSE42
    // regs still holds leaf 1 here (leaf 1 has no sub-leaves, so no re-query is needed).
    if ((regs[2] & BIT_HAS_AVX) != BIT_HAS_AVX)  return ext; // first check if the cpu supports avx (and OSXSAVE)
    if ((xgetbv(0) & 6) != 6)       return ext; // then see if the os uses YMM state management via XSAVE etc...
    ext = AVX;

    // Leaf 7 carries the AVX2 / AVX-512 flags. Querying a leaf above the maximum
    // basic leaf does not fail; the CPU may answer with the data of another leaf,
    // which would then be misread as feature bits. Stop here in that case.
    if (maxBasicLeaf < 7u)          return ext;
    do_cpuidex( regs, 7, 0 );
    if (!(regs[1] & BIT_HAS_AVX2))  return ext;
    ext = AVX2;
#ifdef USE_AVX512
    if ((xgetbv(0) & 0xE0) != 0xE0) return ext; // see if OPMASK state and ZMM are available and enabled
    // regs still holds leaf 7, sub-leaf 0 (xgetbv does not modify regs).
    if (!(regs[1] & BIT_HAS_AVX512F ))  return ext;
    if (!(regs[1] & BIT_HAS_AVX512DQ))  return ext;
    if (!(regs[1] & BIT_HAS_AVX512BW))  return ext;
    ext = AVX512;
#endif
#endif

    return ext;
}

typedef std::map<std::string, X86_VEXT> translate;
static translate m
{ { "SCALAR", SCALAR },{ "SSE41", SSE41 },{ "SSE42", SSE42 },
  { "AVX", AVX },{ "AVX2", AVX2 },{ "AVX512", AVX512 } };

/**
 * \brief Return the SIMD extension level to use; it is determined once and then cached.
 *
 * On the first call the level is either taken from \p extStrId (when non-empty;
 * it must be one of the keys of the name table `m`, otherwise EXIT is invoked
 * with the offending string) or detected via _get_x86_extensions() (when empty).
 * Every later call ignores \p extStrId and returns the cached level.
 *
 * The cache consists of plain function-local statics without synchronization.
 *
 * \param extStrId  name of the level to force, or an empty string for auto-detection
 * \return          the cached X86_VEXT level
 */
NO_USE_SIMD
X86_VEXT read_x86_extension_flags(const std::string &extStrId)
{
  //static std::atomic<bool> b_detection_finished(false);
  static bool     b_detection_finished = false;
  static X86_VEXT ext_flags            = SCALAR;

  if( b_detection_finished )
  {
    return ext_flags;
  }

  if( extStrId.empty() )
  {
    ext_flags = _get_x86_extensions();
  }
  else
  {
    const translate::const_iterator search = m.find( extStrId );
    if( search == m.end() )
    {
      // report the string that was not recognised (not the still-default ext_flags)
      EXIT( "Mode not supported: " << extStrId << "\n" );
    }
    else
    {
      ext_flags = search->second;
    }
  }

  b_detection_finished = true;
  return ext_flags;
}

const char* read_x86_extension(const std::string &extStrId)
{
  static const char extension_not_available[] = "NA";

  X86_VEXT vext = read_x86_extension_flags(extStrId);

  for( translate::const_iterator it = m.begin(); it != m.end(); ++it )
    if( it->second == vext )
      return it->first.c_str();

  return extension_not_available;
}

#endif // TARGET_SIMD_X86