Contents of /home/lakoyani/lakoyani.com.fj/include.tar
======== asm_amd64.h ========

// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Define features that are guaranteed to be supported by setting the AMD64 variable.
// If a feature is supported, there's no need to check it at runtime every time.

#ifdef GOAMD64_v2
#define hasPOPCNT
#define hasSSE42
#endif

#ifdef GOAMD64_v3
#define hasAVX
#define hasAVX2
#define hasPOPCNT
#define hasSSE42
#endif

#ifdef GOAMD64_v4
#define hasAVX
#define hasAVX2
#define hasPOPCNT
#define hasSSE42
#endif

======== funcdata.h ========

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file defines the IDs for PCDATA and FUNCDATA instructions
// in Go binaries. It is included by assembly sources, so it must
// be written using #defines.
//
// These must agree with symtab.go and ../cmd/internal/objabi/funcdata.go.

#define PCDATA_UnsafePoint 0
#define PCDATA_StackMapIndex 1
#define PCDATA_InlTreeIndex 2
#define PCDATA_ArgLiveIndex 3

#define FUNCDATA_ArgsPointerMaps 0 /* garbage collector blocks */
#define FUNCDATA_LocalsPointerMaps 1
#define FUNCDATA_StackObjects 2
#define FUNCDATA_InlTree 3
#define FUNCDATA_OpenCodedDeferInfo 4 /* info for func with open-coded defers */
#define FUNCDATA_ArgInfo 5
#define FUNCDATA_ArgLiveInfo 6
#define FUNCDATA_WrapInfo 7

// Pseudo-assembly statements.

// GO_ARGS, GO_RESULTS_INITIALIZED, and NO_LOCAL_POINTERS are macros
// that communicate to the runtime information about the location and liveness
// of pointers in an assembly function's arguments, results, and stack frame.
// This communication is only required in assembly functions that make calls
// to other functions that might be preempted or grow the stack.
// NOSPLIT functions that make no calls do not need to use these macros.

// GO_ARGS indicates that the Go prototype for this assembly function
// defines the pointer map for the function's arguments.
// GO_ARGS should be the first instruction in a function that uses it.
// It can be omitted if there are no arguments at all.
// GO_ARGS is inserted implicitly by the linker for any function whose
// name starts with a middle-dot and that also has a Go prototype; it
// is therefore usually not necessary to write explicitly.
#define GO_ARGS FUNCDATA $FUNCDATA_ArgsPointerMaps, go_args_stackmap(SB)

// GO_RESULTS_INITIALIZED indicates that the assembly function
// has initialized the stack space for its results and that those results
// should be considered live for the remainder of the function.
#define GO_RESULTS_INITIALIZED PCDATA $PCDATA_StackMapIndex, $1

// NO_LOCAL_POINTERS indicates that the assembly function stores
// no pointers to heap objects in its local stack variables.
#define NO_LOCAL_POINTERS FUNCDATA $FUNCDATA_LocalsPointerMaps, no_pointers_stackmap(SB)

// ArgsSizeUnknown is set in Func.argsize to mark all functions
// whose argument size is unknown (C vararg functions, and
// assembly code without an explicit specification).
// This value is generated by the compiler, assembler, or linker.
#define ArgsSizeUnknown 0x80000000
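// Usage sketch (illustrative, not part of the archive): how these macros
// typically appear in an amd64 Go assembly source file. The function ·mysum
// and its layout are hypothetical; the comments above give the actual rules.

#include "funcdata.h"

// func mysum(a, b int) int
TEXT ·mysum(SB), 0, $16-24
	GO_ARGS            // pointer map for arguments comes from the Go prototype
	NO_LOCAL_POINTERS  // the 16-byte local frame holds no heap pointers
	MOVQ a+0(FP), AX
	ADDQ b+8(FP), AX
	MOVQ AX, ret+16(FP)
	RET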
======== textflag.h ========

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file defines flags attached to various functions
// and data objects. The compilers, assemblers, and linker must
// all agree on these values.
//
// Keep in sync with src/cmd/internal/obj/textflag.go.

// Don't profile the marked routine. This flag is deprecated.
#define NOPROF 1
// It is ok for the linker to get multiple of these symbols. It will
// pick one of the duplicates to use.
#define DUPOK 2
// Don't insert stack check preamble.
#define NOSPLIT 4
// Put this data in a read-only section.
#define RODATA 8
// This data contains no pointers.
#define NOPTR 16
// This is a wrapper function and should not count as disabling 'recover'.
#define WRAPPER 32
// This function uses its incoming context register.
#define NEEDCTXT 64
// Allocate a word of thread local storage and store the offset from the
// thread local base to the thread local storage in this variable.
#define TLSBSS 256
// Do not insert instructions to allocate a stack frame for this function.
// Only valid on functions that declare a frame size of 0.
// TODO(mwhudson): only implemented for ppc64x at present.
#define NOFRAME 512
// Function can call reflect.Type.Method or reflect.Type.MethodByName.
#define REFLECTMETHOD 1024
// Function is the outermost frame of the call stack. Call stack unwinders
// should stop at this function.
#define TOPFRAME 2048
// Function is an ABI wrapper.
#define ABIWRAPPER 4096

======== asm_ppc64x.h ========

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// FIXED_FRAME defines the size of the fixed part of a stack frame. A stack
// frame looks like this:
//
//	+---------------------+
//	| local variable area |
//	+---------------------+
//	| argument area       |
//	+---------------------+ <- R1+FIXED_FRAME
//	| fixed area          |
//	+---------------------+ <- R1
//
// So a function that sets up a stack frame at all uses at least FIXED_FRAME
// bytes of stack. This mostly affects assembly that calls other functions
// with arguments (the arguments should be stored at FIXED_FRAME+0(R1),
// FIXED_FRAME+8(R1) etc) and some other low-level places.
//
// The reason for using a constant is to make supporting PIC easier (although
// we only support PIC on ppc64le which has a minimum 32 bytes of stack frame,
// and currently always use that much, PIC on ppc64 would need to use 48).

#define FIXED_FRAME 32
======== xmmintrin.h ========

/* Copyright (C) 2002-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the
   terms of the GNU General Public License as published by the Free Software
   Foundation; either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
   details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published
   by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a
   copy of the GCC Runtime Library Exception along with this program; see the
   files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef _XMMINTRIN_H_INCLUDED
#define _XMMINTRIN_H_INCLUDED

#ifndef __SSE__
# error "SSE instruction set not enabled"
#else

/* We need type definitions from the MMX header file.  */
#include <mmintrin.h>

/* Get _mm_malloc () and _mm_free ().  */
#include <mm_malloc.h>

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));

/* Internal data types for implementing the intrinsics.  */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

/* Create a selector for use with the SHUFPS instruction.  */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))

/* Constants for use with _mm_prefetch.  */
enum _mm_hint
{
  _MM_HINT_T0 = 3,
  _MM_HINT_T1 = 2,
  _MM_HINT_T2 = 1,
  _MM_HINT_NTA = 0
};

/* Bits in the MXCSR.  */
#define _MM_EXCEPT_MASK       0x003f
#define _MM_EXCEPT_INVALID    0x0001
#define _MM_EXCEPT_DENORM     0x0002
#define _MM_EXCEPT_DIV_ZERO   0x0004
#define _MM_EXCEPT_OVERFLOW   0x0008
#define _MM_EXCEPT_UNDERFLOW  0x0010
#define _MM_EXCEPT_INEXACT    0x0020
#define _MM_MASK_MASK         0x1f80
#define _MM_MASK_INVALID      0x0080
#define _MM_MASK_DENORM       0x0100
#define _MM_MASK_DIV_ZERO     0x0200
#define _MM_MASK_OVERFLOW     0x0400
#define _MM_MASK_UNDERFLOW    0x0800
#define _MM_MASK_INEXACT      0x1000
#define _MM_ROUND_MASK        0x6000
#define _MM_ROUND_NEAREST     0x0000
#define _MM_ROUND_DOWN        0x2000
#define _MM_ROUND_UP          0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000
#define _MM_FLUSH_ZERO_MASK   0x8000
#define _MM_FLUSH_ZERO_ON     0x8000
#define _MM_FLUSH_ZERO_OFF    0x0000

/* Create a vector of zeros.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ps (void) { return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; }
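/* Usage sketch (illustrative, not part of the header): _MM_SHUFFLE packs four
   2-bit element selectors into one immediate; _MM_SHUFFLE(3,2,1,0) == 0xE4 is
   the identity shuffle.  _mm_shuffle_ps, _mm_set_ps, and _mm_storeu_ps are
   defined further down in this header. */
#include <xmmintrin.h>
#include <stdio.h>

int main (void)
{
  __m128 v = _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f);             /* elements 0,1,2,3 */
  __m128 r = _mm_shuffle_ps (v, v, _MM_SHUFFLE (0, 1, 2, 3)); /* reverse them */
  float out[4];
  _mm_storeu_ps (out, r);
  printf ("%g %g %g %g\n", out[0], out[1], out[2], out[3]);   /* 3 2 1 0 */
  return 0;
}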
/* Perform the respective operation on the lower SPFP (single-precision
   floating-point) values of A and B; the upper three SPFP values are
   passed through from A.  */

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ss (__m128 __A) { return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ss (__m128 __A) { return (__m128) __builtin_ia32_rcpss ((__v4sf)__A); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ss (__m128 __A) { return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B); }

/* Perform the respective operation on the four SPFP values in A and B.  */

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ps (__m128 __A) { return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ps (__m128 __A) { return (__m128) __builtin_ia32_rcpps ((__v4sf)__A); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ps (__m128 __A) { return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B); }
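/* Usage sketch (illustrative): the _ss forms operate on element 0 only, the
   _ps forms on all four elements. */
#include <xmmintrin.h>

static void ss_vs_ps (void)
{
  __m128 a = _mm_set_ps (40.0f, 30.0f, 20.0f, 10.0f); /* elements 10,20,30,40 */
  __m128 b = _mm_set1_ps (1.0f);
  __m128 s = _mm_add_ss (a, b); /* {11, 20, 30, 40} */
  __m128 p = _mm_add_ps (a, b); /* {11, 21, 31, 41} */
  (void) s; (void) p;
}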
/* Perform logical bit-wise operations on 128-bit values.  */

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_ps (__m128 __A, __m128 __B) { return __builtin_ia32_andps (__A, __B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_ps (__m128 __A, __m128 __B) { return __builtin_ia32_andnps (__A, __B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_ps (__m128 __A, __m128 __B) { return __builtin_ia32_orps (__A, __B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_ps (__m128 __A, __m128 __B) { return __builtin_ia32_xorps (__A, __B); }

/* Perform a comparison on the lower SPFP values of A and B.  If the
   comparison is true, place a mask of all ones in the result, otherwise a
   mask of zeros.  The upper three SPFP values are passed through from A.  */

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
                                        (__v4sf) __builtin_ia32_cmpltss ((__v4sf) __B, (__v4sf) __A));
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
                                        (__v4sf) __builtin_ia32_cmpless ((__v4sf) __B, (__v4sf) __A));
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
                                        (__v4sf) __builtin_ia32_cmpnltss ((__v4sf) __B, (__v4sf) __A));
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
                                        (__v4sf) __builtin_ia32_cmpnless ((__v4sf) __B, (__v4sf) __A));
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B); }
/* Perform a comparison on the four SPFP values of A and B.  For each
   element, if the comparison is true, place a mask of all ones in the
   result, otherwise a mask of zeros.  */

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B); }
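/* Usage sketch (illustrative): packed compares produce per-element all-ones /
   all-zeros masks; _mm_movemask_ps (defined further down) compresses them to
   four bits. */
#include <xmmintrin.h>

static int lanes_below (void)
{
  __m128 x = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);  /* elements 1,2,3,4 */
  __m128 m = _mm_cmplt_ps (x, _mm_set1_ps (2.5f)); /* true in elements 0,1 */
  return _mm_movemask_ps (m);                      /* == 0x3 */
}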
/* Compare the lower SPFP values of A and B and return 1 if true
   and 0 if false.  */

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_ss (__m128 __A, __m128 __B) { return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_ss (__m128 __A, __m128 __B) { return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_ss (__m128 __A, __m128 __B) { return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_ss (__m128 __A, __m128 __B) { return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_ss (__m128 __A, __m128 __B) { return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_ss (__m128 __A, __m128 __B) { return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_ss (__m128 __A, __m128 __B) { return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_ss (__m128 __A, __m128 __B) { return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_ss (__m128 __A, __m128 __B) { return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_ss (__m128 __A, __m128 __B) { return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_ss (__m128 __A, __m128 __B) { return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_ss (__m128 __A, __m128 __B) { return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B); }

/* Convert the lower SPFP value to a 32-bit integer according to the current
   rounding mode.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si32 (__m128 __A) { return __builtin_ia32_cvtss2si ((__v4sf) __A); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_ss2si (__m128 __A) { return _mm_cvtss_si32 (__A); }

#ifdef __x86_64__
/* Convert the lower SPFP value to a 32-bit integer according to the current
   rounding mode.  */

/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64 (__m128 __A) { return __builtin_ia32_cvtss2si64 ((__v4sf) __A); }

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64x (__m128 __A) { return __builtin_ia32_cvtss2si64 ((__v4sf) __A); }
#endif
/* Convert the two lower SPFP values to 32-bit integers according to the
   current rounding mode.  Return the integers in packed form.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi32 (__m128 __A) { return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_ps2pi (__m128 __A) { return _mm_cvtps_pi32 (__A); }

/* Truncate the lower SPFP value to a 32-bit integer.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si32 (__m128 __A) { return __builtin_ia32_cvttss2si ((__v4sf) __A); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_ss2si (__m128 __A) { return _mm_cvttss_si32 (__A); }

#ifdef __x86_64__
/* Truncate the lower SPFP value to a 32-bit integer.  */

/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si64 (__m128 __A) { return __builtin_ia32_cvttss2si64 ((__v4sf) __A); }

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttss_si64x (__m128 __A) { return __builtin_ia32_cvttss2si64 ((__v4sf) __A); }
#endif

/* Truncate the two lower SPFP values to 32-bit integers.  Return the
   integers in packed form.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_pi32 (__m128 __A) { return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_ps2pi (__m128 __A) { return _mm_cvttps_pi32 (__A); }

/* Convert B to a SPFP value and insert it as element zero in A.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_ss (__m128 __A, int __B) { return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_si2ss (__m128 __A, int __B) { return _mm_cvtsi32_ss (__A, __B); }

#ifdef __x86_64__
/* Convert B to a SPFP value and insert it as element zero in A.  */

/* Intel intrinsic.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_ss (__m128 __A, long long __B) { return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); }

/* Microsoft intrinsic.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_ss (__m128 __A, long long __B) { return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); }
#endif

/* Convert the two 32-bit values in B to SPFP form and insert them
   as the two lower elements in A.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_ps (__m128 __A, __m64 __B) { return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_pi2ps (__m128 __A, __m64 __B) { return _mm_cvtpi32_ps (__A, __B); }
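/* Usage sketch (illustrative): the cvt forms honor the current MXCSR rounding
   mode (round-to-nearest-even by default); the cvtt forms always truncate
   toward zero. */
#include <xmmintrin.h>

static void cvt_demo (void)
{
  __m128 v = _mm_set_ss (2.7f);
  int rounded   = _mm_cvtss_si32 (v);  /* 3 under the default mode */
  int truncated = _mm_cvttss_si32 (v); /* 2 */
  (void) rounded; (void) truncated;
}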
/* Convert the four signed 16-bit values in A to SPFP form.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi16_ps (__m64 __A)
{
  __v4hi __sign;
  __v2si __hisi, __losi;
  __v4sf __zero, __ra, __rb;

  /* This comparison against zero gives us a mask that can be used to fill
     in the missing sign bits in the unpack operations below, so that we get
     signed values after unpacking.  */
  __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);

  /* Convert the four words to doublewords.  */
  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);

  /* Convert the doublewords to floating point two at a time.  */
  __zero = (__v4sf) _mm_setzero_ps ();
  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);

  return (__m128) __builtin_ia32_movlhps (__ra, __rb);
}

/* Convert the four unsigned 16-bit values in A to SPFP form.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpu16_ps (__m64 __A)
{
  __v2si __hisi, __losi;
  __v4sf __zero, __ra, __rb;

  /* Convert the four words to doublewords.  */
  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);

  /* Convert the doublewords to floating point two at a time.  */
  __zero = (__v4sf) _mm_setzero_ps ();
  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);

  return (__m128) __builtin_ia32_movlhps (__ra, __rb);
}

/* Convert the low four signed 8-bit values in A to SPFP form.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi8_ps (__m64 __A)
{
  __v8qi __sign;

  /* This comparison against zero gives us a mask that can be used to fill
     in the missing sign bits in the unpack operations below, so that we get
     signed values after unpacking.  */
  __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);

  /* Convert the four low bytes to words.  */
  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);

  return _mm_cvtpi16_ps(__A);
}

/* Convert the low four unsigned 8-bit values in A to SPFP form.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpu8_ps(__m64 __A)
{
  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
  return _mm_cvtpu16_ps(__A);
}

/* Convert the four signed 32-bit values in A and B to SPFP form.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
{
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
  __v4sf __sfb = __builtin_ia32_cvtpi2ps (__sfa, (__v2si)__B);
  return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
}

/* Convert the four SPFP values in A to four signed 16-bit integers.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi16(__m128 __A)
{
  __v4sf __hisf = (__v4sf)__A;
  __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
  __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
  __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
  return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
}

/* Convert the four SPFP values in A to four signed 8-bit integers.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pi8(__m128 __A)
{
  __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
  return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
}
/* Selects four specific SPFP values from A and B based on MASK.  */
#ifdef __OPTIMIZE__
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
{ return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); }
#else
#define _mm_shuffle_ps(A, B, MASK) \
  ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
                                   (__v4sf)(__m128)(B), (int)(MASK)))
#endif

/* Selects and interleaves the upper two SPFP values from A and B.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B); }

/* Selects and interleaves the lower two SPFP values from A and B.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B); }

/* Sets the upper two SPFP values with 64-bits of data loaded from P;
   the lower two values are passed through from A.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pi (__m128 __A, __m64 const *__P)
{ return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P); }

/* Stores the upper two SPFP values of A into P.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pi (__m64 *__P, __m128 __A) { __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A); }

/* Moves the upper two values of B into the lower two values of A.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehl_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B); }

/* Moves the lower two values of B into the upper two values of A.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movelh_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B); }

/* Sets the lower two SPFP values with 64-bits of data loaded from P;
   the upper two values are passed through from A.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pi (__m128 __A, __m64 const *__P)
{ return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P); }

/* Stores the lower two SPFP values of A into P.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pi (__m64 *__P, __m128 __A) { __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A); }

/* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_ps (__m128 __A) { return __builtin_ia32_movmskps ((__v4sf)__A); }

/* Return the contents of the control register.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_getcsr (void) { return __builtin_ia32_stmxcsr (); }
/* Read exception bits from the control register.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_EXCEPTION_STATE (void) { return _mm_getcsr() & _MM_EXCEPT_MASK; }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_EXCEPTION_MASK (void) { return _mm_getcsr() & _MM_MASK_MASK; }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_ROUNDING_MODE (void) { return _mm_getcsr() & _MM_ROUND_MASK; }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_GET_FLUSH_ZERO_MODE (void) { return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; }

/* Set the control register to I.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setcsr (unsigned int __I) { __builtin_ia32_ldmxcsr (__I); }

/* Set exception bits in the control register.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_EXCEPTION_STATE(unsigned int __mask) { _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_EXCEPTION_MASK (unsigned int __mask) { _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_ROUNDING_MODE (unsigned int __mode) { _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode) { _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode); }

/* Create a vector with element 0 as F and the rest zero.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ss (float __F) { return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f }; }

/* Create a vector with all four elements equal to F.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ps (float __F) { return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F }; }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ps1 (float __F) { return _mm_set1_ps (__F); }

/* Create a vector with element 0 as *P and the rest zero.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ss (float const *__P) { return _mm_set_ss (*__P); }

/* Create a vector with all four elements equal to *P.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_ps (float const *__P) { return _mm_set1_ps (*__P); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ps1 (float const *__P) { return _mm_load1_ps (__P); }

/* Load four SPFP values from P.  The address must be 16-byte aligned.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ps (float const *__P) { return (__m128) *(__v4sf *)__P; }

/* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ps (float const *__P) { return (__m128) __builtin_ia32_loadups (__P); }
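/* Usage sketch (illustrative): temporarily change the rounding mode with the
   helpers above, restoring the caller's mode afterwards. */
#include <xmmintrin.h>

static void with_truncation (void)
{
  unsigned int saved = _MM_GET_ROUNDING_MODE ();
  _MM_SET_ROUNDING_MODE (_MM_ROUND_TOWARD_ZERO);
  /* ... conversions performed here truncate ... */
  _MM_SET_ROUNDING_MODE (saved);
}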
/* Load four SPFP values in reverse order.  The address must be aligned.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadr_ps (float const *__P)
{
  __v4sf __tmp = *(__v4sf *)__P;
  return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
}

/* Create the vector [Z Y X W].  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
{ return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; }

/* Create the vector [W X Y Z].  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
{ return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W }; }

/* Stores the lower SPFP value.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ss (float *__P, __m128 __A) { *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); }
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_f32 (__m128 __A) { return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); }

/* Store four SPFP values.  The address must be 16-byte aligned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ps (float *__P, __m128 __A) { *(__v4sf *)__P = (__v4sf)__A; }

/* Store four SPFP values.  The address need not be 16-byte aligned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ps (float *__P, __m128 __A) { __builtin_ia32_storeups (__P, (__v4sf)__A); }

/* Store the lower SPFP value across four words.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_ps (float *__P, __m128 __A)
{
  __v4sf __va = (__v4sf)__A;
  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
  _mm_storeu_ps (__P, __tmp);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ps1 (float *__P, __m128 __A) { _mm_store1_ps (__P, __A); }

/* Store four SPFP values in reverse order.  The address must be aligned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storer_ps (float *__P, __m128 __A)
{
  __v4sf __va = (__v4sf)__A;
  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
  _mm_store_ps (__P, __tmp);
}

/* Sets the low SPFP value of A from the low value of B.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B); }

/* Extracts one of the four words of A.  The selector N must be immediate.  */
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_pi16 (__m64 const __A, int const __N)
{ return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pextrw (__m64 const __A, int const __N) { return _mm_extract_pi16 (__A, __N); }
#else
#define _mm_extract_pi16(A, N) \
  ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
#endif
/* Inserts word D into one of four words of A.  The selector N must be
   immediate.  */
#ifdef __OPTIMIZE__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
{ return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pinsrw (__m64 const __A, int const __D, int const __N) { return _mm_insert_pi16 (__A, __D, __N); }
#else
#define _mm_insert_pi16(A, D, N) \
  ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
                                        (int)(D), (int)(N)))
#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
#endif

/* Compute the element-wise maximum of signed 16-bit values.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pi16 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaxsw (__m64 __A, __m64 __B) { return _mm_max_pi16 (__A, __B); }

/* Compute the element-wise maximum of unsigned 8-bit values.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pu8 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaxub (__m64 __A, __m64 __B) { return _mm_max_pu8 (__A, __B); }

/* Compute the element-wise minimum of signed 16-bit values.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pi16 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pminsw (__m64 __A, __m64 __B) { return _mm_min_pi16 (__A, __B); }

/* Compute the element-wise minimum of unsigned 8-bit values.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pu8 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pminub (__m64 __A, __m64 __B) { return _mm_min_pu8 (__A, __B); }

/* Create an 8-bit mask of the signs of 8-bit values.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A) { return __builtin_ia32_pmovmskb ((__v8qi)__A); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmovmskb (__m64 __A) { return _mm_movemask_pi8 (__A); }

/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
   in B and produce the high 16 bits of the 32-bit results.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pu16 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhuw (__m64 __A, __m64 __B) { return _mm_mulhi_pu16 (__A, __B); }
/* Return a combination of the four 16-bit values in A.  The selector
   must be an immediate.  */
#ifdef __OPTIMIZE__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __A, int const __N) { return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pshufw (__m64 __A, int const __N) { return _mm_shuffle_pi16 (__A, __N); }
#else
#define _mm_shuffle_pi16(A, N) \
  ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
#endif

/* Conditionally store byte elements of A into P.  The high bit of each
   byte in the selector N determines whether the corresponding byte from
   A is stored.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{ __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_maskmovq (__m64 __A, __m64 __N, char *__P) { _mm_maskmove_si64 (__A, __N, __P); }

/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_pu8 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgb (__m64 __A, __m64 __B) { return _mm_avg_pu8 (__A, __B); }

/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_pu16 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgw (__m64 __A, __m64 __B) { return _mm_avg_pu16 (__A, __B); }

/* Compute the sum of the absolute differences of the unsigned 8-bit
   values in A and B.  Return the value in the lower 16-bit word; the
   upper words are cleared.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_pu8 (__m64 __A, __m64 __B) { return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psadbw (__m64 __A, __m64 __B) { return _mm_sad_pu8 (__A, __B); }

/* Loads one cache line from address P to a location "closer" to the
   processor.  The selector I specifies the type of prefetch operation.  */
#ifdef __OPTIMIZE__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_prefetch (const void *__P, enum _mm_hint __I) { __builtin_prefetch (__P, 0, __I); }
#else
#define _mm_prefetch(P, I) \
  __builtin_prefetch ((P), 0, (I))
#endif

/* Stores the data in A to the address P without polluting the caches.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pi (__m64 *__P, __m64 __A)
{ __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A); }

/* Likewise.  The address must be 16-byte aligned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ps (float *__P, __m128 __A) { __builtin_ia32_movntps (__P, (__v4sf)__A); }
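/* Usage sketch (illustrative): non-temporal stores bypass the cache; the
   _mm_sfence just below makes them globally visible before later stores.
   Assumes dst is 16-byte aligned and n is a multiple of 4. */
#include <stddef.h>
#include <xmmintrin.h>

static void fill_nt (float *dst, size_t n, __m128 v)
{
  for (size_t i = 0; i < n; i += 4)
    _mm_stream_ps (dst + i, v);
  _mm_sfence ();
}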
/* Guarantees that every preceding store is globally visible before
   any subsequent store.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sfence (void) { __builtin_ia32_sfence (); }

/* The execution of the next instruction is delayed by an implementation
   specific amount of time.  The instruction does not modify the
   architectural state.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_pause (void) { __builtin_ia32_pause (); }

/* Transpose the 4x4 matrix composed of row[0-3].  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)                       \
do {                                                                    \
  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);    \
  __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1);                   \
  __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3);                   \
  __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1);                   \
  __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3);                   \
  (row0) = __builtin_ia32_movlhps (__t0, __t1);                         \
  (row1) = __builtin_ia32_movhlps (__t1, __t0);                         \
  (row2) = __builtin_ia32_movlhps (__t2, __t3);                         \
  (row3) = __builtin_ia32_movhlps (__t3, __t2);                         \
} while (0)

/* For backward source compatibility.  */
#ifdef __SSE2__
# include <emmintrin.h>
#endif

#endif /* __SSE__ */
#endif /* _XMMINTRIN_H_INCLUDED */
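/* Usage sketch (illustrative): transposing a 4x4 matrix held in four rows
   with the _MM_TRANSPOSE4_PS macro above. */
#include <xmmintrin.h>

static void transpose_demo (void)
{
  __m128 r0 = _mm_setr_ps (1, 2, 3, 4);
  __m128 r1 = _mm_setr_ps (5, 6, 7, 8);
  __m128 r2 = _mm_setr_ps (9, 10, 11, 12);
  __m128 r3 = _mm_setr_ps (13, 14, 15, 16);
  _MM_TRANSPOSE4_PS (r0, r1, r2, r3); /* r0 == {1,5,9,13}, r1 == {2,6,10,14}, ... */
  (void) r0; (void) r1; (void) r2; (void) r3;
}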
======== fmaintrin.h ========

/* Copyright (C) 2011-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the
   terms of the GNU General Public License as published by the Free Software
   Foundation; either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
   details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published
   by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a
   copy of the GCC Runtime Library Exception along with this program; see the
   files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _FMAINTRIN_H_INCLUDED
#define _FMAINTRIN_H_INCLUDED

#ifndef __FMA__
# error "FMA instruction set not enabled"
#else

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{ return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{ return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{ return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{ return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{ return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C); }
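/* Usage sketch (illustrative): _mm_fmadd_ps computes a*b + c with a single
   rounding step.  Include <immintrin.h> (never this header directly) and
   compile with -mfma. */
#include <immintrin.h>

static __m128 muladd (__m128 a, __m128 b, __m128 c)
{
  return _mm_fmadd_ps (a, b, c);
}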
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{ return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{ return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{ return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B, -(__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
{ return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
{ return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
{ return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
{ return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
{ return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
{ return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); }

#endif
#endif
======== omp.h ========

/* Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
   details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published
   by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a
   copy of the GCC Runtime Library Exception along with this program; see the
   files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef OMP_H
#define OMP_H 1

#ifndef _LIBGOMP_OMP_LOCK_DEFINED
#define _LIBGOMP_OMP_LOCK_DEFINED 1
/* These two structures get edited by the libgomp build process to
   reflect the shape of the two types.  Their internals are private to
   the library.  */
typedef struct
{
  unsigned char _x[4] __attribute__((__aligned__(4)));
} omp_lock_t;

typedef struct
{
  unsigned char _x[8 + sizeof (void *)] __attribute__((__aligned__(sizeof (void *))));
} omp_nest_lock_t;
#endif

typedef enum omp_sched_t
{
  omp_sched_static = 1,
  omp_sched_dynamic = 2,
  omp_sched_guided = 3,
  omp_sched_auto = 4
} omp_sched_t;

#ifdef __cplusplus
extern "C" {
# define __GOMP_NOTHROW throw ()
#else
# define __GOMP_NOTHROW __attribute__((__nothrow__))
#endif

extern void omp_set_num_threads (int) __GOMP_NOTHROW;
extern int omp_get_num_threads (void) __GOMP_NOTHROW;
extern int omp_get_max_threads (void) __GOMP_NOTHROW;
extern int omp_get_thread_num (void) __GOMP_NOTHROW;
extern int omp_get_num_procs (void) __GOMP_NOTHROW;

extern int omp_in_parallel (void) __GOMP_NOTHROW;

extern void omp_set_dynamic (int) __GOMP_NOTHROW;
extern int omp_get_dynamic (void) __GOMP_NOTHROW;

extern void omp_set_nested (int) __GOMP_NOTHROW;
extern int omp_get_nested (void) __GOMP_NOTHROW;

extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW;
extern void omp_destroy_lock (omp_lock_t *) __GOMP_NOTHROW;
extern void omp_set_lock (omp_lock_t *) __GOMP_NOTHROW;
extern void omp_unset_lock (omp_lock_t *) __GOMP_NOTHROW;
extern int omp_test_lock (omp_lock_t *) __GOMP_NOTHROW;

extern void omp_init_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
extern void omp_destroy_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
extern void omp_set_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
extern void omp_unset_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;
extern int omp_test_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW;

extern double omp_get_wtime (void) __GOMP_NOTHROW;
extern double omp_get_wtick (void) __GOMP_NOTHROW;

void omp_set_schedule (omp_sched_t, int) __GOMP_NOTHROW;
void omp_get_schedule (omp_sched_t *, int *) __GOMP_NOTHROW;
int omp_get_thread_limit (void) __GOMP_NOTHROW;
void omp_set_max_active_levels (int) __GOMP_NOTHROW;
int omp_get_max_active_levels (void) __GOMP_NOTHROW;
int omp_get_level (void) __GOMP_NOTHROW;
int omp_get_ancestor_thread_num (int) __GOMP_NOTHROW;
int omp_get_team_size (int) __GOMP_NOTHROW;
int omp_get_active_level (void) __GOMP_NOTHROW;

int omp_in_final (void) __GOMP_NOTHROW;

#ifdef __cplusplus
}
#endif

#endif /* OMP_H */
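/* Usage sketch (illustrative): querying the runtime from a parallel region.
   Compile with -fopenmp. */
#include <omp.h>
#include <stdio.h>

int main (void)
{
  omp_set_num_threads (4);
  #pragma omp parallel
  printf ("thread %d of %d\n", omp_get_thread_num (), omp_get_num_threads ());
  return 0;
}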
*/ #include <emmintrin.h> #if !defined (__AES__) && !defined (__PCLMUL__) # error "AES/PCLMUL instructions not enabled" #else /* AES */ #ifdef __AES__ /* Performs 1 round of AES decryption of the first m128i using the second m128i as a round key. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_aesdec_si128 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y); } /* Performs the last round of AES decryption of the first m128i using the second m128i as a round key. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_aesdeclast_si128 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X, (__v2di)__Y); } /* Performs 1 round of AES encryption of the first m128i using the second m128i as a round key. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_aesenc_si128 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y); } /* Performs the last round of AES encryption of the first m128i using the second m128i as a round key. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_aesenclast_si128 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y); } /* Performs the InverseMixColumn operation on the source m128i and stores the result into the m128i destination. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_aesimc_si128 (__m128i __X) { return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X); } /* Generates an m128i round key for the input m128i AES cipher key and byte round constant. The second parameter must be a compile time constant. */ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_aeskeygenassist_si128 (__m128i __X, const int __C) { return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C); } #else #define _mm_aeskeygenassist_si128(X, C) \ ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \ (int)(C))) #endif #endif /* __AES__ */ /* PCLMUL */ #ifdef __PCLMUL__ /* Performs carry-less integer multiplication of 64-bit halves of 128-bit input operands. The third parameter indicates which 64-bit halves of the input parameters v1 and v2 should be used. It must be a compile time constant. */ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I) { return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X, (__v2di)__Y, __I); } #else #define _mm_clmulepi64_si128(X, Y, I) \ ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (int)(I))) #endif #endif /* __PCLMUL__ */ #endif /* __AES__/__PCLMUL__ */ #endif /* _WMMINTRIN_H_INCLUDED */
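
/* [Editor's note] Usage sketch for the AES intrinsics above; not part of
   the original archive. The array rk is a hypothetical, pre-expanded
   AES-128 key schedule (11 round keys); compile with -maes. */
#include <wmmintrin.h>

static __m128i
aes128_encrypt_block (__m128i block, const __m128i rk[11])
{
  int i;
  block = _mm_xor_si128 (block, rk[0]);          /* initial AddRoundKey */
  for (i = 1; i < 10; i++)
    block = _mm_aesenc_si128 (block, rk[i]);     /* nine full rounds */
  return _mm_aesenclast_si128 (block, rk[10]);   /* final round */
}

rtmintrin.h 0000644 00000005044 14711270664 0006756 0 ustar 00 /* Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.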
See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _IMMINTRIN_H_INCLUDED # error "Never use <rtmintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __RTM__ # error "RTM instruction set not enabled" #endif /* __RTM__ */ #ifndef _RTMINTRIN_H_INCLUDED #define _RTMINTRIN_H_INCLUDED #define _XBEGIN_STARTED (~0u) #define _XABORT_EXPLICIT (1 << 0) #define _XABORT_RETRY (1 << 1) #define _XABORT_CONFLICT (1 << 2) #define _XABORT_CAPACITY (1 << 3) #define _XABORT_DEBUG (1 << 4) #define _XABORT_NESTED (1 << 5) #define _XABORT_CODE(x) (((x) >> 24) & 0xFF) /* Start an RTM code region. Return _XBEGIN_STARTED on success and the abort condition otherwise. */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xbegin (void) { return __builtin_ia32_xbegin (); } /* Specify the end of an RTM code region. If it corresponds to the outermost transaction, the transaction commit is attempted. If the commit fails, control is transferred to the outermost transaction fallback handler. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xend (void) { __builtin_ia32_xend (); } /* Force an RTM abort condition. Control is transferred to the outermost transaction fallback handler with the abort condition IMM. */ #ifdef __OPTIMIZE__ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xabort (const unsigned int imm) { __builtin_ia32_xabort (imm); } #else #define _xabort(N) __builtin_ia32_xabort (N) #endif /* __OPTIMIZE__ */ #endif /* _RTMINTRIN_H_INCLUDED */
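
/* [Editor's note] The canonical retry pattern for the RTM intrinsics
   above; not part of the original archive. fallback_lock/fallback_unlock
   are hypothetical; compile with -mrtm and include <immintrin.h>, since
   this header refuses direct inclusion. */
#include <immintrin.h>

extern void fallback_lock (void);
extern void fallback_unlock (void);

static void
increment_transactionally (long *counter)
{
  unsigned int status = _xbegin ();
  if (status == _XBEGIN_STARTED)
    {
      ++*counter;               /* transactional path */
      _xend ();                 /* attempt the commit */
    }
  else
    {
      /* Aborted; status carries the _XABORT_* flags. Fall back. */
      fallback_lock ();
      ++*counter;
      fallback_unlock ();
    }
}

avx2intrin.h 0000644 00000160225 14711270664 0007037 0 ustar 00 /* Copyright (C) 2011-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _IMMINTRIN_H_INCLUDED # error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." #endif /* Sum absolute 8-bit integer difference of adjacent groups of 4 byte integers in the first 2 operands. Starting offsets within operands are determined by the 3rd mask operand.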
*/ #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M) { return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X, (__v32qi)__Y, __M); } #else #define _mm256_mpsadbw_epu8(X, Y, M) \ ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \ (__v32qi)(__m256i)(Y), (int)(M))) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_abs_epi8 (__m256i __A) { return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_abs_epi16 (__m256i __A) { return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_abs_epi32 (__m256i __A) { return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_packs_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_packs_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_packus_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_packus_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddd256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddq256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_adds_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_adds_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_adds_epu8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_adds_epu16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B); } #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) _mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N) { return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A, (__v4di)__B, __N * 8); } #else /* Without optimization, (__N * 8) would end up in a register and the insn pattern would not match, so use a #define instead. */ #define _mm256_alignr_epi8(A, B, N) \ ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \ (__v4di)(__m256i)(B), \ (int)(N) * 8)) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_and_si256 (__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_andsi256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_andnot_si256 (__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_avg_epu8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_avg_epu16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M) { return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X, (__v32qi)__Y, (__v32qi)__M); } #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M) { return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X, (__v16hi)__Y, __M); } #else #define _mm256_blend_epi16(X, Y, M) \ ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \ (__v16hi)(__m256i)(Y), (int)(M))) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpeqb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpeqw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpeqd256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpeq_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpeqq256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpgtb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpgtw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pcmpgtd256 ((__v8si)__A, (__v8si)__B); }
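
/* [Editor's note] Sketch of the compare-then-blend idiom using the
   intrinsics defined above; not part of the original archive. The
   comparison yields 0xFF in each byte where it holds, and
   _mm256_blendv_epi8 selects from its second operand wherever the mask
   byte's top bit is set (equivalent to _mm256_max_epi8, shown here only
   to illustrate the mask convention). */
static __m256i
select_greater_epi8 (__m256i a, __m256i b)
{
  __m256i gt = _mm256_cmpgt_epi8 (a, b);   /* 0xFF where a > b */
  return _mm256_blendv_epi8 (b, a, gt);    /* take a where mask set */
}

extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi64 (__m256i __A,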
__m256i __B) { return (__m256i)__builtin_ia32_pcmpgtq256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_hadd_epi16 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X, (__v16hi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_hadd_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_hadds_epi16 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X, (__v16hi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_hsub_epi16 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X, (__v16hi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_hsub_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_hsubs_epi16 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X, (__v16hi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maddubs_epi16 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X, (__v32qi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_madd_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_epu8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_epu16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_epu32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) _mm256_min_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_epu8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_epu16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_epu32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_movemask_epi8 (__m256i __A) { return __builtin_ia32_pmovmskb256 ((__v32qi)__A); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi8_epi16 (__m128i __X) { return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi8_epi32 (__m128i __X) { return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi8_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi16_epi32 (__m128i __X) { return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi16_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi32_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepu8_epi16 (__m128i __X) { return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepu8_epi32 (__m128i __X) { return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepu8_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepu16_epi32 (__m128i __X) { return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepu16_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepu32_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mulhrs_epi16 
(__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X, (__v16hi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mulhi_epu16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mulhi_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mullo_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmullw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mullo_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmulld256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_epu32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_or_si256 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_por256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sad_epu8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_shuffle_epi8 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X, (__v32qi)__Y); } #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_shuffle_epi32 (__m256i __A, const int __mask) { return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_shufflehi_epi16 (__m256i __A, const int __mask) { return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_shufflelo_epi16 (__m256i __A, const int __mask) { return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask); } #else #define _mm256_shuffle_epi32(A, N) \ ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N))) #define _mm256_shufflehi_epi16(A, N) \ ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N))) #define _mm256_shufflelo_epi16(A, N) \ ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N))) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sign_epi8 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sign_epi16 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sign_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y); } #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
_mm256_bslli_epi128 (__m256i __A, const int __N) { return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_slli_si256 (__m256i __A, const int __N) { return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8); } #else #define _mm256_bslli_epi128(A, N) \ ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8)) #define _mm256_slli_si256(A, N) \ ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8)) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_slli_epi16 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sll_epi16 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_slli_epi32 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sll_epi32 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_slli_epi64 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sll_epi64 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srai_epi16 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sra_epi16 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srai_epi32 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sra_epi32 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B); } #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_bsrli_epi128 (__m256i __A, const int __N) { return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srli_si256 (__m256i __A, const int __N) { return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8); } #else #define _mm256_bsrli_epi128(A, N) \ ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8)) #define _mm256_srli_si256(A, N) \ ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8)) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srli_epi16 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srl_epi16 (__m256i __A, __m128i __B) { return 
(__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srli_epi32 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srl_epi32 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srli_epi64 (__m256i __A, int __B) { return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srl_epi64 (__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubd256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubq256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_subs_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_subs_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_subs_epu8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_subs_epu16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpackhi_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpackhi_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpackhi_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpackhi_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpacklo_epi8 (__m256i __A, 
__m256i __B) { return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpacklo_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpacklo_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_unpacklo_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_xor_si256 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pxor256 ((__v4di)__A, (__v4di)__B); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_stream_load_si256 (__m256i const *__X) { return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_broadcastss_ps (__m128 __X) { return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastss_ps (__m128 __X) { return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastsd_pd (__m128d __X) { return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastsi128_si256 (__m128i __X) { return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X); } #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M) { return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X, (__v4si)__Y, __M); } #else #define _mm_blend_epi32(X, Y, M) \ ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \ (__v4si)(__m128i)(Y), (int)(M))) #endif #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M) { return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X, (__v8si)__Y, __M); } #else #define _mm256_blend_epi32(X, Y, M) \ ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \ (__v8si)(__m256i)(Y), (int)(M))) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastb_epi8 (__m128i __X) { return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastw_epi16 (__m128i __X) { return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastd_epi32 (__m128i __X) { return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_broadcastq_epi64 (__m128i __X) { return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X); } extern __inline __m128i __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) _mm_broadcastb_epi8 (__m128i __X) { return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_broadcastw_epi16 (__m128i __X) { return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_broadcastd_epi32 (__m128i __X) { return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_broadcastq_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y); } #ifdef __OPTIMIZE__ extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permute4x64_pd (__m256d __X, const int __M) { return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M); } #else #define _mm256_permute4x64_pd(X, M) \ ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M))) #endif extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permutevar8x32_ps (__m256 __X, __m256i __Y) { return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y); } #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permute4x64_epi64 (__m256i __X, const int __M) { return (__m256i) __builtin_ia32_permdi256 ((__v4di)__X, __M); } #else #define _mm256_permute4x64_epi64(X, M) \ ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M))) #endif #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M) { return (__m256i) __builtin_ia32_permti256 ((__v4di)__X, (__v4di)__Y, __M); } #else #define _mm256_permute2x128_si256(X, Y, M) \ ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(M))) #endif #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_extracti128_si256 (__m256i __X, const int __M) { return (__m128i) __builtin_ia32_extract128i256 ((__v4di)__X, __M); } #else #define _mm256_extracti128_si256(X, M) \ ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M))) #endif #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M) { return (__m256i) __builtin_ia32_insert128i256 ((__v4di)__X, (__v2di)__Y, __M); } #else #define _mm256_inserti128_si256(X, Y, M) \ ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \ (__v2di)(__m128i)(Y), \ (int)(M))) #endif extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskload_epi32 (int const *__X, __m256i __M ) { return (__m256i) __builtin_ia32_maskloadd256 ((const __v8si *)__X, (__v8si)__M); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskload_epi64 (long long const *__X, __m256i __M ) { return (__m256i) __builtin_ia32_maskloadq256 ((const __v4di *)__X, (__v4di)__M); } extern 
__inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskload_epi32 (int const *__X, __m128i __M ) { return (__m128i) __builtin_ia32_maskloadd ((const __v4si *)__X, (__v4si)__M); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskload_epi64 (long long const *__X, __m128i __M ) { return (__m128i) __builtin_ia32_maskloadq ((const __v2di *)__X, (__v2di)__M); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y ) { __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y ) { __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y ) { __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y ) { __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sllv_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sllv_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sllv_epi64 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sllv_epi64 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srav_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_srav_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srlv_epi32 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_srlv_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_srlv_epi64 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_srlv_epi64 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y); } #ifdef __OPTIMIZE__ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_i32gather_pd (double const *base, __m128i index, const int scale) { __v2df src = _mm_setzero_pd (); __v2df mask = _mm_cmpeq_pd (src, src); return (__m128d) __builtin_ia32_gathersiv2df (src, base, (__v4si)index, mask, scale); } extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index, __m128d mask, const int scale) { return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src, base, (__v4si)index, (__v2df)mask, scale); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i32gather_pd (double const *base, __m128i index, const int scale) { __v4df src = _mm256_setzero_pd (); __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ); return (__m256d) __builtin_ia32_gathersiv4df (src, base, (__v4si)index, mask, scale); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i32gather_pd (__m256d src, double const *base, __m128i index, __m256d mask, const int scale) { return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src, base, (__v4si)index, (__v4df)mask, scale); } extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i64gather_pd (double const *base, __m128i index, const int scale) { __v2df src = _mm_setzero_pd (); __v2df mask = _mm_cmpeq_pd (src, src); return (__m128d) __builtin_ia32_gatherdiv2df (src, base, (__v2di)index, mask, scale); } extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index, __m128d mask, const int scale) { return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src, base, (__v2di)index, (__v2df)mask, scale); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i64gather_pd (double const *base, __m256i index, const int scale) { __v4df src = _mm256_setzero_pd (); __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ); return (__m256d) __builtin_ia32_gatherdiv4df (src, base, (__v4di)index, mask, scale); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i64gather_pd (__m256d src, double const *base, __m256i index, __m256d mask, const int scale) { return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src, base, (__v4di)index, (__v4df)mask, scale); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i32gather_ps (float const *base, __m128i index, const int scale) { __v4sf src = _mm_setzero_ps (); __v4sf mask = _mm_cmpeq_ps (src, src); return (__m128) __builtin_ia32_gathersiv4sf (src, base, (__v4si)index, mask, scale); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index, __m128 mask, const int scale) { return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src, base, (__v4si)index, (__v4sf)mask, scale); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i32gather_ps (float const *base, __m256i index, const int scale) { __v8sf src = _mm256_setzero_ps (); __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); return (__m256) __builtin_ia32_gathersiv8sf (src, base, (__v8si)index, mask, scale); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i32gather_ps (__m256 src, float const *base, __m256i 
index, __m256 mask, const int scale) { return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src, base, (__v8si)index, (__v8sf)mask, scale); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i64gather_ps (float const *base, __m128i index, const int scale) { __v4sf src = _mm_setzero_ps (); __v4sf mask = _mm_cmpeq_ps (src, src); return (__m128) __builtin_ia32_gatherdiv4sf (src, base, (__v2di)index, mask, scale); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index, __m128 mask, const int scale) { return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src, base, (__v2di)index, (__v4sf)mask, scale); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i64gather_ps (float const *base, __m256i index, const int scale) { __v4sf src = _mm_setzero_ps (); __v4sf mask = _mm_cmpeq_ps (src, src); return (__m128) __builtin_ia32_gatherdiv4sf256 (src, base, (__v4di)index, mask, scale); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i64gather_ps (__m128 src, float const *base, __m256i index, __m128 mask, const int scale) { return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src, base, (__v4di)index, (__v4sf)mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i32gather_epi64 (long long int const *base, __m128i index, const int scale) { __v2di src = __extension__ (__v2di){ 0, 0 }; __v2di mask = __extension__ (__v2di){ ~0, ~0 }; return (__m128i) __builtin_ia32_gathersiv2di (src, base, (__v4si)index, mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i32gather_epi64 (__m128i src, long long int const *base, __m128i index, __m128i mask, const int scale) { return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src, base, (__v4si)index, (__v2di)mask, scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i32gather_epi64 (long long int const *base, __m128i index, const int scale) { __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 }; __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 }; return (__m256i) __builtin_ia32_gathersiv4di (src, base, (__v4si)index, mask, scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i32gather_epi64 (__m256i src, long long int const *base, __m128i index, __m256i mask, const int scale) { return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src, base, (__v4si)index, (__v4di)mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i64gather_epi64 (long long int const *base, __m128i index, const int scale) { __v2di src = __extension__ (__v2di){ 0, 0 }; __v2di mask = __extension__ (__v2di){ ~0, ~0 }; return (__m128i) __builtin_ia32_gatherdiv2di (src, base, (__v2di)index, mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index, __m128i mask, const int scale) { return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src, base, (__v2di)index, (__v2di)mask, scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i64gather_epi64 (long long int const *base, __m256i 
index, const int scale) { __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 }; __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 }; return (__m256i) __builtin_ia32_gatherdiv4di (src, base, (__v4di)index, mask, scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i64gather_epi64 (__m256i src, long long int const *base, __m256i index, __m256i mask, const int scale) { return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src, base, (__v4di)index, (__v4di)mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i32gather_epi32 (int const *base, __m128i index, const int scale) { __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 }; __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 }; return (__m128i) __builtin_ia32_gathersiv4si (src, base, (__v4si)index, mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index, __m128i mask, const int scale) { return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src, base, (__v4si)index, (__v4si)mask, scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i32gather_epi32 (int const *base, __m256i index, const int scale) { __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 }; __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; return (__m256i) __builtin_ia32_gathersiv8si (src, base, (__v8si)index, mask, scale); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i32gather_epi32 (__m256i src, int const *base, __m256i index, __m256i mask, const int scale) { return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src, base, (__v8si)index, (__v8si)mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_i64gather_epi32 (int const *base, __m128i index, const int scale) { __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 }; __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 }; return (__m128i) __builtin_ia32_gatherdiv4si (src, base, (__v2di)index, mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index, __m128i mask, const int scale) { return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src, base, (__v2di)index, (__v4si)mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_i64gather_epi32 (int const *base, __m256i index, const int scale) { __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 }; __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 }; return (__m128i) __builtin_ia32_gatherdiv4si256 (src, base, (__v4di)index, mask, scale); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_i64gather_epi32 (__m128i src, int const *base, __m256i index, __m128i mask, const int scale) { return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src, base, (__v4di)index, (__v4si)mask, scale); } #else /* __OPTIMIZE__ */ #define _mm_i32gather_pd(BASE, INDEX, SCALE) \ (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \ (double const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v2df)_mm_set1_pd( \ (double)(long long int) -1), \ (int)SCALE) #define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ (__m128d) 
__builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC, \ (double const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v2df)(__m128d)MASK, \ (int)SCALE) #define _mm256_i32gather_pd(BASE, INDEX, SCALE) \ (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \ (double const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4df)_mm256_set1_pd ( \ (double)(long long int) -1), \ (int)SCALE) #define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC, \ (double const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4df)(__m256d)MASK, \ (int)SCALE) #define _mm_i64gather_pd(BASE, INDEX, SCALE) \ (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \ (double const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v2df)_mm_set1_pd ( \ (double)(long long int) -1), \ (int)SCALE) #define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC, \ (double const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v2df)(__m128d)MASK, \ (int)SCALE) #define _mm256_i64gather_pd(BASE, INDEX, SCALE) \ (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \ (double const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4df)_mm256_set1_pd ( \ (double)(long long int) -1), \ (int)SCALE) #define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \ (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC, \ (double const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4df)(__m256d)MASK, \ (int)SCALE) #define _mm_i32gather_ps(BASE, INDEX, SCALE) \ (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \ (float const *)BASE, \ (__v4si)(__m128i)INDEX, \ _mm_set1_ps ((float)(int) -1), \ (int)SCALE) #define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC, \ (float const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4sf)(__m128)MASK, \ (int)SCALE) #define _mm256_i32gather_ps(BASE, INDEX, SCALE) \ (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \ (float const *)BASE, \ (__v8si)(__m256i)INDEX, \ (__v8sf)_mm256_set1_ps ( \ (float)(int) -1), \ (int)SCALE) #define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \ (float const *)BASE, \ (__v8si)(__m256i)INDEX, \ (__v8sf)(__m256)MASK, \ (int)SCALE) #define _mm_i64gather_ps(BASE, INDEX, SCALE) \ (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_ps (), \ (float const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v4sf)_mm_set1_ps ( \ (float)(int) -1), \ (int)SCALE) #define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \ (float const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v4sf)(__m128)MASK, \ (int)SCALE) #define _mm256_i64gather_ps(BASE, INDEX, SCALE) \ (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \ (float const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4sf)_mm_set1_ps ( \ (float)(int) -1), \ (int)SCALE) #define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC, \ (float const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4sf)(__m128)MASK, \ (int)SCALE) #define _mm_i32gather_epi64(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \ (long long const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v2di)_mm_set1_epi64x (-1), \ (int)SCALE) #define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ (__m128i) __builtin_ia32_gathersiv2di
((__v2di)(__m128i)SRC, \ (long long const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v2di)(__m128i)MASK, \ (int)SCALE) #define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \ (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \ (long long const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4di)_mm256_set1_epi64x (-1), \ (int)SCALE) #define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC, \ (long long const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4di)(__m256i)MASK, \ (int)SCALE) #define _mm_i64gather_epi64(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \ (long long const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v2di)_mm_set1_epi64x (-1), \ (int)SCALE) #define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC, \ (long long const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v2di)(__m128i)MASK, \ (int)SCALE) #define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \ (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \ (long long const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4di)_mm256_set1_epi64x (-1), \ (int)SCALE) #define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \ (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC, \ (long long const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4di)(__m256i)MASK, \ (int)SCALE) #define _mm_i32gather_epi32(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \ (int const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4si)_mm_set1_epi32 (-1), \ (int)SCALE) #define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC, \ (int const *)BASE, \ (__v4si)(__m128i)INDEX, \ (__v4si)(__m128i)MASK, \ (int)SCALE) #define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \ (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \ (int const *)BASE, \ (__v8si)(__m256i)INDEX, \ (__v8si)_mm256_set1_epi32 (-1), \ (int)SCALE) #define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC, \ (int const *)BASE, \ (__v8si)(__m256i)INDEX, \ (__v8si)(__m256i)MASK, \ (int)SCALE) #define _mm_i64gather_epi32(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \ (int const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v4si)_mm_set1_epi32 (-1), \ (int)SCALE) #define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC, \ (int const *)BASE, \ (__v2di)(__m128i)INDEX, \ (__v4si)(__m128i)MASK, \ (int)SCALE) #define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \ (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \ (int const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4si)_mm_set1_epi32 (-1), \ (int)SCALE) #define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \ (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC, \ (int const *)BASE, \ (__v4di)(__m256i)INDEX, \ (__v4si)(__m128i)MASK, \ (int)SCALE) #endif /* __OPTIMIZE__ */
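
/* [Editor's note] Usage sketch for the gather intrinsics defined above;
   not part of the original archive. SCALE is the element stride in bytes
   (8 for double); the masked form loads only the lanes whose mask sign
   bit is set and keeps src elsewhere. Compile with -mavx2 via
   <immintrin.h>. */
static void
gather_demo (const double *table, __m128i idx,
             __m256d *out_all, __m256d *out_masked)
{
  /* out_all[i] = table[idx[i]] for i = 0..3. */
  *out_all = _mm256_i32gather_pd (table, idx, 8);

  /* Gather lanes 0 and 2 only; lanes 1 and 3 keep src (here zero). */
  __m256d src = _mm256_setzero_pd ();
  __m256d mask = _mm256_castsi256_pd (_mm256_setr_epi64x (-1, 0, -1, 0));
  *out_masked = _mm256_mask_i32gather_pd (src, table, idx, mask, 8);
}

tbmintrin.h 0000644 00000012077 14711270666 0006744 0 ustar 00 /* Copyright (C) 2010-2013 Free Software Foundation, Inc. This file is part of GCC.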
/* ==================== tbmintrin.h ==================== */

/* Copyright (C) 2010-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _X86INTRIN_H_INCLUDED
# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __TBM__
# error "TBM instruction set not enabled"
#endif /* __TBM__ */

#ifndef _TBMINTRIN_H_INCLUDED
#define _TBMINTRIN_H_INCLUDED

#ifdef __OPTIMIZE__
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextri_u32 (unsigned int __X, const unsigned int __I)
{ return __builtin_ia32_bextri_u32 (__X, __I); }
#else
#define __bextri_u32(X, I) \
  ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \
                                            (unsigned int)(I)))
#endif /* __OPTIMIZE__ */

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcfill_u32 (unsigned int __X)
{ return __X & (__X + 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blci_u32 (unsigned int __X)
{ return __X | ~(__X + 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcic_u32 (unsigned int __X)
{ return ~__X & (__X + 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcmsk_u32 (unsigned int __X)
{ return __X ^ (__X + 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcs_u32 (unsigned int __X)
{ return __X | (__X + 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsfill_u32 (unsigned int __X)
{ return __X | (__X - 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsic_u32 (unsigned int __X)
{ return ~__X | (__X - 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__t1mskc_u32 (unsigned int __X)
{ return ~__X | (__X + 1); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzmsk_u32 (unsigned int __X)
{ return ~__X & (__X - 1); }

#ifdef __x86_64__
#ifdef __OPTIMIZE__
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextri_u64 (unsigned long long __X, const unsigned int __I)
{ return __builtin_ia32_bextri_u64 (__X, __I); }
#else
#define __bextri_u64(X, I) \
  ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \
                                                  (unsigned long long)(I)))
#endif /* __OPTIMIZE__ */

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcfill_u64 (unsigned long long __X)
{ return __X & (__X + 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blci_u64 (unsigned long long __X)
{ return __X | ~(__X + 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcic_u64 (unsigned long long __X)
{ return ~__X & (__X + 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcmsk_u64 (unsigned long long __X)
{ return __X ^ (__X + 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blcs_u64 (unsigned long long __X)
{ return __X | (__X + 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsfill_u64 (unsigned long long __X)
{ return __X | (__X - 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsic_u64 (unsigned long long __X)
{ return ~__X | (__X - 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__t1mskc_u64 (unsigned long long __X)
{ return ~__X | (__X + 1); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzmsk_u64 (unsigned long long __X)
{ return ~__X & (__X - 1); }
#endif /* __x86_64__ */

#endif /* _TBMINTRIN_H_INCLUDED */
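/* Usage sketch (illustration only, not part of the archived header): each
   TBM helper above is a one-line bit trick, so its effect is easiest to see
   on a concrete value.  The plain-C bodies can be checked without TBM
   hardware:  */
#include <stdio.h>

int main (void)
{
  unsigned int x = 0x58;             /* 0b01011000 */
  printf ("%#04x\n", x | (x - 1));   /* __blsfill: 0x5f, fill below lowest set bit */
  printf ("%#04x\n", ~x & (x - 1));  /* __tzmsk:   0x07, mask of trailing zero bits */
  printf ("%#04x\n", x ^ (x + 1));   /* __blcmsk:  0x01, mask up to lowest clear bit */
  return 0;
}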
/* ==================== x86intrin.h ==================== */

/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
   This file is part of GCC.  Distributed under the same GPLv3 with
   GCC Runtime Library Exception notice as tbmintrin.h above.  */

#ifndef _X86INTRIN_H_INCLUDED
#define _X86INTRIN_H_INCLUDED

#include <ia32intrin.h>

#ifdef __MMX__
#include <mmintrin.h>
#endif

#ifdef __SSE__
#include <xmmintrin.h>
#endif

#ifdef __SSE2__
#include <emmintrin.h>
#endif

#ifdef __SSE3__
#include <pmmintrin.h>
#endif

#ifdef __SSSE3__
#include <tmmintrin.h>
#endif

#ifdef __SSE4A__
#include <ammintrin.h>
#endif

#if defined (__SSE4_2__) || defined (__SSE4_1__)
#include <smmintrin.h>
#endif

#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
#endif

/* For including AVX instructions */
#include <immintrin.h>

#ifdef __3dNOW__
#include <mm3dnow.h>
#endif

#ifdef __FMA4__
#include <fma4intrin.h>
#endif

#ifdef __XOP__
#include <xopintrin.h>
#endif

#ifdef __LWP__
#include <lwpintrin.h>
#endif

#ifdef __BMI__
#include <bmiintrin.h>
#endif

#ifdef __BMI2__
#include <bmi2intrin.h>
#endif

#ifdef __TBM__
#include <tbmintrin.h>
#endif

#ifdef __LZCNT__
#include <lzcntintrin.h>
#endif

#ifdef __POPCNT__
#include <popcntintrin.h>
#endif

#ifdef __RDSEED__
#include <rdseedintrin.h>
#endif

#ifdef __PRFCHW__
#include <prfchwintrin.h>
#endif

#ifdef __FXSR__
#include <fxsrintrin.h>
#endif

#ifdef __XSAVE__
#include <xsaveintrin.h>
#endif

#ifdef __XSAVEOPT__
#include <xsaveoptintrin.h>
#endif

#include <adxintrin.h>

#ifdef __PKU__
#include <pkuintrin.h>
#endif

#endif /* _X86INTRIN_H_INCLUDED */
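/* Usage sketch (illustration only, not part of the archived header):
   x86intrin.h only pulls in the sub-headers whose ISA macro the compiler
   predefines, so portable code tests those macros itself.  Assuming GCC
   invoked with -mbmi:  */
#include <x86intrin.h>

unsigned int lowest_set_bit (unsigned int v)
{
#ifdef __BMI__
  return __blsi_u32 (v);   /* BLSI: isolate lowest set bit */
#else
  return v & -v;           /* portable fallback, same result */
#endif
}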
/* ==================== syslimits.h ==================== */

/* syslimits.h stands for the system's own limits.h file.
   If we can use it ok unmodified, then we install this text.
   If fixincludes fixes it, then the fixed version is installed
   instead of this text.  */

#define _GCC_NEXT_LIMITS_H   /* tell gcc's limits.h to recurse */
#include_next <limits.h>
#undef _GCC_NEXT_LIMITS_H

/* ==================== avxintrin.h ==================== */

/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
   This file is part of GCC.  Distributed under the same GPLv3 with
   GCC Runtime Library Exception notice as tbmintrin.h above.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 11.0.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
#endif

/* Internal data types for implementing the intrinsics.  */
typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
typedef long long __m256i __attribute__ ((__vector_size__ (32), __may_alias__));
typedef double __m256d __attribute__ ((__vector_size__ (32), __may_alias__));

/* Compare predicates for scalar and packed compare intrinsics.  */
#define _CMP_EQ_OQ     0x00  /* Equal (ordered, non-signaling)  */
#define _CMP_LT_OS     0x01  /* Less-than (ordered, signaling)  */
#define _CMP_LE_OS     0x02  /* Less-than-or-equal (ordered, signaling)  */
#define _CMP_UNORD_Q   0x03  /* Unordered (non-signaling)  */
#define _CMP_NEQ_UQ    0x04  /* Not-equal (unordered, non-signaling)  */
#define _CMP_NLT_US    0x05  /* Not-less-than (unordered, signaling)  */
#define _CMP_NLE_US    0x06  /* Not-less-than-or-equal (unordered, signaling)  */
#define _CMP_ORD_Q     0x07  /* Ordered (non-signaling)  */
#define _CMP_EQ_UQ     0x08  /* Equal (unordered, non-signaling)  */
#define _CMP_NGE_US    0x09  /* Not-greater-than-or-equal (unordered, signaling)  */
#define _CMP_NGT_US    0x0a  /* Not-greater-than (unordered, signaling)  */
#define _CMP_FALSE_OQ  0x0b  /* False (ordered, non-signaling)  */
#define _CMP_NEQ_OQ    0x0c  /* Not-equal (ordered, non-signaling)  */
#define _CMP_GE_OS     0x0d  /* Greater-than-or-equal (ordered, signaling)  */
#define _CMP_GT_OS     0x0e  /* Greater-than (ordered, signaling)  */
#define _CMP_TRUE_UQ   0x0f  /* True (unordered, non-signaling)  */
#define _CMP_EQ_OS     0x10  /* Equal (ordered, signaling)  */
#define _CMP_LT_OQ     0x11  /* Less-than (ordered, non-signaling)  */
#define _CMP_LE_OQ     0x12  /* Less-than-or-equal (ordered, non-signaling)  */
#define _CMP_UNORD_S   0x13  /* Unordered (signaling)  */
#define _CMP_NEQ_US    0x14  /* Not-equal (unordered, signaling)  */
#define _CMP_NLT_UQ    0x15  /* Not-less-than (unordered, non-signaling)  */
#define _CMP_NLE_UQ    0x16  /* Not-less-than-or-equal (unordered, non-signaling)  */
#define _CMP_ORD_S     0x17  /* Ordered (signaling)  */
#define _CMP_EQ_US     0x18  /* Equal (unordered, signaling)  */
#define _CMP_NGE_UQ    0x19  /* Not-greater-than-or-equal (unordered, non-signaling)  */
#define _CMP_NGT_UQ    0x1a  /* Not-greater-than (unordered, non-signaling)  */
#define _CMP_FALSE_OS  0x1b  /* False (ordered, signaling)  */
#define _CMP_NEQ_OS    0x1c  /* Not-equal (ordered, signaling)  */
#define _CMP_GE_OQ     0x1d  /* Greater-than-or-equal (ordered, non-signaling)  */
#define _CMP_GT_OQ     0x1e  /* Greater-than (ordered, non-signaling)  */
#define _CMP_TRUE_US   0x1f  /* True (unordered, signaling)  */

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_addsub_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B); }

/* Double/single precision floating point blend instructions - select
   data from 2 sources using constant/variable mask.  */

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
{ return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X, (__v4df)__Y, __M); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
{ return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X, (__v8sf)__Y, __M); }
#else
#define _mm256_blend_pd(X, Y, M) \
  ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
                                        (__v4df)(__m256d)(Y), (int)(M)))
#define _mm256_blend_ps(X, Y, M) \
  ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
                                       (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
{ return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X, (__v4df)__Y, (__v4df)__M); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
{ return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X, (__v8sf)__Y, (__v8sf)__M); }
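/* Usage sketch (illustration only, not part of the archived header): in
   _mm256_blend_pd the immediate M picks, for each of the 4 doubles, the
   element from Y when bit i of M is 1 and from X when it is 0:  */
#include <immintrin.h>

static __m256d blend_demo (void)
{
  __m256d x = _mm256_set1_pd (0.0);
  __m256d y = _mm256_set1_pd (1.0);
  /* M = 0b0101: elements 0 and 2 come from y -> { 1.0, 0.0, 1.0, 0.0 }.  */
  return _mm256_blend_pd (x, y, 0x5);
}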
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B); }

/* Dot product instructions with mask-defined summing and zeroing parts
   of result.  */

#ifdef __OPTIMIZE__
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
{ return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X, (__v8sf)__Y, __M); }
#else
#define _mm256_dp_ps(X, Y, M) \
  ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
                                    (__v8sf)(__m256)(Y), (int)(M)))
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_pd (__m256d __X, __m256d __Y)
{ return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_ps (__m256 __X, __m256 __Y)
{ return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_pd (__m256d __X, __m256d __Y)
{ return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_ps (__m256 __X, __m256 __Y)
{ return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B); }

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
{ return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B, __mask); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
{ return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B, __mask); }
#else
#define _mm256_shuffle_pd(A, B, N) \
  ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
                                      (__v4df)(__m256d)(B), (int)(N)))
#define _mm256_shuffle_ps(A, B, N) \
  ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
                                      (__v8sf)(__m256)(B), (int)(N)))
#endif
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B); }

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
{ return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
{ return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
{ return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y, __P); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
{ return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y, __P); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
{ return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
{ return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P); }
#else
#define _mm_cmp_pd(X, Y, P) \
  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), (int)(P)))
#define _mm_cmp_ps(X, Y, P) \
  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
                                  (__v4sf)(__m128)(Y), (int)(P)))
#define _mm256_cmp_pd(X, Y, P) \
  ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
                                      (__v4df)(__m256d)(Y), (int)(P)))
#define _mm256_cmp_ps(X, Y, P) \
  ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
                                     (__v8sf)(__m256)(Y), (int)(P)))
#define _mm_cmp_sd(X, Y, P) \
  ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
                                   (__v2df)(__m128d)(Y), (int)(P)))
#define _mm_cmp_ss(X, Y, P) \
  ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
                                  (__v4sf)(__m128)(Y), (int)(P)))
#endif
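/* Usage sketch (illustration only, not part of the archived header): the
   _CMP_* predicates above parameterize a single compare instruction; each
   lane of the result is all-ones or all-zeros, typically consumed by
   movemask or blendv.  Assuming -mavx:  */
#include <immintrin.h>

static int count_less_than (const double *a, const double *b)
{
  __m256d va = _mm256_loadu_pd (a);
  __m256d vb = _mm256_loadu_pd (b);
  __m256d lt = _mm256_cmp_pd (va, vb, _CMP_LT_OQ);   /* ordered, quiet */
  /* One mask bit per lane; popcount gives how many a[i] < b[i].  */
  return __builtin_popcount (_mm256_movemask_pd (lt));
}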
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_pd (__m128i __A)
{ return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ps (__m256i __A)
{ return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_ps (__m256d __A)
{ return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epi32 (__m256 __A)
{ return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_pd (__m128 __A)
{ return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A); }

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi32 (__m256d __A)
{ return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A); }

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epi32 (__m256d __A)
{ return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epi32 (__m256 __A)
{ return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A); }

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_pd (__m256d __X, const int __N)
{ return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_ps (__m256 __X, const int __N)
{ return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N); }

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf128_si256 (__m256i __X, const int __N)
{ return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi32 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  return _mm_extract_epi32 (__Y, __N % 4);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi16 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  return _mm_extract_epi16 (__Y, __N % 8);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi8 (__m256i __X, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  return _mm_extract_epi8 (__Y, __N % 16);
}

#ifdef __x86_64__
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extract_epi64 (__m256i __X, const int __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  return _mm_extract_epi64 (__Y, __N % 2);
}
#endif
#else
#define _mm256_extractf128_pd(X, N) \
  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), (int)(N)))
#define _mm256_extractf128_ps(X, N) \
  ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), (int)(N)))
#define _mm256_extractf128_si256(X, N) \
  ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), (int)(N)))
#define _mm256_extract_epi32(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
      _mm_extract_epi32 (__Y, (N) % 4); \
    }))
#define _mm256_extract_epi16(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
      _mm_extract_epi16 (__Y, (N) % 8); \
    }))
#define _mm256_extract_epi8(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
      _mm_extract_epi8 (__Y, (N) % 16); \
    }))
#ifdef __x86_64__
#define _mm256_extract_epi64(X, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
      _mm_extract_epi64 (__Y, (N) % 2); \
    }))
#endif
#endif
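/* Usage sketch (illustration only, not part of the archived header): there
   is no single cross-lane element extract, so _mm256_extract_epi32 above
   picks the 128-bit half with N >> 2 and the lane inside it with N % 4:  */
#include <immintrin.h>

static int extract_demo (void)
{
  __m256i v = _mm256_setr_epi32 (10, 11, 12, 13, 14, 15, 16, 17);
  return _mm256_extract_epi32 (v, 6);   /* half 6>>2 = 1, lane 6%4 = 2 -> 16 */
}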
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroall (void)
{ __builtin_ia32_vzeroall (); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zeroupper (void)
{ __builtin_ia32_vzeroupper (); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_pd (__m128d __A, __m128i __C)
{ return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A, (__v2di)__C); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_pd (__m256d __A, __m256i __C)
{ return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A, (__v4di)__C); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutevar_ps (__m128 __A, __m128i __C)
{ return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A, (__v4si)__C); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar_ps (__m256 __A, __m256i __C)
{ return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A, (__v8si)__C); }

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_pd (__m128d __X, const int __C)
{ return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_pd (__m256d __X, const int __C)
{ return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute_ps (__m128 __X, const int __C)
{ return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute_ps (__m256 __X, const int __C)
{ return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C); }
#else
#define _mm_permute_pd(X, C) \
  ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
#define _mm256_permute_pd(X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))
#define _mm_permute_ps(X, C) \
  ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
#define _mm256_permute_ps(X, C) \
  ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
#endif

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
{ return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X, (__v4df)__Y, __C); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
{ return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X, (__v8sf)__Y, __C); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
{ return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X, (__v8si)__Y, __C); }
#else
#define _mm256_permute2f128_pd(X, Y, C) \
  ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
                                              (__v4df)(__m256d)(Y), (int)(C)))
#define _mm256_permute2f128_ps(X, Y, C) \
  ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
                                             (__v8sf)(__m256)(Y), (int)(C)))
#define _mm256_permute2f128_si256(X, Y, C) \
  ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
                                              (__v8si)(__m256i)(Y), (int)(C)))
#endif
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcast_ss (float const *__X)
{ return (__m128) __builtin_ia32_vbroadcastss (__X); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_sd (double const *__X)
{ return (__m256d) __builtin_ia32_vbroadcastsd256 (__X); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ss (float const *__X)
{ return (__m256) __builtin_ia32_vbroadcastss256 (__X); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_pd (__m128d const *__X)
{ return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_ps (__m128 const *__X)
{ return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X); }

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
{ return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X, (__v2df)__Y, __O); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{ return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X, (__v4sf)__Y, __O); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
{ return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X, (__v4si)__Y, __O); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
  __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
  __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
  __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
}

#ifdef __x86_64__
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
{
  __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
  __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
  return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
}
#endif
#else
#define _mm256_insertf128_pd(X, Y, O) \
  ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
                                               (__v2df)(__m128d)(Y), (int)(O)))
#define _mm256_insertf128_ps(X, Y, O) \
  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
                                              (__v4sf)(__m128)(Y), (int)(O)))
#define _mm256_insertf128_si256(X, Y, O) \
  ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
                                               (__v4si)(__m128i)(Y), (int)(O)))
#define _mm256_insert_epi32(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
      __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 2); \
    }))
#define _mm256_insert_epi16(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
      __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 3); \
    }))
#define _mm256_insert_epi8(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
      __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 4); \
    }))
#ifdef __x86_64__
#define _mm256_insert_epi64(X, D, N) \
  (__extension__ \
   ({ \
      __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
      __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
      _mm256_insertf128_si256 ((X), __Y, (N) >> 1); \
    }))
#endif
#endif

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_pd (double const *__P)
{ return *(__m256d *)__P; }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_pd (double *__P, __m256d __A)
{ *(__m256d *)__P = __A; }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ps (float const *__P)
{ return *(__m256 *)__P; }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ps (float *__P, __m256 __A)
{ *(__m256 *)__P = __A; }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_pd (double const *__P)
{ return (__m256d) __builtin_ia32_loadupd256 (__P); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_pd (double *__P, __m256d __A)
{ __builtin_ia32_storeupd256 (__P, (__v4df)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ps (float const *__P)
{ return (__m256) __builtin_ia32_loadups256 (__P); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ps (float *__P, __m256 __A)
{ __builtin_ia32_storeups256 (__P, (__v8sf)__A); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_si256 (__m256i const *__P)
{ return *__P; }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_si256 (__m256i *__P, __m256i __A)
{ *__P = __A; }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_si256 (__m256i const *__P)
{ return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_si256 (__m256i *__P, __m256i __A)
{ __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A); }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_pd (double const *__P, __m128i __M)
{ return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P, (__v2di)__M); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
{ __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_pd (double const *__P, __m256i __M)
{ return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P, (__v4di)__M); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
{ __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A); }
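/* Usage sketch (illustration only, not part of the archived header):
   maskload reads only the elements whose mask lane has its top bit set and
   zeroes the rest, which makes loop tails safe without reading past the
   end of an array.  Assuming -mavx:  */
#include <immintrin.h>

static __m256d load_first_three (const double *p)
{
  /* Top bit set in lanes 0..2 only: lane 3 is never read from memory.  */
  __m256i m = _mm256_setr_epi64x (-1, -1, -1, 0);
  return _mm256_maskload_pd (p, m);
}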
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_ps (float const *__P, __m128i __M)
{ return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P, (__v4si)__M); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
{ __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_ps (float const *__P, __m256i __M)
{ return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P, (__v8si)__M); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
{ __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movehdup_ps (__m256 __X)
{ return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_moveldup_ps (__m256 __X)
{ return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movedup_pd (__m256d __X)
{ return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_lddqu_si256 (__m256i const *__P)
{ return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_si256 (__m256i *__A, __m256i __B)
{ __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_pd (double *__A, __m256d __B)
{ __builtin_ia32_movntpd256 (__A, (__v4df)__B); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_ps (float *__P, __m256 __A)
{ __builtin_ia32_movntps256 (__P, (__v8sf)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ps (__m256 __A)
{ return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ps (__m256 __A)
{ return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pd (__m256d __A)
{ return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ps (__m256 __A)
{ return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A); }

#ifdef __OPTIMIZE__
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_pd (__m256d __V, const int __M)
{ return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_round_ps (__m256 __V, const int __M)
{ return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M); }
#else
#define _mm256_round_pd(V, M) \
  ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
#define _mm256_round_ps(V, M) \
  ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
#endif
#define _mm256_ceil_pd(V)   _mm256_round_pd ((V), _MM_FROUND_CEIL)
#define _mm256_floor_pd(V)  _mm256_round_pd ((V), _MM_FROUND_FLOOR)
#define _mm256_ceil_ps(V)   _mm256_round_ps ((V), _MM_FROUND_CEIL)
#define _mm256_floor_ps(V)  _mm256_round_ps ((V), _MM_FROUND_FLOOR)

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_pd (__m256d __A, __m256d __B)
{ return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_ps (__m256 __A, __m256 __B)
{ return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_pd (__m128d __M, __m128d __V)
{ return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_pd (__m128d __M, __m128d __V)
{ return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_pd (__m128d __M, __m128d __V)
{ return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_ps (__m128 __M, __m128 __V)
{ return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_ps (__m128 __M, __m128 __V)
{ return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_ps (__m128 __M, __m128 __V)
{ return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_pd (__m256d __M, __m256d __V)
{ return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_pd (__m256d __M, __m256d __V)
{ return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_pd (__m256d __M, __m256d __V)
{ return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_ps (__m256 __M, __m256 __V)
{ return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_ps (__m256 __M, __m256 __V)
{ return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_ps (__m256 __M, __m256 __V)
{ return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testz_si256 (__m256i __M, __m256i __V)
{ return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testc_si256 (__m256i __M, __m256i __V)
{ return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_testnzc_si256 (__m256i __M, __m256i __V)
{ return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_pd (__m256d __A)
{ return __builtin_ia32_movmskpd256 ((__v4df)__A); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_ps (__m256 __A)
{ return __builtin_ia32_movmskps256 ((__v8sf)__A); }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_pd (void)
{ return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 }; }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ps (void)
{ return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_si256 (void)
{ return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 }; }

/* Create the vector [A B C D].  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_pd (double __A, double __B, double __C, double __D)
{ return __extension__ (__m256d){ __D, __C, __B, __A }; }

/* Create the vector [A B C D E F G H].  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H)
{ return __extension__ (__m256){ __H, __G, __F, __E, __D, __C, __B, __A }; }
/* Create the vector [A B C D E F G H].  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H)
{ return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E, __D, __C, __B, __A }; }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
                  short __q11, short __q10, short __q09, short __q08,
                  short __q07, short __q06, short __q05, short __q04,
                  short __q03, short __q02, short __q01, short __q00)
{
  return __extension__ (__m256i)(__v16hi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi8 (char __q31, char __q30, char __q29, char __q28,
                 char __q27, char __q26, char __q25, char __q24,
                 char __q23, char __q22, char __q21, char __q20,
                 char __q19, char __q18, char __q17, char __q16,
                 char __q15, char __q14, char __q13, char __q12,
                 char __q11, char __q10, char __q09, char __q08,
                 char __q07, char __q06, char __q05, char __q04,
                 char __q03, char __q02, char __q01, char __q00)
{
  return __extension__ (__m256i)(__v32qi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
    __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
    __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
  };
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_epi64x (long long __A, long long __B, long long __C, long long __D)
{ return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A }; }

/* Create a vector with all elements equal to A.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_pd (double __A)
{ return __extension__ (__m256d){ __A, __A, __A, __A }; }

/* Create a vector with all elements equal to A.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ps (float __A)
{ return __extension__ (__m256){ __A, __A, __A, __A, __A, __A, __A, __A }; }

/* Create a vector with all elements equal to A.  */
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi32 (int __A)
{ return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A, __A, __A, __A, __A }; }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi16 (short __A)
{
  return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
                           __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi8 (char __A)
{
  return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
                          __A, __A, __A, __A, __A, __A, __A, __A,
                          __A, __A, __A, __A, __A, __A, __A, __A,
                          __A, __A, __A, __A, __A, __A, __A, __A);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_epi64x (long long __A)
{ return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A }; }
/* Create vectors of elements in the reversed order from the
   _mm256_set_XXX functions.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_pd (double __A, double __B, double __C, double __D)
{ return _mm256_set_pd (__D, __C, __B, __A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ps (float __A, float __B, float __C, float __D,
                float __E, float __F, float __G, float __H)
{ return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
                   int __E, int __F, int __G, int __H)
{ return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
                   short __q11, short __q10, short __q09, short __q08,
                   short __q07, short __q06, short __q05, short __q04,
                   short __q03, short __q02, short __q01, short __q00)
{
  return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
                           __q04, __q05, __q06, __q07,
                           __q08, __q09, __q10, __q11,
                           __q12, __q13, __q14, __q15);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi8 (char __q31, char __q30, char __q29, char __q28,
                  char __q27, char __q26, char __q25, char __q24,
                  char __q23, char __q22, char __q21, char __q20,
                  char __q19, char __q18, char __q17, char __q16,
                  char __q15, char __q14, char __q13, char __q12,
                  char __q11, char __q10, char __q09, char __q08,
                  char __q07, char __q06, char __q05, char __q04,
                  char __q03, char __q02, char __q01, char __q00)
{
  return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
                          __q04, __q05, __q06, __q07,
                          __q08, __q09, __q10, __q11,
                          __q12, __q13, __q14, __q15,
                          __q16, __q17, __q18, __q19,
                          __q20, __q21, __q22, __q23,
                          __q24, __q25, __q26, __q27,
                          __q28, __q29, __q30, __q31);
}

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_epi64x (long long __A, long long __B, long long __C, long long __D)
{ return _mm256_set_epi64x (__D, __C, __B, __A); }
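/* Usage sketch (illustration only, not part of the archived header):
   _mm256_set_* lists elements from the highest lane down, _mm256_setr_*
   from the lowest up, so the two calls below build the identical vector
   { 1, 2, 3, 4 } in memory:  */
#include <immintrin.h>

static void set_order_demo (double out[4])
{
  __m256d a = _mm256_set_pd  (4.0, 3.0, 2.0, 1.0);
  __m256d b = _mm256_setr_pd (1.0, 2.0, 3.0, 4.0);
  _mm256_storeu_pd (out, _mm256_sub_pd (a, b));   /* all zeros: a == b */
}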
/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ps (__m256d __A)
{ return (__m256) __A; }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_si256 (__m256d __A)
{ return (__m256i) __A; }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_pd (__m256 __A)
{ return (__m256d) __A; }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_si256 (__m256 __A)
{ return (__m256i) __A; }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ps (__m256i __A)
{ return (__m256) __A; }

extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_pd (__m256i __A)
{ return (__m256d) __A; }

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd256_pd128 (__m256d __A)
{ return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A); }

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps256_ps128 (__m256 __A)
{ return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A); }

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_si128 (__m256i __A)
{ return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A); }

/* When cast is done from a 128 to 256-bit type, the low 128 bits of the
   256-bit result contain source parameter value and the upper 128 bits of
   the result are undefined.  Those intrinsics shouldn't generate any extra
   moves.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd128_pd256 (__m128d __A)
{ return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps128_ps256 (__m128 __A)
{ return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A); }

extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi128_si256 (__m128i __A)
{ return (__m256i) __builtin_ia32_si256_si ((__v4si)__A); }
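/* Usage sketch (illustration only, not part of the archived header): the
   casts above reinterpret bits and emit no code, unlike the cvt intrinsics,
   which convert values; and after a 128->256 cast the upper half is
   undefined, as the comment above notes:  */
#include <immintrin.h>

static __m256i bits_vs_values (__m256d v)
{
  __m256i bits = _mm256_castpd_si256 (v);   /* same 256 bits, new type */
  __m128i vals = _mm256_cvtpd_epi32 (v);    /* rounds 4 doubles to ints */
  /* Low half holds vals; the high half is undefined until written.  */
  return _mm256_castsi128_si256 (vals);
}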
/* ==================== mmintrin.h ==================== */

/* Copyright (C) 2002-2013 Free Software Foundation, Inc.
   This file is part of GCC.  Distributed under the same GPLv3 with
   GCC Runtime Library Exception notice as tbmintrin.h above.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef _MMINTRIN_H_INCLUDED
#define _MMINTRIN_H_INCLUDED

#ifndef __MMX__
# error "MMX instruction set not enabled"
#else
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));

/* Internal data types for implementing the intrinsics.  */
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float __v2sf __attribute__ ((__vector_size__ (8)));

/* Empty the multimedia state.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{ __builtin_ia32_emms (); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{ _mm_empty (); }

/* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i)
{ return (__m64) __builtin_ia32_vec_init_v2si (__i, 0); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int (int __i)
{ return _mm_cvtsi32_si64 (__i); }

#ifdef __x86_64__
/* Convert I to a __m64 object.  */

/* Intel intrinsic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64 (long long __i)
{ return (__m64) __i; }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i)
{ return (__m64) __i; }

/* Microsoft intrinsic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i)
{ return (__m64) __i; }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x (long long __i)
{ return (__m64) __i; }
#endif

/* Convert the lower 32 bits of the __m64 object into an integer.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i)
{ return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0); }

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int (__m64 __i)
{ return _mm_cvtsi64_si32 (__i); }

#ifdef __x86_64__
/* Convert the __m64 object to a 64bit integer.  */

/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64 (__m64 __i)
{ return (long long)__i; }

extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64 (__m64 __i)
{ return (long long)__i; }

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x (__m64 __i)
{ return (long long)__i; }
#endif
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pi16 (__m1, __m2);
}

/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
   the result, and the two 32-bit values from M2 into the upper two 16-bit
   values of the result, all with signed saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pi32 (__m1, __m2);
}

/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
   the result, and the four 16-bit values from M2 into the upper four 8-bit
   values of the result, all with unsigned saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pu16 (__m1, __m2);
}

/* Interleave the four 8-bit values from the high half of M1 with the four
   8-bit values from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi8 (__m1, __m2);
}

/* Interleave the two 16-bit values from the high half of M1 with the two
   16-bit values from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi16 (__m1, __m2);
}

/* Interleave the 32-bit value from the high half of M1 with the 32-bit
   value from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi32 (__m1, __m2);
}

/* Interleave the four 8-bit values from the low half of M1 with the four
   8-bit values from the low half of M2.
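
   For example (a sketch; interleave_low_demo is a made-up name, MMX
   assumed enabled):

       #include <mmintrin.h>

       __m64 interleave_low_demo (void)
       {
         __m64 a = _mm_set_pi8 (7, 6, 5, 4, 3, 2, 1, 0);
         __m64 b = _mm_set_pi8 (17, 16, 15, 14, 13, 12, 11, 10);
         // Bytes, low to high: 0, 10, 1, 11, 2, 12, 3, 13.
         return _mm_unpacklo_pi8 (a, b);
       }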
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpcklbw (__m64 __m1, __m64 __m2) { return _mm_unpacklo_pi8 (__m1, __m2); } /* Interleave the two 16-bit values from the low half of M1 with the two 16-bit values from the low half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpcklwd (__m64 __m1, __m64 __m2) { return _mm_unpacklo_pi16 (__m1, __m2); } /* Interleave the 32-bit value from the low half of M1 with the 32-bit value from the low half of M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_punpckldq (__m64 __m1, __m64 __m2) { return _mm_unpacklo_pi32 (__m1, __m2); } /* Add the 8-bit values in M1 to the 8-bit values in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pi8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddb (__m64 __m1, __m64 __m2) { return _mm_add_pi8 (__m1, __m2); } /* Add the 16-bit values in M1 to the 16-bit values in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddw (__m64 __m1, __m64 __m2) { return _mm_add_pi16 (__m1, __m2); } /* Add the 32-bit values in M1 to the 32-bit values in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pi32 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddd (__m64 __m1, __m64 __m2) { return _mm_add_pi32 (__m1, __m2); } /* Add the 64-bit values in M1 to the 64-bit values in M2. */ #ifdef __SSE2__ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_si64 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); } #endif /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed saturated arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pi8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_paddsb (__m64 __m1, __m64 __m2) { return _mm_adds_pi8 (__m1, __m2); } /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed saturated arithmetic. 
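
   For example (a sketch; adds_demo is a made-up name, MMX assumed
   enabled):

       #include <mmintrin.h>

       __m64 adds_demo (void)
       {
         __m64 a = _mm_set1_pi16 (30000);
         __m64 b = _mm_set1_pi16 (10000);
         // Each lane clamps to 32767; plain _mm_add_pi16 would wrap
         // to -25536.
         return _mm_adds_pi16 (a, b);
       }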
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pi16 (__m1, __m2);
}

/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pu8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pu16 (__m1, __m2);
}

/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi16 (__m1, __m2);
}

/* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi32 (__m1, __m2);
}

/* Subtract the 64-bit value in M2 from the 64-bit value in M1.  */
#ifdef __SSE2__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
}
#endif

/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
   saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pi8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
   signed saturating arithmetic.
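
   For example (a sketch; subs_demo is a made-up name, MMX assumed
   enabled):

       #include <mmintrin.h>

       __m64 subs_demo (void)
       {
         __m64 a = _mm_set1_pi16 (-30000);
         __m64 b = _mm_set1_pi16 (10000);
         return _mm_subs_pi16 (a, b);   // each lane clamps to -32768
       }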
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubsw (__m64 __m1, __m64 __m2) { return _mm_subs_pi16 (__m1, __m2); } /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using unsigned saturating arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pu8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubusb (__m64 __m1, __m64 __m2) { return _mm_subs_pu8 (__m1, __m2); } /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using unsigned saturating arithmetic. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pu16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psubusw (__m64 __m1, __m64 __m2) { return _mm_subs_pu16 (__m1, __m2); } /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing four 32-bit intermediate results, which are then summed by pairs to produce two 32-bit results. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_madd_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmaddwd (__m64 __m1, __m64 __m2) { return _mm_madd_pi16 (__m1, __m2); } /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in M2 and produce the high 16 bits of the 32-bit results. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmulhw (__m64 __m1, __m64 __m2) { return _mm_mulhi_pi16 (__m1, __m2); } /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce the low 16 bits of the results. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pmullw (__m64 __m1, __m64 __m2) { return _mm_mullo_pi16 (__m1, __m2); } /* Shift four 16-bit values in M left by COUNT. 
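
   For example (a sketch; shl_demo is a made-up name, MMX assumed enabled):

       #include <mmintrin.h>

       __m64 shl_demo (void)
       {
         __m64 v = _mm_set1_pi16 (3);
         // Each lane becomes 12; bits shifted past bit 15 are lost,
         // with no saturation.
         return _mm_slli_pi16 (v, 2);
       }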
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_pi16 (__m64 __m, __m64 __count) { return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllw (__m64 __m, __m64 __count) { return _mm_sll_pi16 (__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_pi16 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllwi (__m64 __m, int __count) { return _mm_slli_pi16 (__m, __count); } /* Shift two 32-bit values in M left by COUNT. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_pi32 (__m64 __m, __m64 __count) { return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pslld (__m64 __m, __m64 __count) { return _mm_sll_pi32 (__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_pi32 (__m64 __m, int __count) { return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pslldi (__m64 __m, int __count) { return _mm_slli_pi32 (__m, __count); } /* Shift the 64-bit value in M left by COUNT. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_si64 (__m64 __m, __m64 __count) { return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllq (__m64 __m, __m64 __count) { return _mm_sll_si64 (__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_si64 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psllqi (__m64 __m, int __count) { return _mm_slli_si64 (__m, __count); } /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_pi16 (__m64 __m, __m64 __count) { return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psraw (__m64 __m, __m64 __count) { return _mm_sra_pi16 (__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_pi16 (__m64 __m, int __count) { return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_psrawi (__m64 __m, int __count) { return _mm_srai_pi16 (__m, __count); } /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. 
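
   For example (a sketch; sar_demo is a made-up name, MMX assumed enabled):

       #include <mmintrin.h>

       __m64 sar_demo (void)
       {
         __m64 v = _mm_set_pi32 (-8, 8);
         // The sign bit is replicated: lanes become -4 and 4, unlike
         // the logical shifts below, which shift in zeros.
         return _mm_srai_pi32 (v, 1);
       }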
*/
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad (__m64 __m, __m64 __count)
{
  return _mm_sra_pi32 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi (__m64 __m, int __count)
{
  return _mm_srai_pi32 (__m, __count);
}

/* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw (__m64 __m, __m64 __count)
{
  return _mm_srl_pi16 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi (__m64 __m, int __count)
{
  return _mm_srli_pi16 (__m, __count);
}

/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld (__m64 __m, __m64 __count)
{
  return _mm_srl_pi32 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi (__m64 __m, int __count)
{
  return _mm_srli_pi32 (__m, __count);
}

/* Shift the 64-bit value in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq (__m64 __m, __m64 __count)
{
  return _mm_srl_si64 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi (__m64 __m, int __count)
{
  return _mm_srli_si64 (__m, __count);
}

/* Bit-wise AND the 64-bit values in M1 and M2.
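
   The three logical operations below combine into the classic branch-free
   select (a sketch; masked_select is a made-up name, MMX assumed enabled):

       #include <mmintrin.h>

       __m64 masked_select (__m64 mask, __m64 a, __m64 b)
       {
         // (mask AND a) OR (NOT mask AND b): picks a where mask is
         // all-ones and b where it is all-zeros.
         return _mm_or_si64 (_mm_and_si64 (mask, a),
                             _mm_andnot_si64 (mask, b));
       }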
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_si64 (__m64 __m1, __m64 __m2) { return __builtin_ia32_pand (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pand (__m64 __m1, __m64 __m2) { return _mm_and_si64 (__m1, __m2); } /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 64-bit value in M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_si64 (__m64 __m1, __m64 __m2) { return __builtin_ia32_pandn (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pandn (__m64 __m1, __m64 __m2) { return _mm_andnot_si64 (__m1, __m2); } /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_si64 (__m64 __m1, __m64 __m2) { return __builtin_ia32_por (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_por (__m64 __m1, __m64 __m2) { return _mm_or_si64 (__m1, __m2); } /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_si64 (__m64 __m1, __m64 __m2) { return __builtin_ia32_pxor (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pxor (__m64 __m1, __m64 __m2) { return _mm_xor_si64 (__m1, __m2); } /* Compare eight 8-bit values. The result of the comparison is 0xFF if the test is true and zero if false. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpeqb (__m64 __m1, __m64 __m2) { return _mm_cmpeq_pi8 (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpgtb (__m64 __m1, __m64 __m2) { return _mm_cmpgt_pi8 (__m1, __m2); } /* Compare four 16-bit values. The result of the comparison is 0xFFFF if the test is true and zero if false. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpeqw (__m64 __m1, __m64 __m2) { return _mm_cmpeq_pi16 (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpgtw (__m64 __m1, __m64 __m2) { return _mm_cmpgt_pi16 (__m1, __m2); } /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if the test is true and zero if false. 
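
   Because the comparisons return all-ones or all-zeros per lane, they
   compose with the logical operations above into, e.g., a per-lane
   maximum (a sketch; max_pi32_demo is a made-up name):

       #include <mmintrin.h>

       __m64 max_pi32_demo (__m64 a, __m64 b)
       {
         __m64 gt = _mm_cmpgt_pi32 (a, b);          // all-ones where a > b
         return _mm_or_si64 (_mm_and_si64 (gt, a),
                             _mm_andnot_si64 (gt, b));
       }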
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpeqd (__m64 __m1, __m64 __m2) { return _mm_cmpeq_pi32 (__m1, __m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pcmpgtd (__m64 __m1, __m64 __m2) { return _mm_cmpgt_pi32 (__m1, __m2); } /* Creates a 64-bit zero. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_si64 (void) { return (__m64)0LL; } /* Creates a vector of two 32-bit values; I0 is least significant. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi32 (int __i1, int __i0) { return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1); } /* Creates a vector of four 16-bit values; W0 is least significant. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) { return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3); } /* Creates a vector of eight 8-bit values; B0 is least significant. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7); } /* Similar, but with the arguments in reverse order. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi32 (int __i0, int __i1) { return _mm_set_pi32 (__i1, __i0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) { return _mm_set_pi16 (__w3, __w2, __w1, __w0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7) { return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } /* Creates a vector of two 32-bit values, both elements containing I. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi32 (int __i) { return _mm_set_pi32 (__i, __i); } /* Creates a vector of four 16-bit values, all elements containing W. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi16 (short __w) { return _mm_set_pi16 (__w, __w, __w, __w); } /* Creates a vector of eight 8-bit values, all elements containing B. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi8 (char __b) { return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b); } #endif /* __MMX__ */ #endif /* _MMINTRIN_H_INCLUDED */ float.h 0000644 00000021255 14711270673 0006037 0 ustar 00 /* Copyright (C) 2002-2013 Free Software Foundation, Inc. This file is part of GCC. 
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* * ISO C Standard: 5.2.4.2.2 Characteristics of floating types <float.h> */ #ifndef _FLOAT_H___ #define _FLOAT_H___ /* Radix of exponent representation, b. */ #undef FLT_RADIX #define FLT_RADIX __FLT_RADIX__ /* Number of base-FLT_RADIX digits in the significand, p. */ #undef FLT_MANT_DIG #undef DBL_MANT_DIG #undef LDBL_MANT_DIG #define FLT_MANT_DIG __FLT_MANT_DIG__ #define DBL_MANT_DIG __DBL_MANT_DIG__ #define LDBL_MANT_DIG __LDBL_MANT_DIG__ /* Number of decimal digits, q, such that any floating-point number with q decimal digits can be rounded into a floating-point number with p radix b digits and back again without change to the q decimal digits, p * log10(b) if b is a power of 10 floor((p - 1) * log10(b)) otherwise */ #undef FLT_DIG #undef DBL_DIG #undef LDBL_DIG #define FLT_DIG __FLT_DIG__ #define DBL_DIG __DBL_DIG__ #define LDBL_DIG __LDBL_DIG__ /* Minimum int x such that FLT_RADIX**(x-1) is a normalized float, emin */ #undef FLT_MIN_EXP #undef DBL_MIN_EXP #undef LDBL_MIN_EXP #define FLT_MIN_EXP __FLT_MIN_EXP__ #define DBL_MIN_EXP __DBL_MIN_EXP__ #define LDBL_MIN_EXP __LDBL_MIN_EXP__ /* Minimum negative integer such that 10 raised to that power is in the range of normalized floating-point numbers, ceil(log10(b) * (emin - 1)) */ #undef FLT_MIN_10_EXP #undef DBL_MIN_10_EXP #undef LDBL_MIN_10_EXP #define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ #define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ #define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ /* Maximum int x such that FLT_RADIX**(x-1) is a representable float, emax. */ #undef FLT_MAX_EXP #undef DBL_MAX_EXP #undef LDBL_MAX_EXP #define FLT_MAX_EXP __FLT_MAX_EXP__ #define DBL_MAX_EXP __DBL_MAX_EXP__ #define LDBL_MAX_EXP __LDBL_MAX_EXP__ /* Maximum integer such that 10 raised to that power is in the range of representable finite floating-point numbers, floor(log10((1 - b**-p) * b**emax)) */ #undef FLT_MAX_10_EXP #undef DBL_MAX_10_EXP #undef LDBL_MAX_10_EXP #define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ #define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ #define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ /* Maximum representable finite floating-point number, (1 - b**-p) * b**emax */ #undef FLT_MAX #undef DBL_MAX #undef LDBL_MAX #define FLT_MAX __FLT_MAX__ #define DBL_MAX __DBL_MAX__ #define LDBL_MAX __LDBL_MAX__ /* The difference between 1 and the least value greater than 1 that is representable in the given floating point type, b**1-p. */ #undef FLT_EPSILON #undef DBL_EPSILON #undef LDBL_EPSILON #define FLT_EPSILON __FLT_EPSILON__ #define DBL_EPSILON __DBL_EPSILON__ #define LDBL_EPSILON __LDBL_EPSILON__ /* Minimum normalized positive floating-point number, b**(emin - 1). 
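
   For example (a sketch; assumes IEEE-754 binary32 float and an SSE-based
   build so that assignments round to float precision):

       #include <float.h>
       #include <stdio.h>

       int main (void)
       {
         printf ("FLT_MIN     = %g\n", FLT_MIN);      // about 1.17549e-38
         printf ("FLT_EPSILON = %g\n", FLT_EPSILON);  // about 1.19209e-07
         float x = 1.0f + FLT_EPSILON / 2.0f;         // rounds back to 1.0f
         printf ("%d\n", x == 1.0f);                  // prints 1
         return 0;
       }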
*/ #undef FLT_MIN #undef DBL_MIN #undef LDBL_MIN #define FLT_MIN __FLT_MIN__ #define DBL_MIN __DBL_MIN__ #define LDBL_MIN __LDBL_MIN__ /* Addition rounds to 0: zero, 1: nearest, 2: +inf, 3: -inf, -1: unknown. */ /* ??? This is supposed to change with calls to fesetround in <fenv.h>. */ #undef FLT_ROUNDS #define FLT_ROUNDS 1 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* The floating-point expression evaluation method. -1 indeterminate 0 evaluate all operations and constants just to the range and precision of the type 1 evaluate operations and constants of type float and double to the range and precision of the double type, evaluate long double operations and constants to the range and precision of the long double type 2 evaluate all operations and constants to the range and precision of the long double type ??? This ought to change with the setting of the fp control word; the value provided by the compiler assumes the widest setting. */ #undef FLT_EVAL_METHOD #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ /* Number of decimal digits, n, such that any floating-point number in the widest supported floating type with pmax radix b digits can be rounded to a floating-point number with n decimal digits and back again without change to the value, pmax * log10(b) if b is a power of 10 ceil(1 + pmax * log10(b)) otherwise */ #undef DECIMAL_DIG #define DECIMAL_DIG __DECIMAL_DIG__ #endif /* C99 */ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* Versions of DECIMAL_DIG for each floating-point type. */ #undef FLT_DECIMAL_DIG #undef DBL_DECIMAL_DIG #undef LDBL_DECIMAL_DIG #define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ #define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__ #define LDBL_DECIMAL_DIG __DECIMAL_DIG__ /* Whether types support subnormal numbers. */ #undef FLT_HAS_SUBNORM #undef DBL_HAS_SUBNORM #undef LDBL_HAS_SUBNORM #define FLT_HAS_SUBNORM __FLT_HAS_DENORM__ #define DBL_HAS_SUBNORM __DBL_HAS_DENORM__ #define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__ /* Minimum positive values, including subnormals. */ #undef FLT_TRUE_MIN #undef DBL_TRUE_MIN #undef LDBL_TRUE_MIN #if __FLT_HAS_DENORM__ #define FLT_TRUE_MIN __FLT_DENORM_MIN__ #else #define FLT_TRUE_MIN __FLT_MIN__ #endif #if __DBL_HAS_DENORM__ #define DBL_TRUE_MIN __DBL_DENORM_MIN__ #else #define DBL_TRUE_MIN __DBL_MIN__ #endif #if __LDBL_HAS_DENORM__ #define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ #else #define LDBL_TRUE_MIN __LDBL_MIN__ #endif #endif /* C11 */ #ifdef __STDC_WANT_DEC_FP__ /* Draft Technical Report 24732, extension for decimal floating-point arithmetic: Characteristic of decimal floating types <float.h>. */ /* Number of base-FLT_RADIX digits in the significand, p. */ #undef DEC32_MANT_DIG #undef DEC64_MANT_DIG #undef DEC128_MANT_DIG #define DEC32_MANT_DIG __DEC32_MANT_DIG__ #define DEC64_MANT_DIG __DEC64_MANT_DIG__ #define DEC128_MANT_DIG __DEC128_MANT_DIG__ /* Minimum exponent. */ #undef DEC32_MIN_EXP #undef DEC64_MIN_EXP #undef DEC128_MIN_EXP #define DEC32_MIN_EXP __DEC32_MIN_EXP__ #define DEC64_MIN_EXP __DEC64_MIN_EXP__ #define DEC128_MIN_EXP __DEC128_MIN_EXP__ /* Maximum exponent. */ #undef DEC32_MAX_EXP #undef DEC64_MAX_EXP #undef DEC128_MAX_EXP #define DEC32_MAX_EXP __DEC32_MAX_EXP__ #define DEC64_MAX_EXP __DEC64_MAX_EXP__ #define DEC128_MAX_EXP __DEC128_MAX_EXP__ /* Maximum representable finite decimal floating-point number (there are 6, 15, and 33 9s after the decimal points respectively). 
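
   For example (a sketch; assumes a GCC target where decimal floating
   point is enabled, e.g. x86 GNU/Linux):

       #define __STDC_WANT_DEC_FP__
       #include <float.h>
       #include <stdio.h>

       int main (void)
       {
         _Decimal32 m = DEC32_MAX;      // 9.999999E96DF: six trailing 9s
         printf ("%g\n", (double) m);   // roughly 1e+97 after conversion
         return 0;
       }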
*/ #undef DEC32_MAX #undef DEC64_MAX #undef DEC128_MAX #define DEC32_MAX __DEC32_MAX__ #define DEC64_MAX __DEC64_MAX__ #define DEC128_MAX __DEC128_MAX__ /* The difference between 1 and the least value greater than 1 that is representable in the given floating point type. */ #undef DEC32_EPSILON #undef DEC64_EPSILON #undef DEC128_EPSILON #define DEC32_EPSILON __DEC32_EPSILON__ #define DEC64_EPSILON __DEC64_EPSILON__ #define DEC128_EPSILON __DEC128_EPSILON__ /* Minimum normalized positive floating-point number. */ #undef DEC32_MIN #undef DEC64_MIN #undef DEC128_MIN #define DEC32_MIN __DEC32_MIN__ #define DEC64_MIN __DEC64_MIN__ #define DEC128_MIN __DEC128_MIN__ /* Minimum subnormal positive floating-point number. */ #undef DEC32_SUBNORMAL_MIN #undef DEC64_SUBNORMAL_MIN #undef DEC128_SUBNORMAL_MIN #define DEC32_SUBNORMAL_MIN __DEC32_SUBNORMAL_MIN__ #define DEC64_SUBNORMAL_MIN __DEC64_SUBNORMAL_MIN__ #define DEC128_SUBNORMAL_MIN __DEC128_SUBNORMAL_MIN__ /* The floating-point expression evaluation method. -1 indeterminate 0 evaluate all operations and constants just to the range and precision of the type 1 evaluate operations and constants of type _Decimal32 and _Decimal64 to the range and precision of the _Decimal64 type, evaluate _Decimal128 operations and constants to the range and precision of the _Decimal128 type; 2 evaluate all operations and constants to the range and precision of the _Decimal128 type. */ #undef DEC_EVAL_METHOD #define DEC_EVAL_METHOD __DEC_EVAL_METHOD__ #endif /* __STDC_WANT_DEC_FP__ */ #endif /* _FLOAT_H___ */ pkuintrin.h 0000644 00000002764 14711270673 0006761 0 ustar 00 /* Copyright (C) 2015 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #if !defined _X86INTRIN_H_INCLUDED # error "Never use <pkuintrin.h> directly; include <x86intrin.h> instead." #endif #ifndef _PKUINTRIN_H_INCLUDED #define _PKUINTRIN_H_INCLUDED extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdpkru_u32(void) { return __builtin_ia32_rdpkru (); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _wrpkru(unsigned int key) { return __builtin_ia32_wrpkru (key); } #endif /* _PKUINTRIN_H_INCLUDED */ nmmintrin.h 0000644 00000002546 14711270674 0006750 0 ustar 00 /* Copyright (C) 2007-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 10.0. */ #ifndef _NMMINTRIN_H_INCLUDED #define _NMMINTRIN_H_INCLUDED #ifndef __SSE4_2__ # error "SSE4.2 instruction set not enabled" #else /* We just include SSE4.1 header file. */ #include <smmintrin.h> #endif /* __SSE4_2__ */ #endif /* _NMMINTRIN_H_INCLUDED */ immintrin.h 0000644 00000007546 14711270674 0006750 0 ustar 00 /* Copyright (C) 2008-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
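
   The header below is the umbrella include: each ISA-specific header is
   pulled in only when the compiler predefines the matching macro. For
   example, the RDRAND wrappers declared below can be used like this
   (a sketch; assumes an -mrdrnd build):

       #include <immintrin.h>
       #include <stdio.h>

       int main (void)
       {
         unsigned int r;
         if (_rdrand32_step (&r))   // returns 1 when entropy was available
           printf ("%u\n", r);
         else
           puts ("retry");          // real code would retry a few times
         return 0;
       }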
*/ #ifndef _IMMINTRIN_H_INCLUDED #define _IMMINTRIN_H_INCLUDED #ifdef __MMX__ #include <mmintrin.h> #endif #ifdef __SSE__ #include <xmmintrin.h> #endif #ifdef __SSE2__ #include <emmintrin.h> #endif #ifdef __SSE3__ #include <pmmintrin.h> #endif #ifdef __SSSE3__ #include <tmmintrin.h> #endif #if defined (__SSE4_2__) || defined (__SSE4_1__) #include <smmintrin.h> #endif #if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> #endif #ifdef __AVX__ #include <avxintrin.h> #endif #ifdef __AVX2__ #include <avx2intrin.h> #endif #ifdef __LZCNT__ #include <lzcntintrin.h> #endif #ifdef __BMI__ #include <bmiintrin.h> #endif #ifdef __BMI2__ #include <bmi2intrin.h> #endif #ifdef __FMA__ #include <fmaintrin.h> #endif #ifdef __F16C__ #include <f16cintrin.h> #endif #ifdef __RTM__ #include <rtmintrin.h> #endif #ifdef __RTM__ #include <xtestintrin.h> #endif #ifdef __RDRND__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand16_step (unsigned short *__P) { return __builtin_ia32_rdrand16_step (__P); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand32_step (unsigned int *__P) { return __builtin_ia32_rdrand32_step (__P); } #endif /* __RDRND__ */ #ifdef __x86_64__ #ifdef __FSGSBASE__ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _readfsbase_u32 (void) { return __builtin_ia32_rdfsbase32 (); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _readfsbase_u64 (void) { return __builtin_ia32_rdfsbase64 (); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _readgsbase_u32 (void) { return __builtin_ia32_rdgsbase32 (); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _readgsbase_u64 (void) { return __builtin_ia32_rdgsbase64 (); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _writefsbase_u32 (unsigned int __B) { __builtin_ia32_wrfsbase32 (__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _writefsbase_u64 (unsigned long long __B) { __builtin_ia32_wrfsbase64 (__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _writegsbase_u32 (unsigned int __B) { __builtin_ia32_wrgsbase32 (__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _writegsbase_u64 (unsigned long long __B) { __builtin_ia32_wrgsbase64 (__B); } #endif /* __FSGSBASE__ */ #ifdef __RDRND__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand64_step (unsigned long long *__P) { return __builtin_ia32_rdrand64_step (__P); } #endif /* __RDRND__ */ #endif /* __x86_64__ */ #endif /* _IMMINTRIN_H_INCLUDED */ xtestintrin.h 0000644 00000003022 14711270674 0007316 0 ustar 00 /* Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _IMMINTRIN_H_INCLUDED # error "Never use <xtestintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __RTM__ # error "RTM instruction set not enabled" #endif /* __RTM__ */ #ifndef _XTESTINTRIN_H_INCLUDED #define _XTESTINTRIN_H_INCLUDED /* Return non-zero if the instruction executes inside an RTM or HLE code region. Return zero otherwise. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xtest (void) { return __builtin_ia32_xtest (); } #endif /* _XTESTINTRIN_H_INCLUDED */ xsaveintrin.h 0000644 00000003707 14711270675 0007310 0 ustar 00 /* Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED */ /* # error "Never use <xsaveintrin.h> directly; include <x86intrin.h> instead." */ /* #endif */ #ifndef _XSAVEINTRIN_H_INCLUDED #define _XSAVEINTRIN_H_INCLUDED extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xsave (void *__P, long long __M) { return __builtin_ia32_xsave (__P, __M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xrstor (void *__P, long long __M) { return __builtin_ia32_xrstor (__P, __M); } #ifdef __x86_64__ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xsave64 (void *__P, long long __M) { return __builtin_ia32_xsave64 (__P, __M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xrstor64 (void *__P, long long __M) { return __builtin_ia32_xrstor64 (__P, __M); } #endif #endif /* _XSAVEINTRIN_H_INCLUDED */ unwind.h 0000644 00000024750 14711270675 0006243 0 ustar 00 /* Exception handling and frame unwind runtime interface routines. Copyright (C) 2001-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* This is derived from the C++ ABI for IA-64. Where we diverge for cross-architecture compatibility are noted with "@@@". */ #ifndef _UNWIND_H #define _UNWIND_H #if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__) /* Only for _GCC_specific_handler. */ #include <windows.h> #endif #ifndef HIDE_EXPORTS #pragma GCC visibility push(default) #endif #ifdef __cplusplus extern "C" { #endif /* Level 1: Base ABI */ /* @@@ The IA-64 ABI uses uint64 throughout. Most places this is inefficient for 32-bit and smaller machines. */ typedef unsigned _Unwind_Word __attribute__((__mode__(__unwind_word__))); typedef signed _Unwind_Sword __attribute__((__mode__(__unwind_word__))); #if defined(__ia64__) && defined(__hpux__) typedef unsigned _Unwind_Ptr __attribute__((__mode__(__word__))); #else typedef unsigned _Unwind_Ptr __attribute__((__mode__(__pointer__))); #endif typedef unsigned _Unwind_Internal_Ptr __attribute__((__mode__(__pointer__))); /* @@@ The IA-64 ABI uses a 64-bit word to identify the producer and consumer of an exception. We'll go along with this for now even on 32-bit machines. We'll need to provide some other option for 16-bit machines and for machines with > 8 bits per byte. */ typedef unsigned _Unwind_Exception_Class __attribute__((__mode__(__DI__))); /* The unwind interface uses reason codes in several contexts to identify the reasons for failures or other actions. */ typedef enum { _URC_NO_REASON = 0, _URC_FOREIGN_EXCEPTION_CAUGHT = 1, _URC_FATAL_PHASE2_ERROR = 2, _URC_FATAL_PHASE1_ERROR = 3, _URC_NORMAL_STOP = 4, _URC_END_OF_STACK = 5, _URC_HANDLER_FOUND = 6, _URC_INSTALL_CONTEXT = 7, _URC_CONTINUE_UNWIND = 8 } _Unwind_Reason_Code; /* The unwind interface uses a pointer to an exception header object as its representation of an exception being thrown. In general, the full representation of an exception object is language- and implementation-specific, but it will be prefixed by a header understood by the unwind interface. */ struct _Unwind_Exception; typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, struct _Unwind_Exception *); struct _Unwind_Exception { _Unwind_Exception_Class exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; #if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__) _Unwind_Word private_[6]; #else _Unwind_Word private_1; _Unwind_Word private_2; #endif /* @@@ The IA-64 ABI says that this structure must be double-word aligned. Taking that literally does not make much sense generically. Instead we provide the maximum alignment required by any type for the machine. */ } __attribute__((__aligned__)); /* The ACTIONS argument to the personality routine is a bitwise OR of one or more of the following constants. */ typedef int _Unwind_Action; #define _UA_SEARCH_PHASE 1 #define _UA_CLEANUP_PHASE 2 #define _UA_HANDLER_FRAME 4 #define _UA_FORCE_UNWIND 8 #define _UA_END_OF_STACK 16 /* The target can override this macro to define any back-end-specific attributes required for the lowest-level stack frame. 
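
   As an illustration of the Level 1 interface declared below, a minimal
   stack walker (a sketch; print_frame is a made-up name, and the output
   format is arbitrary):

       #include <unwind.h>
       #include <stdio.h>

       static _Unwind_Reason_Code
       print_frame (struct _Unwind_Context *ctx, void *arg)
       {
         int ip_before_insn = 0;
         _Unwind_Ptr ip = _Unwind_GetIPInfo (ctx, &ip_before_insn);
         printf ("pc = %p\n", (void *) ip);
         return _URC_NO_REASON;     // keep walking every frame
       }

       int main (void)
       {
         _Unwind_Backtrace (print_frame, 0);
         return 0;
       }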
*/ #ifndef LIBGCC2_UNWIND_ATTRIBUTE #define LIBGCC2_UNWIND_ATTRIBUTE #endif /* This is an opaque type used to refer to a system-specific data structure used by the system unwinder. This context is created and destroyed by the system, and passed to the personality routine during unwinding. */ struct _Unwind_Context; /* Raise an exception, passing along the given exception object. */ extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_RaiseException (struct _Unwind_Exception *); /* Raise an exception for forced unwinding. */ typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) (int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *, struct _Unwind_Context *, void *); extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_ForcedUnwind (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *); /* Helper to invoke the exception_cleanup routine. */ extern void _Unwind_DeleteException (struct _Unwind_Exception *); /* Resume propagation of an existing exception. This is used after e.g. executing cleanup code, and not to implement rethrowing. */ extern void LIBGCC2_UNWIND_ATTRIBUTE _Unwind_Resume (struct _Unwind_Exception *); /* @@@ Resume propagation of a FORCE_UNWIND exception, or to rethrow a normal exception that was handled. */ extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_Resume_or_Rethrow (struct _Unwind_Exception *); /* @@@ Use unwind data to perform a stack backtrace. The trace callback is called for every stack frame in the call chain, but no cleanup actions are performed. */ typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn) (struct _Unwind_Context *, void *); extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_Backtrace (_Unwind_Trace_Fn, void *); /* These functions are used for communicating information about the unwind context (i.e. the unwind descriptors and the user register state) between the unwind library and the personality routine and landing pad. Only selected registers may be manipulated. */ extern _Unwind_Word _Unwind_GetGR (struct _Unwind_Context *, int); extern void _Unwind_SetGR (struct _Unwind_Context *, int, _Unwind_Word); extern _Unwind_Ptr _Unwind_GetIP (struct _Unwind_Context *); extern _Unwind_Ptr _Unwind_GetIPInfo (struct _Unwind_Context *, int *); extern void _Unwind_SetIP (struct _Unwind_Context *, _Unwind_Ptr); /* @@@ Retrieve the CFA of the given context. */ extern _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *); extern void *_Unwind_GetLanguageSpecificData (struct _Unwind_Context *); extern _Unwind_Ptr _Unwind_GetRegionStart (struct _Unwind_Context *); /* The personality routine is the function in the C++ (or other language) runtime library which serves as an interface between the system unwind library and language-specific exception handling semantics. It is specific to the code fragment described by an unwind info block, and it is always referenced via the pointer in the unwind info block, and hence it has no ABI-specified name. Note that this implies that two different C++ implementations can use different names, and have different contents in the language specific data area. Moreover, that the language specific data area contains no version info because name of the function invoked provides more effective versioning by detecting at link time the lack of code to handle the different data format. 
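
   A do-nothing personality routine matching the typedef below would be
   (a sketch; a real personality parses the LSDA and decides per phase):

       static _Unwind_Reason_Code
       noop_personality (int version, _Unwind_Action actions,
                         _Unwind_Exception_Class exc_class,
                         struct _Unwind_Exception *exc,
                         struct _Unwind_Context *context)
       {
         // Claims no handler in the search phase and installs nothing in
         // the cleanup phase, so unwinding just continues past the frame.
         return _URC_CONTINUE_UNWIND;
       }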
*/ typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn) (int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *, struct _Unwind_Context *); /* @@@ The following alternate entry points are for setjmp/longjmp based unwinding. */ struct SjLj_Function_Context; extern void _Unwind_SjLj_Register (struct SjLj_Function_Context *); extern void _Unwind_SjLj_Unregister (struct SjLj_Function_Context *); extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_SjLj_RaiseException (struct _Unwind_Exception *); extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_SjLj_ForcedUnwind (struct _Unwind_Exception *, _Unwind_Stop_Fn, void *); extern void LIBGCC2_UNWIND_ATTRIBUTE _Unwind_SjLj_Resume (struct _Unwind_Exception *); extern _Unwind_Reason_Code LIBGCC2_UNWIND_ATTRIBUTE _Unwind_SjLj_Resume_or_Rethrow (struct _Unwind_Exception *); /* @@@ The following provide access to the base addresses for text and data-relative addressing in the LDSA. In order to stay link compatible with the standard ABI for IA-64, we inline these. */ #ifdef __ia64__ #include <stdlib.h> static inline _Unwind_Ptr _Unwind_GetDataRelBase (struct _Unwind_Context *_C) { /* The GP is stored in R1. */ return _Unwind_GetGR (_C, 1); } static inline _Unwind_Ptr _Unwind_GetTextRelBase (struct _Unwind_Context *_C __attribute__ ((__unused__))) { abort (); return 0; } /* @@@ Retrieve the Backing Store Pointer of the given context. */ extern _Unwind_Word _Unwind_GetBSP (struct _Unwind_Context *); #else extern _Unwind_Ptr _Unwind_GetDataRelBase (struct _Unwind_Context *); extern _Unwind_Ptr _Unwind_GetTextRelBase (struct _Unwind_Context *); #endif /* @@@ Given an address, return the entry point of the function that contains it. */ extern void * _Unwind_FindEnclosingFunction (void *pc); #ifndef __SIZEOF_LONG__ #error "__SIZEOF_LONG__ macro not defined" #endif #ifndef __SIZEOF_POINTER__ #error "__SIZEOF_POINTER__ macro not defined" #endif /* leb128 type numbers have a potentially unlimited size. The target of the following definitions of _sleb128_t and _uleb128_t is to have efficient data types large enough to hold the leb128 type numbers used in the unwind code. Mostly these types will simply be defined to long and unsigned long except when a unsigned long data type on the target machine is not capable of storing a pointer. */ #if __SIZEOF_LONG__ >= __SIZEOF_POINTER__ typedef long _sleb128_t; typedef unsigned long _uleb128_t; #elif __SIZEOF_LONG_LONG__ >= __SIZEOF_POINTER__ typedef long long _sleb128_t; typedef unsigned long long _uleb128_t; #else # error "What type shall we use for _sleb128_t?" #endif #if defined (__SEH__) && !defined (__USING_SJLJ_EXCEPTIONS__) /* Handles the mapping from SEH to GCC interfaces. */ EXCEPTION_DISPOSITION _GCC_specific_handler (PEXCEPTION_RECORD, void *, PCONTEXT, PDISPATCHER_CONTEXT, _Unwind_Personality_Fn); #endif #ifdef __cplusplus } #endif #ifndef HIDE_EXPORTS #pragma GCC visibility pop #endif #endif /* unwind.h */ popcntintrin.h 0000644 00000003104 14711270676 0007455 0 ustar 00 /* Copyright (C) 2009-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __POPCNT__ # error "POPCNT instruction set not enabled" #endif /* __POPCNT__ */ #ifndef _POPCNTINTRIN_H_INCLUDED #define _POPCNTINTRIN_H_INCLUDED /* Calculate a number of bits set to 1. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_popcnt_u32 (unsigned int __X) { return __builtin_popcount (__X); } #ifdef __x86_64__ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_popcnt_u64 (unsigned long long __X) { return __builtin_popcountll (__X); } #endif #endif /* _POPCNTINTRIN_H_INCLUDED */ lzcntintrin.h 0000644 00000004235 14711270676 0007312 0 ustar 00 /* Copyright (C) 2009-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED # error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead." #endif #ifndef __LZCNT__ # error "LZCNT instruction is not enabled" #endif /* __LZCNT__ */ #ifndef _LZCNTINTRIN_H_INCLUDED #define _LZCNTINTRIN_H_INCLUDED extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt16 (unsigned short __X) { return __builtin_clzs (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt32 (unsigned int __X) { return __builtin_clz (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _lzcnt_u32 (unsigned int __X) { return __builtin_clz (__X); } #ifdef __x86_64__ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt64 (unsigned long long __X) { return __builtin_clzll (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _lzcnt_u64 (unsigned long long __X) { return __builtin_clzll (__X); } #endif #endif /* _LZCNTINTRIN_H_INCLUDED */ stdfix.h 0000644 00000013560 14711270676 0006236 0 ustar 00 /* Copyright (C) 2007-2013 Free Software Foundation, Inc. This file is part of GCC. 
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* ISO/IEC JTC1 SC22 WG14 N1169 * Date: 2006-04-04 * ISO/IEC TR 18037 * Programming languages - C - Extensions to support embedded processors */ #ifndef _STDFIX_H #define _STDFIX_H /* 7.18a.1 Introduction. */ #undef fract #undef accum #undef sat #define fract _Fract #define accum _Accum #define sat _Sat /* 7.18a.3 Precision macros. */ #undef SFRACT_FBIT #undef SFRACT_MIN #undef SFRACT_MAX #undef SFRACT_EPSILON #define SFRACT_FBIT __SFRACT_FBIT__ #define SFRACT_MIN __SFRACT_MIN__ #define SFRACT_MAX __SFRACT_MAX__ #define SFRACT_EPSILON __SFRACT_EPSILON__ #undef USFRACT_FBIT #undef USFRACT_MIN #undef USFRACT_MAX #undef USFRACT_EPSILON #define USFRACT_FBIT __USFRACT_FBIT__ #define USFRACT_MIN __USFRACT_MIN__ /* GCC extension. */ #define USFRACT_MAX __USFRACT_MAX__ #define USFRACT_EPSILON __USFRACT_EPSILON__ #undef FRACT_FBIT #undef FRACT_MIN #undef FRACT_MAX #undef FRACT_EPSILON #define FRACT_FBIT __FRACT_FBIT__ #define FRACT_MIN __FRACT_MIN__ #define FRACT_MAX __FRACT_MAX__ #define FRACT_EPSILON __FRACT_EPSILON__ #undef UFRACT_FBIT #undef UFRACT_MIN #undef UFRACT_MAX #undef UFRACT_EPSILON #define UFRACT_FBIT __UFRACT_FBIT__ #define UFRACT_MIN __UFRACT_MIN__ /* GCC extension. */ #define UFRACT_MAX __UFRACT_MAX__ #define UFRACT_EPSILON __UFRACT_EPSILON__ #undef LFRACT_FBIT #undef LFRACT_MIN #undef LFRACT_MAX #undef LFRACT_EPSILON #define LFRACT_FBIT __LFRACT_FBIT__ #define LFRACT_MIN __LFRACT_MIN__ #define LFRACT_MAX __LFRACT_MAX__ #define LFRACT_EPSILON __LFRACT_EPSILON__ #undef ULFRACT_FBIT #undef ULFRACT_MIN #undef ULFRACT_MAX #undef ULFRACT_EPSILON #define ULFRACT_FBIT __ULFRACT_FBIT__ #define ULFRACT_MIN __ULFRACT_MIN__ /* GCC extension. */ #define ULFRACT_MAX __ULFRACT_MAX__ #define ULFRACT_EPSILON __ULFRACT_EPSILON__ #undef LLFRACT_FBIT #undef LLFRACT_MIN #undef LLFRACT_MAX #undef LLFRACT_EPSILON #define LLFRACT_FBIT __LLFRACT_FBIT__ /* GCC extension. */ #define LLFRACT_MIN __LLFRACT_MIN__ /* GCC extension. */ #define LLFRACT_MAX __LLFRACT_MAX__ /* GCC extension. */ #define LLFRACT_EPSILON __LLFRACT_EPSILON__ /* GCC extension. */ #undef ULLFRACT_FBIT #undef ULLFRACT_MIN #undef ULLFRACT_MAX #undef ULLFRACT_EPSILON #define ULLFRACT_FBIT __ULLFRACT_FBIT__ /* GCC extension. */ #define ULLFRACT_MIN __ULLFRACT_MIN__ /* GCC extension. */ #define ULLFRACT_MAX __ULLFRACT_MAX__ /* GCC extension. */ #define ULLFRACT_EPSILON __ULLFRACT_EPSILON__ /* GCC extension. 
*/ #undef SACCUM_FBIT #undef SACCUM_IBIT #undef SACCUM_MIN #undef SACCUM_MAX #undef SACCUM_EPSILON #define SACCUM_FBIT __SACCUM_FBIT__ #define SACCUM_IBIT __SACCUM_IBIT__ #define SACCUM_MIN __SACCUM_MIN__ #define SACCUM_MAX __SACCUM_MAX__ #define SACCUM_EPSILON __SACCUM_EPSILON__ #undef USACCUM_FBIT #undef USACCUM_IBIT #undef USACCUM_MIN #undef USACCUM_MAX #undef USACCUM_EPSILON #define USACCUM_FBIT __USACCUM_FBIT__ #define USACCUM_IBIT __USACCUM_IBIT__ #define USACCUM_MIN __USACCUM_MIN__ /* GCC extension. */ #define USACCUM_MAX __USACCUM_MAX__ #define USACCUM_EPSILON __USACCUM_EPSILON__ #undef ACCUM_FBIT #undef ACCUM_IBIT #undef ACCUM_MIN #undef ACCUM_MAX #undef ACCUM_EPSILON #define ACCUM_FBIT __ACCUM_FBIT__ #define ACCUM_IBIT __ACCUM_IBIT__ #define ACCUM_MIN __ACCUM_MIN__ #define ACCUM_MAX __ACCUM_MAX__ #define ACCUM_EPSILON __ACCUM_EPSILON__ #undef UACCUM_FBIT #undef UACCUM_IBIT #undef UACCUM_MIN #undef UACCUM_MAX #undef UACCUM_EPSILON #define UACCUM_FBIT __UACCUM_FBIT__ #define UACCUM_IBIT __UACCUM_IBIT__ #define UACCUM_MIN __UACCUM_MIN__ /* GCC extension. */ #define UACCUM_MAX __UACCUM_MAX__ #define UACCUM_EPSILON __UACCUM_EPSILON__ #undef LACCUM_FBIT #undef LACCUM_IBIT #undef LACCUM_MIN #undef LACCUM_MAX #undef LACCUM_EPSILON #define LACCUM_FBIT __LACCUM_FBIT__ #define LACCUM_IBIT __LACCUM_IBIT__ #define LACCUM_MIN __LACCUM_MIN__ #define LACCUM_MAX __LACCUM_MAX__ #define LACCUM_EPSILON __LACCUM_EPSILON__ #undef ULACCUM_FBIT #undef ULACCUM_IBIT #undef ULACCUM_MIN #undef ULACCUM_MAX #undef ULACCUM_EPSILON #define ULACCUM_FBIT __ULACCUM_FBIT__ #define ULACCUM_IBIT __ULACCUM_IBIT__ #define ULACCUM_MIN __ULACCUM_MIN__ /* GCC extension. */ #define ULACCUM_MAX __ULACCUM_MAX__ #define ULACCUM_EPSILON __ULACCUM_EPSILON__ #undef LLACCUM_FBIT #undef LLACCUM_IBIT #undef LLACCUM_MIN #undef LLACCUM_MAX #undef LLACCUM_EPSILON #define LLACCUM_FBIT __LLACCUM_FBIT__ /* GCC extension. */ #define LLACCUM_IBIT __LLACCUM_IBIT__ /* GCC extension. */ #define LLACCUM_MIN __LLACCUM_MIN__ /* GCC extension. */ #define LLACCUM_MAX __LLACCUM_MAX__ /* GCC extension. */ #define LLACCUM_EPSILON __LLACCUM_EPSILON__ /* GCC extension. */ #undef ULLACCUM_FBIT #undef ULLACCUM_IBIT #undef ULLACCUM_MIN #undef ULLACCUM_MAX #undef ULLACCUM_EPSILON #define ULLACCUM_FBIT __ULLACCUM_FBIT__ /* GCC extension. */ #define ULLACCUM_IBIT __ULLACCUM_IBIT__ /* GCC extension. */ #define ULLACCUM_MIN __ULLACCUM_MIN__ /* GCC extension. */ #define ULLACCUM_MAX __ULLACCUM_MAX__ /* GCC extension. */ #define ULLACCUM_EPSILON __ULLACCUM_EPSILON__ /* GCC extension. */ #endif /* _STDFIX_H */ tmmintrin.h 0000644 00000020021 14711270677 0006745 0 ustar 00 /* Copyright (C) 2006-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. 
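
A sketch of the stdfix.h types above. The _Fract/_Accum types come from ISO/IEC TR 18037 and are only available on GCC targets that implement them (avr-gcc is one such target), so this is illustrative rather than portable; the shift-then-add averaging idiom is an assumption about how one might avoid intermediate overflow, not code from the header.

#include <stdfix.h>

/* Average two fractional values: `sat fract` clamps on overflow instead of
   wrapping, and halving each operand first keeps the sum in range.
   Values span [FRACT_MIN, FRACT_MAX] with FRACT_FBIT fractional bits. */
sat fract average (sat fract a, sat fract b)
{
  return (a >> 1) + (b >> 1);
}
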
If not, see <http://www.gnu.org/licenses/>. */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.1. */ #ifndef _TMMINTRIN_H_INCLUDED #define _TMMINTRIN_H_INCLUDED #ifndef __SSSE3__ # error "SSSE3 instruction set not enabled" #else /* We need definitions from the SSE3, SSE2 and SSE header files*/ #include <pmmintrin.h> extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadds_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_pi16 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_pi32 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadds_pi16 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsubs_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_pi16 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_pi32 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsubs_pi16 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maddubs_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maddubs_pi16 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhrs_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhrs_pi16 (__m64 __X, __m64 __Y) 
{ return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_epi8 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pi8 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi8 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi8 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi16 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi32 (__m64 __X, __m64 __Y) { return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); } #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) { return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X, (__v2di)__Y, __N * 8); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) { return (__m64) __builtin_ia32_palignr ((__v1di)__X, (__v1di)__Y, __N * 8); } #else #define _mm_alignr_epi8(X, Y, N) \ ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), \ (int)(N) * 8)) #define _mm_alignr_pi8(X, Y, N) \ ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X), \ (__v1di)(__m64)(Y), \ (int)(N) * 8)) #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_epi8 (__m128i __X) { return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_epi16 (__m128i __X) { return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_epi32 (__m128i __X) { return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_pi8 (__m64 __X) { return (__m64) __builtin_ia32_pabsb ((__v8qi)__X); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_pi16 (__m64 __X) { return (__m64) __builtin_ia32_pabsw ((__v4hi)__X); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_abs_pi32 (__m64 __X) { return (__m64) __builtin_ia32_pabsd ((__v2si)__X); } #endif /* __SSSE3__ */ #endif /* _TMMINTRIN_H_INCLUDED */ stddef.h 0000644 
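
A minimal usage sketch for the tmmintrin.h (SSSE3) intrinsics above, assuming `gcc -mssse3` on x86-64; it demonstrates _mm_shuffle_epi8, with the set/store helpers coming from the SSE2 header that tmmintrin.h pulls in.

#include <stdio.h>
#include <tmmintrin.h>

int main (void)
{
  __m128i v   = _mm_setr_epi8 (0, 1, 2, 3, 4, 5, 6, 7,
                               8, 9, 10, 11, 12, 13, 14, 15);
  __m128i idx = _mm_setr_epi8 (15, 14, 13, 12, 11, 10, 9, 8,
                               7, 6, 5, 4, 3, 2, 1, 0);
  __m128i rev = _mm_shuffle_epi8 (v, idx);   /* byte i <- v[idx[i]] */
  unsigned char out[16];
  _mm_storeu_si128 ((__m128i *) out, rev);
  printf ("first byte after reversal: %u\n", out[0]);   /* prints 15 */
  return 0;
}
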
00000032463 14711270677 0006212 0 ustar 00 /* Copyright (C) 1989-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* * ISO C Standard: 7.17 Common definitions <stddef.h> */ #if (!defined(_STDDEF_H) && !defined(_STDDEF_H_) && !defined(_ANSI_STDDEF_H) \ && !defined(__STDDEF_H__)) \ || defined(__need_wchar_t) || defined(__need_size_t) \ || defined(__need_ptrdiff_t) || defined(__need_NULL) \ || defined(__need_wint_t) /* Any one of these symbols __need_* means that GNU libc wants us just to define one data type. So don't define the symbols that indicate this file's entire job has been done. */ #if (!defined(__need_wchar_t) && !defined(__need_size_t) \ && !defined(__need_ptrdiff_t) && !defined(__need_NULL) \ && !defined(__need_wint_t)) #define _STDDEF_H #define _STDDEF_H_ /* snaroff@next.com says the NeXT needs this. */ #define _ANSI_STDDEF_H #endif #ifndef __sys_stdtypes_h /* This avoids lossage on SunOS but only if stdtypes.h comes first. There's no way to win with the other order! Sun lossage. */ /* On 4.3bsd-net2, make sure ansi.h is included, so we have one less case to deal with in the following. */ #if defined (__BSD_NET2__) || defined (____386BSD____) || (defined (__FreeBSD__) && (__FreeBSD__ < 5)) || defined(__NetBSD__) #include <machine/ansi.h> #endif /* On FreeBSD 5, machine/ansi.h does not exist anymore... */ #if defined (__FreeBSD__) && (__FreeBSD__ >= 5) #include <sys/_types.h> #endif /* In 4.3bsd-net2, machine/ansi.h defines these symbols, which are defined if the corresponding type is *not* defined. FreeBSD-2.1 defines _MACHINE_ANSI_H_ instead of _ANSI_H_. NetBSD defines _I386_ANSI_H_ and _X86_64_ANSI_H_ instead of _ANSI_H_ */ #if defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_) #if !defined(_SIZE_T_) && !defined(_BSD_SIZE_T_) #define _SIZE_T #endif #if !defined(_PTRDIFF_T_) && !defined(_BSD_PTRDIFF_T_) #define _PTRDIFF_T #endif /* On BSD/386 1.1, at least, machine/ansi.h defines _BSD_WCHAR_T_ instead of _WCHAR_T_. */ #if !defined(_WCHAR_T_) && !defined(_BSD_WCHAR_T_) #ifndef _BSD_WCHAR_T_ #define _WCHAR_T #endif #endif /* Undef _FOO_T_ if we are supposed to define foo_t. */ #if defined (__need_ptrdiff_t) || defined (_STDDEF_H_) #undef _PTRDIFF_T_ #undef _BSD_PTRDIFF_T_ #endif #if defined (__need_size_t) || defined (_STDDEF_H_) #undef _SIZE_T_ #undef _BSD_SIZE_T_ #endif #if defined (__need_wchar_t) || defined (_STDDEF_H_) #undef _WCHAR_T_ #undef _BSD_WCHAR_T_ #endif #endif /* defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_) */ /* Sequent's header files use _PTRDIFF_T_ in some conflicting way. Just ignore it. 
*/ #if defined (__sequent__) && defined (_PTRDIFF_T_) #undef _PTRDIFF_T_ #endif /* On VxWorks, <type/vxTypesBase.h> may have defined macros like _TYPE_size_t which will typedef size_t. fixincludes patched the vxTypesBase.h so that this macro is only defined if _GCC_SIZE_T is not defined, and so that defining this macro defines _GCC_SIZE_T. If we find that the macros are still defined at this point, we must invoke them so that the type is defined as expected. */ #if defined (_TYPE_ptrdiff_t) && (defined (__need_ptrdiff_t) || defined (_STDDEF_H_)) _TYPE_ptrdiff_t; #undef _TYPE_ptrdiff_t #endif #if defined (_TYPE_size_t) && (defined (__need_size_t) || defined (_STDDEF_H_)) _TYPE_size_t; #undef _TYPE_size_t #endif #if defined (_TYPE_wchar_t) && (defined (__need_wchar_t) || defined (_STDDEF_H_)) _TYPE_wchar_t; #undef _TYPE_wchar_t #endif /* In case nobody has defined these types, but we aren't running under GCC 2.00, make sure that __PTRDIFF_TYPE__, __SIZE_TYPE__, and __WCHAR_TYPE__ have reasonable values. This can happen if the parts of GCC is compiled by an older compiler, that actually include gstddef.h, such as collect2. */ /* Signed type of difference of two pointers. */ /* Define this type if we are doing the whole job, or if we want this type in particular. */ #if defined (_STDDEF_H) || defined (__need_ptrdiff_t) #ifndef _PTRDIFF_T /* in case <sys/types.h> has defined it. */ #ifndef _T_PTRDIFF_ #ifndef _T_PTRDIFF #ifndef __PTRDIFF_T #ifndef _PTRDIFF_T_ #ifndef _BSD_PTRDIFF_T_ #ifndef ___int_ptrdiff_t_h #ifndef _GCC_PTRDIFF_T #define _PTRDIFF_T #define _T_PTRDIFF_ #define _T_PTRDIFF #define __PTRDIFF_T #define _PTRDIFF_T_ #define _BSD_PTRDIFF_T_ #define ___int_ptrdiff_t_h #define _GCC_PTRDIFF_T #ifndef __PTRDIFF_TYPE__ #define __PTRDIFF_TYPE__ long int #endif typedef __PTRDIFF_TYPE__ ptrdiff_t; #endif /* _GCC_PTRDIFF_T */ #endif /* ___int_ptrdiff_t_h */ #endif /* _BSD_PTRDIFF_T_ */ #endif /* _PTRDIFF_T_ */ #endif /* __PTRDIFF_T */ #endif /* _T_PTRDIFF */ #endif /* _T_PTRDIFF_ */ #endif /* _PTRDIFF_T */ /* If this symbol has done its job, get rid of it. */ #undef __need_ptrdiff_t #endif /* _STDDEF_H or __need_ptrdiff_t. */ /* Unsigned type of `sizeof' something. */ /* Define this type if we are doing the whole job, or if we want this type in particular. */ #if defined (_STDDEF_H) || defined (__need_size_t) #ifndef __size_t__ /* BeOS */ #ifndef __SIZE_T__ /* Cray Unicos/Mk */ #ifndef _SIZE_T /* in case <sys/types.h> has defined it. */ #ifndef _SYS_SIZE_T_H #ifndef _T_SIZE_ #ifndef _T_SIZE #ifndef __SIZE_T #ifndef _SIZE_T_ #ifndef _BSD_SIZE_T_ #ifndef _SIZE_T_DEFINED_ #ifndef _SIZE_T_DEFINED #ifndef _BSD_SIZE_T_DEFINED_ /* Darwin */ #ifndef _SIZE_T_DECLARED /* FreeBSD 5 */ #ifndef ___int_size_t_h #ifndef _GCC_SIZE_T #ifndef _SIZET_ #ifndef __size_t #define __size_t__ /* BeOS */ #define __SIZE_T__ /* Cray Unicos/Mk */ #define _SIZE_T #define _SYS_SIZE_T_H #define _T_SIZE_ #define _T_SIZE #define __SIZE_T #define _SIZE_T_ #define _BSD_SIZE_T_ #define _SIZE_T_DEFINED_ #define _SIZE_T_DEFINED #define _BSD_SIZE_T_DEFINED_ /* Darwin */ #define _SIZE_T_DECLARED /* FreeBSD 5 */ #define ___int_size_t_h #define _GCC_SIZE_T #define _SIZET_ #if (defined (__FreeBSD__) && (__FreeBSD__ >= 5)) \ || defined(__FreeBSD_kernel__) /* __size_t is a typedef on FreeBSD 5, must not trash it. */ #elif defined (__VMS__) /* __size_t is also a typedef on VMS. 
*/ #else #define __size_t #endif #ifndef __SIZE_TYPE__ #define __SIZE_TYPE__ long unsigned int #endif #if !(defined (__GNUG__) && defined (size_t)) typedef __SIZE_TYPE__ size_t; #ifdef __BEOS__ typedef long ssize_t; #endif /* __BEOS__ */ #endif /* !(defined (__GNUG__) && defined (size_t)) */ #endif /* __size_t */ #endif /* _SIZET_ */ #endif /* _GCC_SIZE_T */ #endif /* ___int_size_t_h */ #endif /* _SIZE_T_DECLARED */ #endif /* _BSD_SIZE_T_DEFINED_ */ #endif /* _SIZE_T_DEFINED */ #endif /* _SIZE_T_DEFINED_ */ #endif /* _BSD_SIZE_T_ */ #endif /* _SIZE_T_ */ #endif /* __SIZE_T */ #endif /* _T_SIZE */ #endif /* _T_SIZE_ */ #endif /* _SYS_SIZE_T_H */ #endif /* _SIZE_T */ #endif /* __SIZE_T__ */ #endif /* __size_t__ */ #undef __need_size_t #endif /* _STDDEF_H or __need_size_t. */ /* Wide character type. Locale-writers should change this as necessary to be big enough to hold unique values not between 0 and 127, and not (wchar_t) -1, for each defined multibyte character. */ /* Define this type if we are doing the whole job, or if we want this type in particular. */ #if defined (_STDDEF_H) || defined (__need_wchar_t) #ifndef __wchar_t__ /* BeOS */ #ifndef __WCHAR_T__ /* Cray Unicos/Mk */ #ifndef _WCHAR_T #ifndef _T_WCHAR_ #ifndef _T_WCHAR #ifndef __WCHAR_T #ifndef _WCHAR_T_ #ifndef _BSD_WCHAR_T_ #ifndef _BSD_WCHAR_T_DEFINED_ /* Darwin */ #ifndef _BSD_RUNE_T_DEFINED_ /* Darwin */ #ifndef _WCHAR_T_DECLARED /* FreeBSD 5 */ #ifndef _WCHAR_T_DEFINED_ #ifndef _WCHAR_T_DEFINED #ifndef _WCHAR_T_H #ifndef ___int_wchar_t_h #ifndef __INT_WCHAR_T_H #ifndef _GCC_WCHAR_T #define __wchar_t__ /* BeOS */ #define __WCHAR_T__ /* Cray Unicos/Mk */ #define _WCHAR_T #define _T_WCHAR_ #define _T_WCHAR #define __WCHAR_T #define _WCHAR_T_ #define _BSD_WCHAR_T_ #define _WCHAR_T_DEFINED_ #define _WCHAR_T_DEFINED #define _WCHAR_T_H #define ___int_wchar_t_h #define __INT_WCHAR_T_H #define _GCC_WCHAR_T #define _WCHAR_T_DECLARED /* On BSD/386 1.1, at least, machine/ansi.h defines _BSD_WCHAR_T_ instead of _WCHAR_T_, and _BSD_RUNE_T_ (which, unlike the other symbols in the _FOO_T_ family, stays defined even after its corresponding type is defined). If we define wchar_t, then we must undef _WCHAR_T_; for BSD/386 1.1 (and perhaps others), if we undef _WCHAR_T_, then we must also define rune_t, since headers like runetype.h assume that if machine/ansi.h is included, and _BSD_WCHAR_T_ is not defined, then rune_t is available. machine/ansi.h says, "Note that _WCHAR_T_ and _RUNE_T_ must be of the same type." */ #ifdef _BSD_WCHAR_T_ #undef _BSD_WCHAR_T_ #ifdef _BSD_RUNE_T_ #if !defined (_ANSI_SOURCE) && !defined (_POSIX_SOURCE) typedef _BSD_RUNE_T_ rune_t; #define _BSD_WCHAR_T_DEFINED_ #define _BSD_RUNE_T_DEFINED_ /* Darwin */ #if defined (__FreeBSD__) && (__FreeBSD__ < 5) /* Why is this file so hard to maintain properly? In contrast to the comment above regarding BSD/386 1.1, on FreeBSD for as long as the symbol has existed, _BSD_RUNE_T_ must not stay defined or redundant typedefs will occur when stdlib.h is included after this file. */ #undef _BSD_RUNE_T_ #endif #endif #endif #endif /* FreeBSD 5 can't be handled well using "traditional" logic above since it no longer defines _BSD_RUNE_T_ yet still desires to export rune_t in some cases... 
*/ #if defined (__FreeBSD__) && (__FreeBSD__ >= 5) #if !defined (_ANSI_SOURCE) && !defined (_POSIX_SOURCE) #if __BSD_VISIBLE #ifndef _RUNE_T_DECLARED typedef __rune_t rune_t; #define _RUNE_T_DECLARED #endif #endif #endif #endif #ifndef __WCHAR_TYPE__ #define __WCHAR_TYPE__ int #endif #ifndef __cplusplus typedef __WCHAR_TYPE__ wchar_t; #endif #endif #endif #endif #endif #endif #endif #endif /* _WCHAR_T_DECLARED */ #endif /* _BSD_RUNE_T_DEFINED_ */ #endif #endif #endif #endif #endif #endif #endif #endif /* __WCHAR_T__ */ #endif /* __wchar_t__ */ #undef __need_wchar_t #endif /* _STDDEF_H or __need_wchar_t. */ #if defined (__need_wint_t) #ifndef _WINT_T #define _WINT_T #ifndef __WINT_TYPE__ #define __WINT_TYPE__ unsigned int #endif typedef __WINT_TYPE__ wint_t; #endif #undef __need_wint_t #endif /* In 4.3bsd-net2, leave these undefined to indicate that size_t, etc. are already defined. */ /* BSD/OS 3.1 and FreeBSD [23].x require the MACHINE_ANSI_H check here. */ /* NetBSD 5 requires the I386_ANSI_H and X86_64_ANSI_H checks here. */ #if defined(_ANSI_H_) || defined(_MACHINE_ANSI_H_) || defined(_X86_64_ANSI_H_) || defined(_I386_ANSI_H_) /* The references to _GCC_PTRDIFF_T_, _GCC_SIZE_T_, and _GCC_WCHAR_T_ are probably typos and should be removed before 2.8 is released. */ #ifdef _GCC_PTRDIFF_T_ #undef _PTRDIFF_T_ #undef _BSD_PTRDIFF_T_ #endif #ifdef _GCC_SIZE_T_ #undef _SIZE_T_ #undef _BSD_SIZE_T_ #endif #ifdef _GCC_WCHAR_T_ #undef _WCHAR_T_ #undef _BSD_WCHAR_T_ #endif /* The following ones are the real ones. */ #ifdef _GCC_PTRDIFF_T #undef _PTRDIFF_T_ #undef _BSD_PTRDIFF_T_ #endif #ifdef _GCC_SIZE_T #undef _SIZE_T_ #undef _BSD_SIZE_T_ #endif #ifdef _GCC_WCHAR_T #undef _WCHAR_T_ #undef _BSD_WCHAR_T_ #endif #endif /* _ANSI_H_ || _MACHINE_ANSI_H_ || _X86_64_ANSI_H_ || _I386_ANSI_H_ */ #endif /* __sys_stdtypes_h */ /* A null pointer constant. */ #if defined (_STDDEF_H) || defined (__need_NULL) #undef NULL /* in case <stdio.h> has defined it. */ #ifdef __GNUG__ #define NULL __null #else /* G++ */ #ifndef __cplusplus #define NULL ((void *)0) #else /* C++ */ #define NULL 0 #endif /* C++ */ #endif /* G++ */ #endif /* NULL not defined and <stddef.h> or need NULL. */ #undef __need_NULL #ifdef _STDDEF_H /* Offset of member MEMBER in a struct of type TYPE. */ #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) \ || (defined(__cplusplus) && __cplusplus >= 201103L) #ifndef _GCC_MAX_ALIGN_T #define _GCC_MAX_ALIGN_T /* Type whose alignment is supported in every context and is at least as great as that of any standard type not using alignment specifiers. */ typedef struct { long long __max_align_ll __attribute__((__aligned__(__alignof__(long long)))); long double __max_align_ld __attribute__((__aligned__(__alignof__(long double)))); } max_align_t; #endif #endif /* C11 or C++11. */ #if defined(__cplusplus) && __cplusplus >= 201103L #ifndef _GXX_NULLPTR_T #define _GXX_NULLPTR_T typedef decltype(nullptr) nullptr_t; #endif #endif /* C++11. */ #endif /* _STDDEF_H was defined this time */ #endif /* !_STDDEF_H && !_STDDEF_H_ && !_ANSI_STDDEF_H && !__STDDEF_H__ || __need_XXX was not defined before */ stdint-gcc.h 0000644 00000015565 14711270700 0006767 0 ustar 00 /* Copyright (C) 2008-2013 Free Software Foundation, Inc. This file is part of GCC. 
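
A minimal usage sketch for the stddef.h entry above: offsetof expands to __builtin_offsetof as defined there, and max_align_t (C11) names the strictest fundamental alignment. The struct layout shown in the comments is typical for LP64 targets, an assumption rather than a guarantee.

#include <stddef.h>
#include <stdio.h>

struct packet { char tag; long payload; };

int main (void)
{
  size_t off = offsetof (struct packet, payload);
  printf ("payload starts at byte %zu\n", off);   /* typically 8 on LP64 */
  printf ("max_align_t alignment: %zu\n", _Alignof (max_align_t));
  return 0;
}
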
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* * ISO C Standard: 7.18 Integer types <stdint.h> */ #ifndef _GCC_STDINT_H #define _GCC_STDINT_H /* 7.8.1.1 Exact-width integer types */ #ifdef __INT8_TYPE__ typedef __INT8_TYPE__ int8_t; #endif #ifdef __INT16_TYPE__ typedef __INT16_TYPE__ int16_t; #endif #ifdef __INT32_TYPE__ typedef __INT32_TYPE__ int32_t; #endif #ifdef __INT64_TYPE__ typedef __INT64_TYPE__ int64_t; #endif #ifdef __UINT8_TYPE__ typedef __UINT8_TYPE__ uint8_t; #endif #ifdef __UINT16_TYPE__ typedef __UINT16_TYPE__ uint16_t; #endif #ifdef __UINT32_TYPE__ typedef __UINT32_TYPE__ uint32_t; #endif #ifdef __UINT64_TYPE__ typedef __UINT64_TYPE__ uint64_t; #endif /* 7.8.1.2 Minimum-width integer types */ typedef __INT_LEAST8_TYPE__ int_least8_t; typedef __INT_LEAST16_TYPE__ int_least16_t; typedef __INT_LEAST32_TYPE__ int_least32_t; typedef __INT_LEAST64_TYPE__ int_least64_t; typedef __UINT_LEAST8_TYPE__ uint_least8_t; typedef __UINT_LEAST16_TYPE__ uint_least16_t; typedef __UINT_LEAST32_TYPE__ uint_least32_t; typedef __UINT_LEAST64_TYPE__ uint_least64_t; /* 7.8.1.3 Fastest minimum-width integer types */ typedef __INT_FAST8_TYPE__ int_fast8_t; typedef __INT_FAST16_TYPE__ int_fast16_t; typedef __INT_FAST32_TYPE__ int_fast32_t; typedef __INT_FAST64_TYPE__ int_fast64_t; typedef __UINT_FAST8_TYPE__ uint_fast8_t; typedef __UINT_FAST16_TYPE__ uint_fast16_t; typedef __UINT_FAST32_TYPE__ uint_fast32_t; typedef __UINT_FAST64_TYPE__ uint_fast64_t; /* 7.8.1.4 Integer types capable of holding object pointers */ #ifdef __INTPTR_TYPE__ typedef __INTPTR_TYPE__ intptr_t; #endif #ifdef __UINTPTR_TYPE__ typedef __UINTPTR_TYPE__ uintptr_t; #endif /* 7.8.1.5 Greatest-width integer types */ typedef __INTMAX_TYPE__ intmax_t; typedef __UINTMAX_TYPE__ uintmax_t; #if (!defined __cplusplus || __cplusplus >= 201103L \ || defined __STDC_LIMIT_MACROS) /* 7.18.2 Limits of specified-width integer types */ #ifdef __INT8_MAX__ # undef INT8_MAX # define INT8_MAX __INT8_MAX__ # undef INT8_MIN # define INT8_MIN (-INT8_MAX - 1) #endif #ifdef __UINT8_MAX__ # undef UINT8_MAX # define UINT8_MAX __UINT8_MAX__ #endif #ifdef __INT16_MAX__ # undef INT16_MAX # define INT16_MAX __INT16_MAX__ # undef INT16_MIN # define INT16_MIN (-INT16_MAX - 1) #endif #ifdef __UINT16_MAX__ # undef UINT16_MAX # define UINT16_MAX __UINT16_MAX__ #endif #ifdef __INT32_MAX__ # undef INT32_MAX # define INT32_MAX __INT32_MAX__ # undef INT32_MIN # define INT32_MIN (-INT32_MAX - 1) #endif #ifdef __UINT32_MAX__ # undef UINT32_MAX # define UINT32_MAX __UINT32_MAX__ #endif #ifdef __INT64_MAX__ # undef INT64_MAX # define INT64_MAX __INT64_MAX__ # undef INT64_MIN # define INT64_MIN (-INT64_MAX - 1) #endif #ifdef __UINT64_MAX__ # undef UINT64_MAX # define 
UINT64_MAX __UINT64_MAX__ #endif #undef INT_LEAST8_MAX #define INT_LEAST8_MAX __INT_LEAST8_MAX__ #undef INT_LEAST8_MIN #define INT_LEAST8_MIN (-INT_LEAST8_MAX - 1) #undef UINT_LEAST8_MAX #define UINT_LEAST8_MAX __UINT_LEAST8_MAX__ #undef INT_LEAST16_MAX #define INT_LEAST16_MAX __INT_LEAST16_MAX__ #undef INT_LEAST16_MIN #define INT_LEAST16_MIN (-INT_LEAST16_MAX - 1) #undef UINT_LEAST16_MAX #define UINT_LEAST16_MAX __UINT_LEAST16_MAX__ #undef INT_LEAST32_MAX #define INT_LEAST32_MAX __INT_LEAST32_MAX__ #undef INT_LEAST32_MIN #define INT_LEAST32_MIN (-INT_LEAST32_MAX - 1) #undef UINT_LEAST32_MAX #define UINT_LEAST32_MAX __UINT_LEAST32_MAX__ #undef INT_LEAST64_MAX #define INT_LEAST64_MAX __INT_LEAST64_MAX__ #undef INT_LEAST64_MIN #define INT_LEAST64_MIN (-INT_LEAST64_MAX - 1) #undef UINT_LEAST64_MAX #define UINT_LEAST64_MAX __UINT_LEAST64_MAX__ #undef INT_FAST8_MAX #define INT_FAST8_MAX __INT_FAST8_MAX__ #undef INT_FAST8_MIN #define INT_FAST8_MIN (-INT_FAST8_MAX - 1) #undef UINT_FAST8_MAX #define UINT_FAST8_MAX __UINT_FAST8_MAX__ #undef INT_FAST16_MAX #define INT_FAST16_MAX __INT_FAST16_MAX__ #undef INT_FAST16_MIN #define INT_FAST16_MIN (-INT_FAST16_MAX - 1) #undef UINT_FAST16_MAX #define UINT_FAST16_MAX __UINT_FAST16_MAX__ #undef INT_FAST32_MAX #define INT_FAST32_MAX __INT_FAST32_MAX__ #undef INT_FAST32_MIN #define INT_FAST32_MIN (-INT_FAST32_MAX - 1) #undef UINT_FAST32_MAX #define UINT_FAST32_MAX __UINT_FAST32_MAX__ #undef INT_FAST64_MAX #define INT_FAST64_MAX __INT_FAST64_MAX__ #undef INT_FAST64_MIN #define INT_FAST64_MIN (-INT_FAST64_MAX - 1) #undef UINT_FAST64_MAX #define UINT_FAST64_MAX __UINT_FAST64_MAX__ #ifdef __INTPTR_MAX__ # undef INTPTR_MAX # define INTPTR_MAX __INTPTR_MAX__ # undef INTPTR_MIN # define INTPTR_MIN (-INTPTR_MAX - 1) #endif #ifdef __UINTPTR_MAX__ # undef UINTPTR_MAX # define UINTPTR_MAX __UINTPTR_MAX__ #endif #undef INTMAX_MAX #define INTMAX_MAX __INTMAX_MAX__ #undef INTMAX_MIN #define INTMAX_MIN (-INTMAX_MAX - 1) #undef UINTMAX_MAX #define UINTMAX_MAX __UINTMAX_MAX__ /* 7.18.3 Limits of other integer types */ #undef PTRDIFF_MAX #define PTRDIFF_MAX __PTRDIFF_MAX__ #undef PTRDIFF_MIN #define PTRDIFF_MIN (-PTRDIFF_MAX - 1) #undef SIG_ATOMIC_MAX #define SIG_ATOMIC_MAX __SIG_ATOMIC_MAX__ #undef SIG_ATOMIC_MIN #define SIG_ATOMIC_MIN __SIG_ATOMIC_MIN__ #undef SIZE_MAX #define SIZE_MAX __SIZE_MAX__ #undef WCHAR_MAX #define WCHAR_MAX __WCHAR_MAX__ #undef WCHAR_MIN #define WCHAR_MIN __WCHAR_MIN__ #undef WINT_MAX #define WINT_MAX __WINT_MAX__ #undef WINT_MIN #define WINT_MIN __WINT_MIN__ #endif /* (!defined __cplusplus || __cplusplus >= 201103L || defined __STDC_LIMIT_MACROS) */ #if (!defined __cplusplus || __cplusplus >= 201103L \ || defined __STDC_CONSTANT_MACROS) #undef INT8_C #define INT8_C(c) __INT8_C(c) #undef INT16_C #define INT16_C(c) __INT16_C(c) #undef INT32_C #define INT32_C(c) __INT32_C(c) #undef INT64_C #define INT64_C(c) __INT64_C(c) #undef UINT8_C #define UINT8_C(c) __UINT8_C(c) #undef UINT16_C #define UINT16_C(c) __UINT16_C(c) #undef UINT32_C #define UINT32_C(c) __UINT32_C(c) #undef UINT64_C #define UINT64_C(c) __UINT64_C(c) #undef INTMAX_C #define INTMAX_C(c) __INTMAX_C(c) #undef UINTMAX_C #define UINTMAX_C(c) __UINTMAX_C(c) #endif /* (!defined __cplusplus || __cplusplus >= 201103L || defined __STDC_CONSTANT_MACROS) */ #endif /* _GCC_STDINT_H */ rdseedintrin.h 0000644 00000003505 14711270700 0007411 0 ustar 00 /* Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of GCC. 
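
A minimal usage sketch for the stdint-gcc.h entry above; every typedef and limit macro there is derived from compiler-provided __INTn_TYPE__/__INTn_MAX__ macros, and in pre-C++11 C++ the limit macros additionally require __STDC_LIMIT_MACROS, as the header's guards show. PRIu64 is assumed from the companion <inttypes.h>.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uint64_t big = UINT64_C (1) << 40;   /* UINT64_C forces a 64-bit constant */
  printf ("big = %" PRIu64 "\n", big);
  printf ("INT32_MIN = %d, INT32_MAX = %d\n", INT32_MIN, INT32_MAX);
  return 0;
}
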
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #if !defined _X86INTRIN_H_INCLUDED # error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead." #endif #ifndef __RDSEED__ # error "RDSEED instruction not enabled" #endif /* __RDSEED__ */ #ifndef _RDSEEDINTRIN_H_INCLUDED #define _RDSEEDINTRIN_H_INCLUDED extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdseed16_step (unsigned short *p) { return __builtin_ia32_rdseed_hi_step (p); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdseed32_step (unsigned int *p) { return __builtin_ia32_rdseed_si_step (p); } #ifdef __x86_64__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdseed64_step (unsigned long long *p) { return __builtin_ia32_rdseed_di_step (p); } #endif #endif /* _RDSEEDINTRIN_H_INCLUDED */ bmiintrin.h 0000644 00000012505 14711270701 0006713 0 ustar 00 /* Copyright (C) 2010-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED # error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 
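
A minimal usage sketch for the rdseedintrin.h entry above, assuming `gcc -mrdseed` and inclusion via <x86intrin.h> as the guard requires. _rdseed32_step returns 0 when the hardware entropy source is temporarily exhausted, so callers conventionally retry; the spin loop is that convention, not code from the header.

#include <stdio.h>
#include <x86intrin.h>

static unsigned int rdseed32_blocking (void)
{
  unsigned int v;
  while (!_rdseed32_step (&v))
    ;                    /* spin until hardware entropy is available */
  return v;
}

int main (void)
{
  printf ("hardware seed: %u\n", rdseed32_blocking ());
  return 0;
}
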
#endif #ifndef __BMI__ # error "BMI instruction set not enabled" #endif /* __BMI__ */ #ifndef _BMIINTRIN_H_INCLUDED #define _BMIINTRIN_H_INCLUDED extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u16 (unsigned short __X) { return __builtin_ctzs (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u32 (unsigned int __X, unsigned int __Y) { return ~__X & __Y; } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextr_u32 (unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32 (__X, __Y); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z) { return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u32 (unsigned int __X) { return __X & -__X; } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsi_u32 (unsigned int __X) { return __blsi_u32 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsmsk_u32 (unsigned int __X) { return __X ^ (__X - 1); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsmsk_u32 (unsigned int __X) { return __blsmsk_u32 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsr_u32 (unsigned int __X) { return __X & (__X - 1); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsr_u32 (unsigned int __X) { return __blsr_u32 (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u32 (unsigned int __X) { return __builtin_ctz (__X); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tzcnt_u32 (unsigned int __X) { return __builtin_ctz (__X); } #ifdef __x86_64__ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u64 (unsigned long long __X, unsigned long long __Y) { return ~__X & __Y; } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextr_u64 (unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64 (__X, __Y); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z) { return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u64 (unsigned long long __X) { return __X & -__X; } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsi_u64 (unsigned long long __X) { return __blsi_u64 (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsmsk_u64 (unsigned long long __X) { return __X ^ (__X - 1); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsmsk_u64 (unsigned long long __X) { return __blsmsk_u64 (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __blsr_u64 (unsigned long long __X) { return __X & (__X - 1); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _blsr_u64 (unsigned long long __X) { return __blsr_u64 (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u64 (unsigned long long __X) { return __builtin_ctzll (__X); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _tzcnt_u64 (unsigned long long __X) { return __builtin_ctzll (__X); } #endif /* __x86_64__ */ #endif /* _BMIINTRIN_H_INCLUDED */ stdarg.h 0000644 00000007753 14711270701 0006215 0 ustar 00 /* Copyright (C) 1989-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* * ISO C Standard: 7.15 Variable arguments <stdarg.h> */ #ifndef _STDARG_H #ifndef _ANSI_STDARG_H_ #ifndef __need___va_list #define _STDARG_H #define _ANSI_STDARG_H_ #endif /* not __need___va_list */ #undef __need___va_list /* Define __gnuc_va_list. */ #ifndef __GNUC_VA_LIST #define __GNUC_VA_LIST typedef __builtin_va_list __gnuc_va_list; #endif /* Define the standard macros for the user, if this invocation was from the user program. */ #ifdef _STDARG_H #define va_start(v,l) __builtin_va_start(v,l) #define va_end(v) __builtin_va_end(v) #define va_arg(v,l) __builtin_va_arg(v,l) #if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L || defined(__GXX_EXPERIMENTAL_CXX0X__) #define va_copy(d,s) __builtin_va_copy(d,s) #endif #define __va_copy(d,s) __builtin_va_copy(d,s) /* Define va_list, if desired, from __gnuc_va_list. */ /* We deliberately do not define va_list when called from stdio.h, because ANSI C says that stdio.h is not supposed to define va_list. stdio.h needs to have access to that data type, but must not use that name. It should use the name __gnuc_va_list, which is safe because it is reserved for the implementation. */ #ifdef _BSD_VA_LIST #undef _BSD_VA_LIST #endif #if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST)) /* SVR4.2 uses _VA_LIST for an internal alias for va_list, so we must avoid testing it and setting it here. SVR4 uses _VA_LIST as a flag in stdarg.h, but we should have no conflict with that. */ #ifndef _VA_LIST_ #define _VA_LIST_ #ifdef __i860__ #ifndef _VA_LIST #define _VA_LIST va_list #endif #endif /* __i860__ */ typedef __gnuc_va_list va_list; #ifdef _SCO_DS #define __VA_LIST #endif #endif /* _VA_LIST_ */ #else /* not __svr4__ || _SCO_DS */ /* The macro _VA_LIST_ is the same thing used by this file in Ultrix. But on BSD NET2 we must not test or define or undef it. 
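
A minimal usage sketch for the bmiintrin.h entry above, assuming `gcc -mbmi` and inclusion via <x86intrin.h>; the expected outputs in the comments follow directly from the closed forms in the header bodies (x & -x, x & (x-1), x ^ (x-1), ctz).

#include <stdio.h>
#include <x86intrin.h>

int main (void)
{
  unsigned int x = 0x2Cu;                     /* 44 = 0b101100 */
  printf ("blsi   = %u\n", __blsi_u32 (x));   /* lowest set bit:  4 */
  printf ("blsr   = %u\n", __blsr_u32 (x));   /* clear that bit: 40 */
  printf ("blsmsk = %u\n", __blsmsk_u32 (x)); /* mask up to it:   7 */
  printf ("tzcnt  = %u\n", __tzcnt_u32 (x));  /* trailing zeros:  2 */
  return 0;
}
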
(Note that the comments in NET 2's ansi.h are incorrect for _VA_LIST_--see stdio.h!) */ #if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT) /* The macro _VA_LIST_DEFINED is used in Windows NT 3.5 */ #ifndef _VA_LIST_DEFINED /* The macro _VA_LIST is used in SCO Unix 3.2. */ #ifndef _VA_LIST /* The macro _VA_LIST_T_H is used in the Bull dpx2 */ #ifndef _VA_LIST_T_H /* The macro __va_list__ is used by BeOS. */ #ifndef __va_list__ typedef __gnuc_va_list va_list; #endif /* not __va_list__ */ #endif /* not _VA_LIST_T_H */ #endif /* not _VA_LIST */ #endif /* not _VA_LIST_DEFINED */ #if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__)) #define _VA_LIST_ #endif #ifndef _VA_LIST #define _VA_LIST #endif #ifndef _VA_LIST_DEFINED #define _VA_LIST_DEFINED #endif #ifndef _VA_LIST_T_H #define _VA_LIST_T_H #endif #ifndef __va_list__ #define __va_list__ #endif #endif /* not _VA_LIST_, except on certain systems */ #endif /* not __svr4__ */ #endif /* _STDARG_H */ #endif /* not _ANSI_STDARG_H_ */ #endif /* not _STDARG_H */ ia32intrin.h 0000644 00000014316 14711270701 0006704 0 ustar 00 /* Copyright (C) 2009-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _X86INTRIN_H_INCLUDED # error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead." #endif /* 32bit bsf */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bsfd (int __X) { return __builtin_ctz (__X); } /* 32bit bsr */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bsrd (int __X) { return __builtin_ia32_bsrsi (__X); } /* 32bit bswap */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bswapd (int __X) { return __builtin_bswap32 (__X); } #ifdef __SSE4_2__ /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
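
A minimal usage sketch for the stdarg.h entry above: each va_* macro expands directly to the __builtin_va_* primitive defined there.

#include <stdarg.h>
#include <stdio.h>

static int sum (int count, ...)
{
  va_list ap;
  int total = 0;
  va_start (ap, count);          /* arm the list after the last fixed arg */
  for (int i = 0; i < count; i++)
    total += va_arg (ap, int);   /* each read advances the cursor */
  va_end (ap);
  return total;
}

int main (void)
{
  printf ("sum = %d\n", sum (3, 10, 20, 12));   /* 42 */
  return 0;
}
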
*/ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc32b (unsigned int __C, unsigned char __V) { return __builtin_ia32_crc32qi (__C, __V); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc32w (unsigned int __C, unsigned short __V) { return __builtin_ia32_crc32hi (__C, __V); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } #endif /* SSE4.2 */ /* 32bit popcnt */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __popcntd (unsigned int __X) { return __builtin_popcount (__X); } /* rdpmc */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdpmc (int __S) { return __builtin_ia32_rdpmc (__S); } /* rdtsc */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtsc (void) { return __builtin_ia32_rdtsc (); } /* rdtscp */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rdtscp (unsigned int *__A) { return __builtin_ia32_rdtscp (__A); } /* 8bit rol */ extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rolb (unsigned char __X, int __C) { return __builtin_ia32_rolqi (__X, __C); } /* 16bit rol */ extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rolw (unsigned short __X, int __C) { return __builtin_ia32_rolhi (__X, __C); } /* 32bit rol */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rold (unsigned int __X, int __C) { return (__X << __C) | (__X >> (32 - __C)); } /* 8bit ror */ extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rorb (unsigned char __X, int __C) { return __builtin_ia32_rorqi (__X, __C); } /* 16bit ror */ extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rorw (unsigned short __X, int __C) { return __builtin_ia32_rorhi (__X, __C); } /* 32bit ror */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rord (unsigned int __X, int __C) { return (__X >> __C) | (__X << (32 - __C)); } /* Pause */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __pause (void) { __builtin_ia32_pause (); } #ifdef __x86_64__ /* 64bit bsf */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bsfq (long long __X) { return __builtin_ctzll (__X); } /* 64bit bsr */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bsrq (long long __X) { return __builtin_ia32_bsrdi (__X); } /* 64bit bswap */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bswapq (long long __X) { return __builtin_bswap64 (__X); } #ifdef __SSE4_2__ /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __crc32q (unsigned long long __C, unsigned long long __V) { return __builtin_ia32_crc32di (__C, __V); } #endif /* 64bit popcnt */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __popcntq (unsigned long long __X) { return __builtin_popcountll (__X); } /* 64bit rol */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rolq (unsigned long long __X, int __C) { return (__X << __C) | (__X >> (64 - __C)); } /* 64bit ror */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __rorq (unsigned long long __X, int __C) { return (__X >> __C) | (__X << (64 - __C)); } #define _bswap64(a) __bswapq(a) #define _popcnt64(a) __popcntq(a) #define _lrotl(a,b) __rolq((a), (b)) #define _lrotr(a,b) __rorq((a), (b)) #else #define _lrotl(a,b) __rold((a), (b)) #define _lrotr(a,b) __rord((a), (b)) #endif #define _bit_scan_forward(a) __bsfd(a) #define _bit_scan_reverse(a) __bsrd(a) #define _bswap(a) __bswapd(a) #define _popcnt32(a) __popcntd(a) #define _rdpmc(a) __rdpmc(a) #define _rdtsc() __rdtsc() #define _rdtscp(a) __rdtscp(a) #define _rotwl(a,b) __rolw((a), (b)) #define _rotwr(a,b) __rorw((a), (b)) #define _rotl(a,b) __rold((a), (b)) #define _rotr(a,b) __rord((a), (b)) smmintrin.h 0000644 00000066065 14711270702 0006753 0 ustar 00 /* Copyright (C) 2007-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 10.0. */ #ifndef _SMMINTRIN_H_INCLUDED #define _SMMINTRIN_H_INCLUDED #ifndef __SSE4_1__ # error "SSE4.1 instruction set not enabled" #else /* We need definitions from the SSSE3, SSE3, SSE2 and SSE header files. */ #include <tmmintrin.h> /* Rounding mode macros. 
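
A minimal usage sketch for the ia32intrin.h entry above, reached through <x86intrin.h> as its guard requires; byte swap, rotate, and the TSC read each wrap a single x86 instruction, and the TSC delta printed is whatever the hardware reports between two back-to-back reads.

#include <stdio.h>
#include <x86intrin.h>

int main (void)
{
  unsigned int x = 0x11223344u;
  printf ("bswap: %08x\n", (unsigned int) __bswapd ((int) x)); /* 44332211 */
  printf ("rol 8: %08x\n", __rold (x, 8));                     /* 22334411 */
  unsigned long long t0 = __rdtsc ();
  unsigned long long t1 = __rdtsc ();
  printf ("tsc delta: %llu cycles\n", t1 - t0);
  return 0;
}
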
*/ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 #define _MM_FROUND_TO_POS_INF 0x02 #define _MM_FROUND_TO_ZERO 0x03 #define _MM_FROUND_CUR_DIRECTION 0x04 #define _MM_FROUND_RAISE_EXC 0x00 #define _MM_FROUND_NO_EXC 0x08 #define _MM_FROUND_NINT \ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_FLOOR \ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_CEIL \ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_TRUNC \ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_RINT \ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) #define _MM_FROUND_NEARBYINT \ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) /* Test Instruction */ /* Packed integer 128-bit bitwise comparison. Return 1 if (__V & __M) == 0. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_testz_si128 (__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V); } /* Packed integer 128-bit bitwise comparison. Return 1 if (__V & ~__M) == 0. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_testc_si128 (__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V); } /* Packed integer 128-bit bitwise comparison. Return 1 if (__V & __M) != 0 && (__V & ~__M) != 0. */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_testnzc_si128 (__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V); } /* Macros for packed integer 128-bit comparison intrinsics. */ #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) #define _mm_test_all_ones(V) \ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V))) #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) /* Packed/scalar double precision floating point rounding. */ #ifdef __OPTIMIZE__ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_pd (__m128d __V, const int __M) { return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_sd(__m128d __D, __m128d __V, const int __M) { return (__m128d) __builtin_ia32_roundsd ((__v2df)__D, (__v2df)__V, __M); } #else #define _mm_round_pd(V, M) \ ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M))) #define _mm_round_sd(D, V, M) \ ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \ (__v2df)(__m128d)(V), (int)(M))) #endif /* Packed/scalar single precision floating point rounding. */ #ifdef __OPTIMIZE__ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_ps (__m128 __V, const int __M) { return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_round_ss (__m128 __D, __m128 __V, const int __M) { return (__m128) __builtin_ia32_roundss ((__v4sf)__D, (__v4sf)__V, __M); } #else #define _mm_round_ps(V, M) \ ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M))) #define _mm_round_ss(D, V, M) \ ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \ (__v4sf)(__m128)(V), (int)(M))) #endif /* Macros for ceil/floor intrinsics. 
*/ #define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) #define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) #define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) #define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) #define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) #define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) #define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) #define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) /* SSE4.1 */ /* Integer blend instructions - select data from 2 sources using constant/variable mask. */ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M) { return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X, (__v8hi)__Y, __M); } #else #define _mm_blend_epi16(X, Y, M) \ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \ (__v8hi)(__m128i)(Y), (int)(M))) #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M) { return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X, (__v16qi)__Y, (__v16qi)__M); } /* Single precision floating point blend instructions - select data from 2 sources using constant/variable mask. */ #ifdef __OPTIMIZE__ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_ps (__m128 __X, __m128 __Y, const int __M) { return (__m128) __builtin_ia32_blendps ((__v4sf)__X, (__v4sf)__Y, __M); } #else #define _mm_blend_ps(X, Y, M) \ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(M))) #endif extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M) { return (__m128) __builtin_ia32_blendvps ((__v4sf)__X, (__v4sf)__Y, (__v4sf)__M); } /* Double precision floating point blend instructions - select data from 2 sources using constant/variable mask. */ #ifdef __OPTIMIZE__ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_pd (__m128d __X, __m128d __Y, const int __M) { return (__m128d) __builtin_ia32_blendpd ((__v2df)__X, (__v2df)__Y, __M); } #else #define _mm_blend_pd(X, Y, M) \ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(M))) #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M) { return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X, (__v2df)__Y, (__v2df)__M); } /* Dot product instructions with mask-defined summing and zeroing parts of result. 
*/ #ifdef __OPTIMIZE__ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_dp_ps (__m128 __X, __m128 __Y, const int __M) { return (__m128) __builtin_ia32_dpps ((__v4sf)__X, (__v4sf)__Y, __M); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_dp_pd (__m128d __X, __m128d __Y, const int __M) { return (__m128d) __builtin_ia32_dppd ((__v2df)__X, (__v2df)__Y, __M); } #else #define _mm_dp_ps(X, Y, M) \ ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(M))) #define _mm_dp_pd(X, Y, M) \ ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(M))) #endif /* Packed integer 64-bit comparison, zeroing or filling with ones corresponding parts of result. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi64 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y); } /* Min/max packed integer instructions. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi8 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi8 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epu16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu16 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epu32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y); } /* Packed integer 32-bit multiplication with truncation of upper halves of results. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y); } /* Packed integer 32-bit multiplication of 2 pairs of operands with two 64-bit results. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y); } /* Insert single precision float into packed single precision array element selected by index N. The bits [7-6] of N define S index, the bits [5-4] define D index, and bits [3-0] define zeroing mask for D. 
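   An illustrative sketch (assumes SSE4.1); _MM_MK_INSERTPS_NDX below
   builds the immediate from those three fields:

     __m128 d = _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f);
     __m128 s = _mm_set_ps (30.0f, 20.0f, 10.0f, 0.0f);
     __m128 r = _mm_insert_ps (d, s, _MM_MK_INSERTPS_NDX (2, 0, 0x8));
     // r[0] = s[2] = 20.0f, r[3] is zeroed, r[1] and r[2] are kept from d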
*/ #ifdef __OPTIMIZE__ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_ps (__m128 __D, __m128 __S, const int __N) { return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D, (__v4sf)__S, __N); } #else #define _mm_insert_ps(D, S, N) \ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \ (__v4sf)(__m128)(S), (int)(N))) #endif /* Helper macro to create the N value for _mm_insert_ps. */ #define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M)) /* Extract binary representation of single precision float from packed single precision array element of X selected by index N. */ #ifdef __OPTIMIZE__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_ps (__m128 __X, const int __N) { union { int i; float f; } __tmp; __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N); return __tmp.i; } #else #define _mm_extract_ps(X, N) \ (__extension__ \ ({ \ union { int i; float f; } __tmp; \ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \ __tmp.i; \ })) #endif /* Extract binary representation of single precision float into D from packed single precision array element of S selected by index N. */ #define _MM_EXTRACT_FLOAT(D, S, N) \ { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); } /* Extract specified single precision float element into the lower part of __m128. */ #define _MM_PICK_OUT_PS(X, N) \ _mm_insert_ps (_mm_setzero_ps (), (X), \ _MM_MK_INSERTPS_NDX ((N), 0, 0x0e)) /* Insert integer, S, into packed integer array element of D selected by index N. */ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi8 (__m128i __D, int __S, const int __N) { return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D, __S, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi32 (__m128i __D, int __S, const int __N) { return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D, __S, __N); } #ifdef __x86_64__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi64 (__m128i __D, long long __S, const int __N) { return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D, __S, __N); } #endif #else #define _mm_insert_epi8(D, S, N) \ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \ (int)(S), (int)(N))) #define _mm_insert_epi32(D, S, N) \ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \ (int)(S), (int)(N))) #ifdef __x86_64__ #define _mm_insert_epi64(D, S, N) \ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \ (long long)(S), (int)(N))) #endif #endif /* Extract integer from packed integer array element of X selected by index N. 
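   N must be a compile-time constant.  An illustrative sketch (assumes
   SSE4.1):

     __m128i v = _mm_set_epi32 (40, 30, 20, 10);
     int x = _mm_extract_epi32 (v, 2);   // x == 30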
*/ #ifdef __OPTIMIZE__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi32 (__m128i __X, const int __N) { return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N); } #ifdef __x86_64__ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi64 (__m128i __X, const int __N) { return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N); } #endif #else #define _mm_extract_epi8(X, N) \ ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) #define _mm_extract_epi32(X, N) \ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) \ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N))) #endif #endif /* Return horizontal packed word minimum and its index in bits [15:0] and bits [18:16] respectively. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_minpos_epu16 (__m128i __X) { return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X); } /* Packed integer sign-extension. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi32 (__m128i __X) { return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi16_epi32 (__m128i __X) { return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi16_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi16 (__m128i __X) { return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X); } /* Packed integer zero-extension. 
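   Unlike the sign-extending forms above, these treat the source elements
   as unsigned.  An illustrative sketch (assumes SSE4.1):

     __m128i b = _mm_set1_epi8 ((char) 0xFF);
     __m128i u = _mm_cvtepu8_epi32 (b);  // each 32-bit element == 255
     __m128i s = _mm_cvtepi8_epi32 (b);  // each 32-bit element == -1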
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu8_epi32 (__m128i __X) { return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu16_epi32 (__m128i __X) { return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu8_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu32_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu16_epi64 (__m128i __X) { return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepu8_epi16 (__m128i __X) { return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X); } /* Pack 8 double words from 2 operands into 8 words of result with unsigned saturation. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packus_epi32 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y); } /* Sum absolute 8-bit integer difference of adjacent groups of 4 byte integers in the first 2 operands. Starting offsets within operands are determined by the 3rd mask operand. */ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M) { return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X, (__v16qi)__Y, __M); } #else #define _mm_mpsadbw_epu8(X, Y, M) \ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #endif /* Load double quadword using non-temporal aligned hint. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_load_si128 (__m128i *__X) { return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X); } #ifdef __SSE4_2__ /* These macros specify the source data format. */ #define _SIDD_UBYTE_OPS 0x00 #define _SIDD_UWORD_OPS 0x01 #define _SIDD_SBYTE_OPS 0x02 #define _SIDD_SWORD_OPS 0x03 /* These macros specify the comparison operation. */ #define _SIDD_CMP_EQUAL_ANY 0x00 #define _SIDD_CMP_RANGES 0x04 #define _SIDD_CMP_EQUAL_EACH 0x08 #define _SIDD_CMP_EQUAL_ORDERED 0x0c /* These macros specify the polarity. */ #define _SIDD_POSITIVE_POLARITY 0x00 #define _SIDD_NEGATIVE_POLARITY 0x10 #define _SIDD_MASKED_POSITIVE_POLARITY 0x20 #define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 /* These macros specify the output selection in _mm_cmpXstri (). */ #define _SIDD_LEAST_SIGNIFICANT 0x00 #define _SIDD_MOST_SIGNIFICANT 0x40 /* These macros specify the output selection in _mm_cmpXstrm (). */ #define _SIDD_BIT_MASK 0x00 #define _SIDD_UNIT_MASK 0x40 /* Intrinsics for text/string processing. 
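   The mode immediate combines one choice from each _SIDD_* group above.
   A hypothetical sketch (assumes SSE4.2): index of the first vowel in a
   16-byte chunk, using implicit (NUL-terminated) operand lengths:

     char needle[16] = "aeiou";
     char hay[16]    = "xyzapq";
     __m128i n = _mm_loadu_si128 ((const __m128i *) needle);
     __m128i h = _mm_loadu_si128 ((const __m128i *) hay);
     int idx = _mm_cmpistri (n, h,
                             _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY
                             | _SIDD_LEAST_SIGNIFICANT);
     // idx == 3 (the 'a'); 16 would mean "no match in this chunk"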
*/ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistrm (__m128i __X, __m128i __Y, const int __M) { return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistri (__m128i __X, __m128i __Y, const int __M) { return __builtin_ia32_pcmpistri128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } #else #define _mm_cmpistrm(X, Y, M) \ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpistri(X, Y, M) \ ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpestrm(X, LX, Y, LY, M) \ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \ (int)(LX), (__v16qi)(__m128i)(Y), \ (int)(LY), (int)(M))) #define _mm_cmpestri(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ (int)(M))) #endif /* Intrinsics for text/string processing and reading values of EFlags. */ #ifdef __OPTIMIZE__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistra (__m128i __X, __m128i __Y, const int __M) { return __builtin_ia32_pcmpistria128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistrc (__m128i __X, __m128i __Y, const int __M) { return __builtin_ia32_pcmpistric128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistro (__m128i __X, __m128i __Y, const int __M) { return __builtin_ia32_pcmpistrio128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistrs (__m128i __X, __m128i __Y, const int __M) { return __builtin_ia32_pcmpistris128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpistrz (__m128i __X, __m128i __Y, const int __M) { return __builtin_ia32_pcmpistriz128 ((__v16qi)__X, (__v16qi)__Y, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } extern __inline int __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) { return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX, (__v16qi)__Y, __LY, __M); } #else #define _mm_cmpistra(X, Y, M) \ ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpistrc(X, Y, M) \ ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpistro(X, Y, M) \ ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpistrs(X, Y, M) \ ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpistrz(X, Y, M) \ ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \ (__v16qi)(__m128i)(Y), (int)(M))) #define _mm_cmpestra(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ (int)(M))) #define _mm_cmpestrc(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ (int)(M))) #define _mm_cmpestro(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ (int)(M))) #define _mm_cmpestrs(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ (int)(M))) #define _mm_cmpestrz(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ (int)(M))) #endif /* Packed integer 64-bit comparison, zeroing or filling with ones corresponding parts of result. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi64 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y); } #ifdef __POPCNT__ #include <popcntintrin.h> #endif /* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_crc32_u8 (unsigned int __C, unsigned char __V) { return __builtin_ia32_crc32qi (__C, __V); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_crc32_u16 (unsigned int __C, unsigned short __V) { return __builtin_ia32_crc32hi (__C, __V); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_crc32_u32 (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } #ifdef __x86_64__ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_crc32_u64 (unsigned long long __C, unsigned long long __V) { return __builtin_ia32_crc32di (__C, __V); } #endif #endif /* __SSE4_2__ */ #endif /* __SSE4_1__ */ #endif /* _SMMINTRIN_H_INCLUDED */ emmintrin.h 0000644 00000143071 14711270703 0006727 0 ustar 00 /* Copyright (C) 2003-2013 Free Software Foundation, Inc. This file is part of GCC. 
GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef _EMMINTRIN_H_INCLUDED #define _EMMINTRIN_H_INCLUDED #ifndef __SSE2__ # error "SSE2 instruction set not enabled" #else /* We need definitions from the SSE header files*/ #include <xmmintrin.h> /* SSE2 */ typedef double __v2df __attribute__ ((__vector_size__ (16))); typedef long long __v2di __attribute__ ((__vector_size__ (16))); typedef int __v4si __attribute__ ((__vector_size__ (16))); typedef short __v8hi __attribute__ ((__vector_size__ (16))); typedef char __v16qi __attribute__ ((__vector_size__ (16))); /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); /* Create a selector for use with the SHUFPD instruction. */ #define _MM_SHUFFLE2(fp1,fp0) \ (((fp1) << 1) | (fp0)) /* Create a vector with element 0 as F and the rest zero. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_sd (double __F) { return __extension__ (__m128d){ __F, 0.0 }; } /* Create a vector with both elements equal to F. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pd (double __F) { return __extension__ (__m128d){ __F, __F }; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pd1 (double __F) { return _mm_set1_pd (__F); } /* Create a vector with the lower value X and upper value W. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pd (double __W, double __X) { return __extension__ (__m128d){ __X, __W }; } /* Create a vector with the lower value W and upper value X. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pd (double __W, double __X) { return __extension__ (__m128d){ __W, __X }; } /* Create a vector of zeros. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_pd (void) { return __extension__ (__m128d){ 0.0, 0.0 }; } /* Sets the low DPFP value of A from the low value of B. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); } /* Load two DPFP values from P. The address must be 16-byte aligned. 
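   An illustrative sketch (assumes SSE2); use _mm_loadu_pd below when the
   alignment of P is not guaranteed:

     double buf[2] __attribute__ ((aligned (16))) = { 1.0, 2.0 };
     __m128d a = _mm_load_pd (buf);    // requires 16-byte alignment
     __m128d u = _mm_loadu_pd (buf);   // any alignment, possibly slower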
*/ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_pd (double const *__P) { return *(__m128d *)__P; } /* Load two DPFP values from P. The address need not be 16-byte aligned. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_pd (double const *__P) { return __builtin_ia32_loadupd (__P); } /* Create a vector with all two elements equal to *P. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load1_pd (double const *__P) { return _mm_set1_pd (*__P); } /* Create a vector with element 0 as *P and the rest zero. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_sd (double const *__P) { return _mm_set_sd (*__P); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_pd1 (double const *__P) { return _mm_load1_pd (__P); } /* Load two DPFP values in reverse order. The address must be aligned. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadr_pd (double const *__P) { __m128d __tmp = _mm_load_pd (__P); return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); } /* Store two DPFP values. The address must be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_pd (double *__P, __m128d __A) { *(__m128d *)__P = __A; } /* Store two DPFP values. The address need not be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_pd (double *__P, __m128d __A) { __builtin_ia32_storeupd (__P, __A); } /* Stores the lower DPFP value. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_sd (double *__P, __m128d __A) { *__P = __builtin_ia32_vec_ext_v2df (__A, 0); } extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_f64 (__m128d __A) { return __builtin_ia32_vec_ext_v2df (__A, 0); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_pd (double *__P, __m128d __A) { _mm_store_sd (__P, __A); } /* Stores the upper DPFP value. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeh_pd (double *__P, __m128d __A) { *__P = __builtin_ia32_vec_ext_v2df (__A, 1); } /* Store the lower DPFP value across two words. The address must be 16-byte aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store1_pd (double *__P, __m128d __A) { _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_pd1 (double *__P, __m128d __A) { _mm_store1_pd (__P, __A); } /* Store two DPFP values in reverse order. The address must be aligned. */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storer_pd (double *__P, __m128d __A) { _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si32 (__m128i __A) { return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); } #ifdef __x86_64__ /* Intel intrinsic. 
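   _mm_cvtsi128_si64 returns the low 64-bit element; _mm_cvtsi128_si64x
   below is the Microsoft spelling of the same operation.  An illustrative
   sketch (x86-64 only):

     __m128i v = _mm_set_epi64x (7LL, 42LL);
     long long lo = _mm_cvtsi128_si64 (v);   // lo == 42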
*/ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64 (__m128i __A) { return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi128_si64x (__m128i __A) { return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); } #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_div_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_pd (__m128d __A) { return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); } /* Return pair {sqrt (A[0), B[1]}. 
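   (Read as: the result is { sqrt (B[0]), A[1] } -- the movsd below takes
   the low element from B and the high element from A, and sqrtsd then
   replaces the low element with its square root.)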
*/ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_sd (__m128d __A, __m128d __B) { __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_cmpngt_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_movsd ((__v2df) __A, (__v2df) __builtin_ia32_cmpltsd ((__v2df) __B, (__v2df) __A)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_movsd ((__v2df) __A, (__v2df) __builtin_ia32_cmplesd ((__v2df) __B, (__v2df) __A)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_movsd ((__v2df) __A, (__v2df) __builtin_ia32_cmpnltsd ((__v2df) __B, (__v2df) __A)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_movsd ((__v2df) __A, (__v2df) __builtin_ia32_cmpnlesd ((__v2df) __B, (__v2df) __A)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_sd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comieq_sd (__m128d __A, __m128d __B) { return __builtin_ia32_comisdeq ((__v2df)__A, 
(__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comilt_sd (__m128d __A, __m128d __B) { return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comile_sd (__m128d __A, __m128d __B) { return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comigt_sd (__m128d __A, __m128d __B) { return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comige_sd (__m128d __A, __m128d __B) { return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comineq_sd (__m128d __A, __m128d __B) { return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomieq_sd (__m128d __A, __m128d __B) { return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomilt_sd (__m128d __A, __m128d __B) { return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomile_sd (__m128d __A, __m128d __B) { return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomigt_sd (__m128d __A, __m128d __B) { return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomige_sd (__m128d __A, __m128d __B) { return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomineq_sd (__m128d __A, __m128d __B) { return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); } /* Create a vector of Qi, where i is the element number. 
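   Note the argument order: the highest-numbered element comes first.
   An illustrative sketch (assumes SSE2):

     __m128i v = _mm_set_epi32 (3, 2, 1, 0);   // element i holds i
     int e0 = _mm_cvtsi128_si32 (v);           // e0 == 0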
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi64x (long long __q1, long long __q0) { return __extension__ (__m128i)(__v2di){ __q0, __q1 }; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi64 (__m64 __q1, __m64 __q0) { return _mm_set_epi64x ((long long)__q1, (long long)__q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) { return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, short __q3, short __q2, short __q1, short __q0) { return __extension__ (__m128i)(__v8hi){ __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, char __q11, char __q10, char __q09, char __q08, char __q07, char __q06, char __q05, char __q04, char __q03, char __q02, char __q01, char __q00) { return __extension__ (__m128i)(__v16qi){ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 }; } /* Set all of the elements of the vector to A. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi64x (long long __A) { return _mm_set_epi64x (__A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi64 (__m64 __A) { return _mm_set_epi64 (__A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi32 (int __A) { return _mm_set_epi32 (__A, __A, __A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi16 (short __A) { return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_epi8 (char __A) { return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A, __A); } /* Create a vector of Qi, where i is the element number. The parameter order is reversed from the _mm_set_epi* functions. 
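   That is, _mm_setr_epi32 (a, b, c, d) is equivalent to
   _mm_set_epi32 (d, c, b, a): the first argument lands in element 0.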
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi64 (__m64 __q0, __m64 __q1) { return _mm_set_epi64 (__q1, __q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) { return _mm_set_epi32 (__q3, __q2, __q1, __q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, short __q4, short __q5, short __q6, short __q7) { return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, char __q04, char __q05, char __q06, char __q07, char __q08, char __q09, char __q10, char __q11, char __q12, char __q13, char __q14, char __q15) { return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); } /* Create a vector with element 0 as *P and the rest zero. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_si128 (__m128i const *__P) { return *__P; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_si128 (__m128i const *__P) { return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_epi64 (__m128i const *__P) { return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_si128 (__m128i *__P, __m128i __B) { *__P = __B; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storeu_si128 (__m128i *__P, __m128i __B) { __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_epi64 (__m128i *__P, __m128i __B) { *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movepi64_pi64 (__m128i __B) { return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movpi64_epi64 (__m64 __A) { return _mm_set_epi64 ((__m64)0LL, __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_epi64 (__m128i __A) { return (__m128i)__builtin_ia32_movq128 ((__v2di) __A); } /* Create a vector of zeros. 
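   An illustrative idiom (assumes SSE2): comparing the zero vector with
   itself is the usual way to materialize an all-ones vector:

     __m128i zero = _mm_setzero_si128 ();
     __m128i ones = _mm_cmpeq_epi32 (zero, zero);   // every bit set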
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setzero_si128 (void) { return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_pd (__m128i __A) { return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_ps (__m128i __A) { return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_epi32 (__m128d __A) { return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_pi32 (__m128d __A) { return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_ps (__m128d __A) { return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttpd_epi32 (__m128d __A) { return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttpd_pi32 (__m128d __A) { return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32_pd (__m64 __A) { return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_epi32 (__m128 __A) { return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttps_epi32 (__m128 __A) { return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pd (__m128 __A) { return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si32 (__m128d __A) { return __builtin_ia32_cvtsd2si ((__v2df) __A); } #ifdef __x86_64__ /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si64 (__m128d __A) { return __builtin_ia32_cvtsd2si64 ((__v2df) __A); } /* Microsoft intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si64x (__m128d __A) { return __builtin_ia32_cvtsd2si64 ((__v2df) __A); } #endif extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si32 (__m128d __A) { return __builtin_ia32_cvttsd2si ((__v2df) __A); } #ifdef __x86_64__ /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si64 (__m128d __A) { return __builtin_ia32_cvttsd2si64 ((__v2df) __A); } /* Microsoft intrinsic. 
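   (_mm_cvttsd_si64x below is the Microsoft spelling of _mm_cvttsd_si64.)
   The "cvtt" forms truncate toward zero, while the "cvt" forms use the
   current rounding mode.  An illustrative sketch (assumes SSE2, default
   round-to-nearest-even):

     __m128d v = _mm_set_pd (2.5, -1.5);
     __m128i r = _mm_cvtpd_epi32 (v);    // { -2, 2, 0, 0 } (rounded)
     __m128i t = _mm_cvttpd_epi32 (v);   // { -1, 2, 0, 0 } (truncated)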
*/ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si64x (__m128d __A) { return __builtin_ia32_cvttsd2si64 ((__v2df) __A); } #endif extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_ss (__m128 __A, __m128d __B) { return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_sd (__m128d __A, int __B) { return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); } #ifdef __x86_64__ /* Intel intrinsic. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_sd (__m128d __A, long long __B) { return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); } /* Microsoft intrinsic. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64x_sd (__m128d __A, long long __B) { return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); } #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_sd (__m128d __A, __m128 __B) { return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); } #ifdef __OPTIMIZE__ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) { return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask); } #else #define _mm_shuffle_pd(A, B, N) \ ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(N))) #endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pd (__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pd (__m128d __A, double const *__B) { return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pd (__m128d __A, double const *__B) { return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_pd (__m128d __A) { return __builtin_ia32_movmskpd ((__v2df)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packus_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) _mm_unpackhi_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epu8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_epu16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) _mm_sub_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epu8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_epu16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_madd_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_su32 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_epu32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi16 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi32 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi64 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_epi16 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_epi32 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); } #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bsrli_si128 (__m128i __A, const int __N) { return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bslli_si128 (__m128i __A, const int __N) { return (__m128i)__builtin_ia32_pslldqi128 
(__A, __N * 8); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_si128 (__m128i __A, const int __N) { return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_si128 (__m128i __A, const int __N) { return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); } #else #define _mm_bsrli_si128(A, N) \ ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8)) #define _mm_bslli_si128(A, N) \ ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8)) #define _mm_srli_si128(A, N) \ ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8)) #define _mm_slli_si128(A, N) \ ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8)) #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi16 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi32 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi64 (__m128i __A, int __B) { return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_and_si128 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_andnot_si128 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_or_si128 (__m128i __A, 
__m128i __B) { return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_xor_si128 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B); } #ifdef __OPTIMIZE__ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi16 (__m128i const __A, int const __N) { return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) { return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); } #else #define _mm_extract_epi16(A, N) \ ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_insert_epi16(A, D, N) \ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ (int)(D), (int)(N))) #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_epu8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_min_epu8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_epi8 (__m128i __A) { return __builtin_ia32_pmovmskb128 ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epu16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); } #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflehi_epi16 (__m128i __A, const int __mask) { return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflelo_epi16 (__m128i __A, const int __mask) { return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_epi32 (__m128i __A, const int __mask) { return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); } #else #define _mm_shufflehi_epi16(A, N) \ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_shufflelo_epi16(A, N) \ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_shuffle_epi32(A, N) \ ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N))) #endif extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) { __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_epu8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_epu16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sad_epu8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_si32 (int *__A, int __B) { __builtin_ia32_movnti (__A, __B); } #ifdef __x86_64__ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_si64 (long long int *__A, long long int __B) { __builtin_ia32_movnti64 (__A, __B); } #endif extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_si128 (__m128i *__A, __m128i __B) { __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_pd (double *__A, __m128d __B) { __builtin_ia32_movntpd (__A, (__v2df)__B); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_clflush (void const *__A) { __builtin_ia32_clflush (__A); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_lfence (void) { __builtin_ia32_lfence (); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mfence (void) { __builtin_ia32_mfence (); } extern __inline __m128i __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_cvtsi32_si128 (int __A) { return _mm_set_epi32 (0, 0, 0, __A); } #ifdef __x86_64__ /* Intel intrinsic. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_si128 (long long __A) { return _mm_set_epi64x (0, __A); } /* Microsoft intrinsic. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64x_si128 (long long __A) { return _mm_set_epi64x (0, __A); } #endif /* Casts between various SP, DP, INT vector types. Note that these do no conversion of values, they just change the type. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castpd_ps(__m128d __A) { return (__m128) __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castpd_si128(__m128d __A) { return (__m128i) __A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castps_pd(__m128 __A) { return (__m128d) __A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castps_si128(__m128 __A) { return (__m128i) __A; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castsi128_ps(__m128i __A) { return (__m128) __A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castsi128_pd(__m128i __A) { return (__m128d) __A; } #endif /* __SSE2__ */ #endif /* _EMMINTRIN_H_INCLUDED */ xopintrin.h 0000644 00000067427 14711270706 0006774 0 ustar 00 /* Copyright (C) 2007-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _X86INTRIN_H_INCLUDED # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." #endif #ifndef _XOPMMINTRIN_H_INCLUDED #define _XOPMMINTRIN_H_INCLUDED #ifndef __XOP__ # error "XOP instruction set not enabled" #else #include <fma4intrin.h>
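/* Usage sketch (illustrative only; assumes XOP hardware and compilation
   with -mxop) for the multiply/add intrinsics defined below.  Kept under
   #if 0 so the header itself is unchanged. */
#if 0
#include <x86intrin.h>
#include <stdio.h>

int
main (void)
{
  /* _mm_macc_epi16 computes a*b + c in each signed 16-bit lane. */
  __m128i a = _mm_set1_epi16 (3);
  __m128i b = _mm_set1_epi16 (4);
  __m128i c = _mm_set1_epi16 (5);
  __m128i r = _mm_macc_epi16 (a, b, c);

  short out[8];
  _mm_storeu_si128 ((__m128i *) out, r);
  printf ("%d\n", out[0]);   /* prints 17 (= 3*4 + 5) */
  return 0;
}
#endif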
/* Integer multiply/add instructions. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); } /* Packed Integer Horizontal Add and Subtract */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddw_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddd_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddq_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddd_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vphaddwd 
((__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddq_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddq_epi32(__m128i __A) { return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddw_epu8(__m128i __A) { return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddd_epu8(__m128i __A) { return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddq_epu8(__m128i __A) { return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddd_epu16(__m128i __A) { return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddq_epu16(__m128i __A) { return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_haddq_epu32(__m128i __A) { return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsubw_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsubd_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsubq_epi32(__m128i __A) { return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); } /* Vector conditional move and permute */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) { return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); } /* Packed Integer Rotates and Shifts Rotates - Non-Immediate form */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rot_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rot_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rot_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rot_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); } /* Rotates - Immediate form */ #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_roti_epi8(__m128i __A, const int __B) { return 
(__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_roti_epi16(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_roti_epi32(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_roti_epi64(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); } #else #define _mm_roti_epi8(A, N) \ ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) #define _mm_roti_epi16(A, N) \ ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) #define _mm_roti_epi32(A, N) \ ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) #define _mm_roti_epi64(A, N) \ ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) #endif /* Shifts */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shl_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shl_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shl_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shl_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sha_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sha_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sha_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sha_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); } /* Compare and Predicate Generation pcom (integer, unsigned bytes) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epu8(__m128i __A, __m128i __B) { return (__m128i) 
__builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epu8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); } /*pcom (integer, unsigned words) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epu16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); } /*pcom (integer, unsigned double words) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) _mm_comeq_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epu32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); } /*pcom (integer, unsigned quad words) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epu64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); } /*pcom (integer, signed bytes) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); }
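/* Usage sketch (illustrative only; assumes XOP hardware and -mxop).
   Each pcom intrinsic in this file yields all-ones in the lanes where the
   predicate holds and all-zeros elsewhere, so the result can feed
   _mm_cmov_si128 directly as a selection mask: */
#if 0
#include <x86intrin.h>

/* Clamp each unsigned byte of x to at most the corresponding byte of hi. */
static __m128i
clamp_epu8 (__m128i x, __m128i hi)
{
  __m128i keep = _mm_comle_epu8 (x, hi);  /* 0xff where x <= hi */
  return _mm_cmov_si128 (x, hi, keep);    /* x where mask set, else hi */
}
#endif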
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epi8(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); } /*pcom (integer, signed words) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epi16(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); } /*pcom (integer, signed double words) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epi32(__m128i __A, __m128i __B) { return (__m128i) 
__builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epi32(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); } /*pcom (integer, signed quad words) */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comlt_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comle_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comgt_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comge_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comeq_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comneq_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comfalse_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_comtrue_epi64(__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); } /* FRCZ */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_frcz_ps (__m128 __A) { return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_frcz_pd (__m128d __A) { return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_frcz_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf) __builtin_ia32_vfrczss ((__v4sf)__B)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_frcz_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df) __builtin_ia32_vfrczsd ((__v2df)__B)); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_frcz_ps (__m256 __A) { return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_frcz_pd (__m256d __A) { return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); } /* PERMIL2 */ #ifdef __OPTIMIZE__ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int 
__I) { return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X, (__v2df)__Y, (__v2di)__C, __I); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I) { return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X, (__v4df)__Y, (__v4di)__C, __I); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I) { return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X, (__v4sf)__Y, (__v4si)__C, __I); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) { return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X, (__v8sf)__Y, (__v8si)__C, __I); } #else #define _mm_permute2_pd(X, Y, C, I) \ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ (__v2di)(__m128d)(C), \ (int)(I))) #define _mm256_permute2_pd(X, Y, C, I) \ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), \ (__v4di)(__m256d)(C), \ (int)(I))) #define _mm_permute2_ps(X, Y, C, I) \ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), \ (__v4si)(__m128)(C), \ (int)(I))) #define _mm256_permute2_ps(X, Y, C, I) \ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ (__v8si)(__m256)(C), \ (int)(I))) #endif /* __OPTIMIZE__ */ #endif /* __XOP__ */ #endif /* _XOPMMINTRIN_H_INCLUDED */ cross-stdarg.h 0000644 00000004776 14711270707 0007354 0 ustar 00 /* Copyright (C) 2002-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __CROSS_STDARG_H_INCLUDED #define __CROSS_STDARG_H_INCLUDED /* Make sure that for non x64 targets cross builtins are defined. */ #ifndef __x86_64__ /* Call abi ms_abi. */ #define __builtin_ms_va_list __builtin_va_list #define __builtin_ms_va_copy __builtin_va_copy #define __builtin_ms_va_start __builtin_va_start #define __builtin_ms_va_end __builtin_va_end /* Call abi sysv_abi. 
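*/
/* Usage sketch (illustrative only): a varargs function in the Microsoft
   calling convention, walking its arguments with the __ms_va_* wrappers
   defined in this header. */
#if 0
int __attribute__((ms_abi))
sum_ints (int count, ...)
{
  __builtin_ms_va_list ap;
  int i, total = 0;

  __ms_va_start (ap, count);
  for (i = 0; i < count; i++)
    total += __ms_va_arg (ap, int);
  __ms_va_end (ap);
  return total;
}
#endif
/* The sysv_abi aliases follow.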
*/ #define __builtin_sysv_va_list __builtin_va_list #define __builtin_sysv_va_copy __builtin_va_copy #define __builtin_sysv_va_start __builtin_va_start #define __builtin_sysv_va_end __builtin_va_end #endif #define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s) #define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l) #define __ms_va_arg(__v,__l) __builtin_va_arg(__v,__l) #define __ms_va_end(__v) __builtin_ms_va_end(__v) #define __sysv_va_copy(__d,__s) __builtin_sysv_va_copy(__d,__s) #define __sysv_va_start(__v,__l) __builtin_sysv_va_start(__v,__l) #define __sysv_va_arg(__v,__l) __builtin_va_arg(__v,__l) #define __sysv_va_end(__v) __builtin_sysv_va_end(__v) #ifndef __GNUC_SYSV_VA_LIST #define __GNUC_SYSV_VA_LIST typedef __builtin_sysv_va_list __gnuc_sysv_va_list; #endif #ifndef _SYSV_VA_LIST_DEFINED #define _SYSV_VA_LIST_DEFINED typedef __gnuc_sysv_va_list sysv_va_list; #endif #ifndef __GNUC_MS_VA_LIST #define __GNUC_MS_VA_LIST typedef __builtin_ms_va_list __gnuc_ms_va_list; #endif #ifndef _MS_VA_LIST_DEFINED #define _MS_VA_LIST_DEFINED typedef __gnuc_ms_va_list ms_va_list; #endif #endif /* __CROSS_STDARG_H_INCLUDED */ prfchwintrin.h 0000644 00000003051 14711270707 0007437 0 ustar 00 /* Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #if !defined _X86INTRIN_H_INCLUDED && !defined _MM3DNOW_H_INCLUDED # error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead." #endif #if !defined (__PRFCHW__) && !defined (__3dNOW__) # error "PRFCHW instruction not enabled" #endif /* __PRFCHW__ or __3dNOW__*/ #ifndef _PRFCHWINTRIN_H_INCLUDED #define _PRFCHWINTRIN_H_INCLUDED extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_prefetchw (void *__P) { __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } #endif /* _PRFCHWINTRIN_H_INCLUDED */ pmmintrin.h 0000644 00000010216 14711270710 0006732 0 ustar 00 /* Copyright (C) 2003-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. 
You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 9.0. */ #ifndef _PMMINTRIN_H_INCLUDED #define _PMMINTRIN_H_INCLUDED #ifndef __SSE3__ # error "SSE3 instruction set not enabled" #else /* We need definitions from the SSE2 and SSE header files*/ #include <emmintrin.h> /* Additional bits in the MXCSR. */ #define _MM_DENORMALS_ZERO_MASK 0x0040 #define _MM_DENORMALS_ZERO_ON 0x0040 #define _MM_DENORMALS_ZERO_OFF 0x0000 #define _MM_SET_DENORMALS_ZERO_MODE(mode) \ _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode)) #define _MM_GET_DENORMALS_ZERO_MODE() \ (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_addsub_ps (__m128 __X, __m128 __Y) { return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_ps (__m128 __X, __m128 __Y) { return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_ps (__m128 __X, __m128 __Y) { return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movehdup_ps (__m128 __X) { return (__m128) __builtin_ia32_movshdup ((__v4sf)__X); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_moveldup_ps (__m128 __X) { return (__m128) __builtin_ia32_movsldup ((__v4sf)__X); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_addsub_pd (__m128d __X, __m128d __Y) { return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_pd (__m128d __X, __m128d __Y) { return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_pd (__m128d __X, __m128d __Y) { return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loaddup_pd (double const *__P) { return _mm_load1_pd (__P); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movedup_pd (__m128d __X) { return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0)); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_lddqu_si128 (__m128i const *__P) { return (__m128i) __builtin_ia32_lddqu ((char const *)__P); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_monitor (void const * __P, unsigned int __E, unsigned int __H) { __builtin_ia32_monitor (__P, __E, __H); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mwait (unsigned int __E, unsigned int __H) { __builtin_ia32_mwait (__E, __H); } #endif /* __SSE3__ */ #endif /* _PMMINTRIN_H_INCLUDED */ iso646.h 0000644 00000002370 14711270710 0005751 0 ustar 00 /* Copyright (C) 1997-2013 Free Software Foundation, Inc. 
This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* * ISO C Standard: 7.9 Alternative spellings <iso646.h> */ #ifndef _ISO646_H #define _ISO646_H #ifndef __cplusplus #define and && #define and_eq &= #define bitand & #define bitor | #define compl ~ #define not ! #define not_eq != #define or || #define or_eq |= #define xor ^ #define xor_eq ^= #endif #endif stdalign.h 0000644 00000002272 14711270710 0006525 0 ustar 00 /* Copyright (C) 2011-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* ISO C1X: 7.15 Alignment <stdalign.h>. */ #ifndef _STDALIGN_H #define _STDALIGN_H #ifndef __cplusplus #define alignas _Alignas #define alignof _Alignof #define __alignas_is_defined 1 #define __alignof_is_defined 1 #endif #endif /* stdalign.h */ adxintrin.h 0000644 00000003411 14711270711 0006715 0 ustar 00 /* Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
*/ #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED # error "Never use <adxintrin.h> directly; include <x86intrin.h> instead." #endif #ifndef _ADXINTRIN_H_INCLUDED #define _ADXINTRIN_H_INCLUDED extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _addcarryx_u32 (unsigned char __CF, unsigned int __X, unsigned int __Y, unsigned int *__P) { return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P); } #ifdef __x86_64__ extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _addcarryx_u64 (unsigned char __CF, unsigned long __X, unsigned long __Y, unsigned long long *__P) { return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P); } #endif #endif /* _ADXINTRIN_H_INCLUDED */ limits.h 0000644 00000012406 14711270711 0006222 0 ustar 00 /* Copyright (C) 1992-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* This administrivia gets added to the beginning of limits.h if the system has its own version of limits.h. */ /* We use _GCC_LIMITS_H_ because we want this not to match any macros that the system's limits.h uses for its own purposes. */ #ifndef _GCC_LIMITS_H_ /* Terminated in limity.h. */ #define _GCC_LIMITS_H_ #ifndef _LIBC_LIMITS_H_ /* Use "..." so that we find syslimits.h only in this same directory. */ #include "syslimits.h" #endif /* Copyright (C) 1991-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _LIMITS_H___ #define _LIMITS_H___ /* Number of bits in a `char'. */ #undef CHAR_BIT #define CHAR_BIT __CHAR_BIT__ /* Maximum length of a multibyte character. */ #ifndef MB_LEN_MAX #define MB_LEN_MAX 1 #endif /* Minimum and maximum values a `signed char' can hold. 
*/ #undef SCHAR_MIN #define SCHAR_MIN (-SCHAR_MAX - 1) #undef SCHAR_MAX #define SCHAR_MAX __SCHAR_MAX__ /* Maximum value an `unsigned char' can hold. (Minimum is 0). */ #undef UCHAR_MAX #if __SCHAR_MAX__ == __INT_MAX__ # define UCHAR_MAX (SCHAR_MAX * 2U + 1U) #else # define UCHAR_MAX (SCHAR_MAX * 2 + 1) #endif /* Minimum and maximum values a `char' can hold. */ #ifdef __CHAR_UNSIGNED__ # undef CHAR_MIN # if __SCHAR_MAX__ == __INT_MAX__ # define CHAR_MIN 0U # else # define CHAR_MIN 0 # endif # undef CHAR_MAX # define CHAR_MAX UCHAR_MAX #else # undef CHAR_MIN # define CHAR_MIN SCHAR_MIN # undef CHAR_MAX # define CHAR_MAX SCHAR_MAX #endif /* Minimum and maximum values a `signed short int' can hold. */ #undef SHRT_MIN #define SHRT_MIN (-SHRT_MAX - 1) #undef SHRT_MAX #define SHRT_MAX __SHRT_MAX__ /* Maximum value an `unsigned short int' can hold. (Minimum is 0). */ #undef USHRT_MAX #if __SHRT_MAX__ == __INT_MAX__ # define USHRT_MAX (SHRT_MAX * 2U + 1U) #else # define USHRT_MAX (SHRT_MAX * 2 + 1) #endif /* Minimum and maximum values a `signed int' can hold. */ #undef INT_MIN #define INT_MIN (-INT_MAX - 1) #undef INT_MAX #define INT_MAX __INT_MAX__ /* Maximum value an `unsigned int' can hold. (Minimum is 0). */ #undef UINT_MAX #define UINT_MAX (INT_MAX * 2U + 1U) /* Minimum and maximum values a `signed long int' can hold. (Same as `int'). */ #undef LONG_MIN #define LONG_MIN (-LONG_MAX - 1L) #undef LONG_MAX #define LONG_MAX __LONG_MAX__ /* Maximum value an `unsigned long int' can hold. (Minimum is 0). */ #undef ULONG_MAX #define ULONG_MAX (LONG_MAX * 2UL + 1UL) #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* Minimum and maximum values a `signed long long int' can hold. */ # undef LLONG_MIN # define LLONG_MIN (-LLONG_MAX - 1LL) # undef LLONG_MAX # define LLONG_MAX __LONG_LONG_MAX__ /* Maximum value an `unsigned long long int' can hold. (Minimum is 0). */ # undef ULLONG_MAX # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) #endif #if defined (__GNU_LIBRARY__) ? defined (__USE_GNU) : !defined (__STRICT_ANSI__) /* Minimum and maximum values a `signed long long int' can hold. */ # undef LONG_LONG_MIN # define LONG_LONG_MIN (-LONG_LONG_MAX - 1LL) # undef LONG_LONG_MAX # define LONG_LONG_MAX __LONG_LONG_MAX__ /* Maximum value an `unsigned long long int' can hold. (Minimum is 0). */ # undef ULONG_LONG_MAX # define ULONG_LONG_MAX (LONG_LONG_MAX * 2ULL + 1ULL) #endif #endif /* _LIMITS_H___ */ /* This administrivia gets added to the end of limits.h if the system has its own version of limits.h. */ #else /* not _GCC_LIMITS_H_ */ #ifdef _GCC_NEXT_LIMITS_H #include_next <limits.h> /* recurse down to the real one */ #endif #endif /* not _GCC_LIMITS_H_ */ mm3dnow.h 0000644 00000014513 14711270711 0006306 0 ustar 00 /* Copyright (C) 2004-2013 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. 
You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Implemented from the mm3dnow.h (of supposedly AMD origin) included with MSVC 7.1. */ #ifndef _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED #ifdef __3dNOW__ #include <mmintrin.h> #include <prfchwintrin.h> extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_femms (void) { __builtin_ia32_femms(); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pavgusb (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pf2id (__m64 __A) { return (__m64)__builtin_ia32_pf2id ((__v2sf)__A); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfacc (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfadd (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfcmpeq (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfcmpge (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfcmpgt (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfmax (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfmin (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfmul (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfrcp (__m64 __A) { return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfrcpit1 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfrcpit2 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfrsqrt (__m64 __A) { return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfrsqit1 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_pfsub (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, 
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsubr (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fd (__m64 __A) { return (__m64)__builtin_ia32_pi2fd ((__v2si)__A); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhrw (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch (void *__P) { __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_float (float __A) { return __extension__ (__m64)(__v2sf){ __A, 0.0f }; }

extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_float (__m64 __A)
{
  union { __v2sf v; float a[2]; } __tmp;
  __tmp.v = (__v2sf)__A;
  return __tmp.a[0];
}

#ifdef __3dNOW_A__

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2iw (__m64 __A) { return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfnacc (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfpnacc (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fw (__m64 __A) { return (__m64)__builtin_ia32_pi2fw ((__v2si)__A); }

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pswapd (__m64 __A) { return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A); }

#endif /* __3dNOW_A__ */
#endif /* __3dNOW__ */

#endif /* _MM3DNOW_H_INCLUDED */
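How the 3DNow! wrappers above compose, as a hedged sketch (assumes -m3dnow and hardware that still implements the extension; add2 is a hypothetical helper): _m_from_float builds the { a, 0 } pair, _m_pfadd adds element-wise, and _m_femms clears MMX state before any later x87 code.

/* Illustrative only: scalar add via 3DNow! (compile with -m3dnow).  */
#include <mm3dnow.h>

float add2 (float a, float b)
{
  __m64 va = _m_from_float (a);                /* { a, 0 }  */
  __m64 vb = _m_from_float (b);                /* { b, 0 }  */
  float r  = _m_to_float (_m_pfadd (va, vb));
  _m_femms ();             /* leave MMX state clean for x87 code  */
  return r;
}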
varargs.h 0000644 00000000213 14711270712 0006360 0 ustar 00

#ifndef _VARARGS_H
#define _VARARGS_H

#error "GCC no longer implements <varargs.h>."
#error "Revise your code to use <stdarg.h>."

#endif

stdbool.h 0000644 00000002636 14711270712 0006374 0 ustar 00

/* Copyright (C) 1998-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * ISO C Standard:  7.16  Boolean type and values  <stdbool.h>
 */

#ifndef _STDBOOL_H
#define _STDBOOL_H

#ifndef __cplusplus

#define bool  _Bool
#define true  1
#define false 0

#else /* __cplusplus */

/* Supporting <stdbool.h> in C++ is a GCC extension.  */
#define _Bool bool
#define bool  bool
#define false false
#define true  true

#endif /* __cplusplus */

/* Signal that all the definitions are present.  */
#define __bool_true_false_are_defined 1

#endif /* stdbool.h */

mm_malloc.h 0000644 00000003334 14711270712 0006662 0 ustar 00

/* Copyright (C) 2004-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _MM_MALLOC_H_INCLUDED
#define _MM_MALLOC_H_INCLUDED

#include <stdlib.h>

/* We can't depend on <stdlib.h> since the prototype of posix_memalign
   may not be visible.  */
#ifndef __cplusplus
extern int posix_memalign (void **, size_t, size_t);
#else
extern "C" int posix_memalign (void **, size_t, size_t) throw ();
#endif

static __inline void *
_mm_malloc (size_t size, size_t alignment)
{
  void *ptr;
  if (alignment == 1)
    return malloc (size);
  if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
    alignment = sizeof (void *);
  if (posix_memalign (&ptr, alignment, size) == 0)
    return ptr;
  else
    return NULL;
}

static __inline void
_mm_free (void * ptr)
{
  free (ptr);
}

#endif /* _MM_MALLOC_H_INCLUDED */
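Note why _mm_malloc above sends alignment 1 straight to malloc and bumps alignments of 2 (and 4 on 64-bit) up to sizeof (void *): posix_memalign rejects alignments smaller than a pointer. A minimal usage sketch, illustrative only:

/* Illustrative only: a 32-byte-aligned buffer suitable for AVX.  */
#include <mm_malloc.h>

int main (void)
{
  float *buf = (float *) _mm_malloc (8 * sizeof (float), 32);
  if (!buf)
    return 1;
  /* ... aligned vector loads/stores on buf ... */
  _mm_free (buf);    /* always pair _mm_malloc with _mm_free  */
  return 0;
}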
f16cintrin.h 0000644 00000006313 14711270712 0006705 0 ustar 00

/* Copyright (C) 2011-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
# error "Never use <f16cintrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
#endif

#ifndef __F16C__
# error "F16C instruction set not enabled"
#else

#ifndef _F16CINTRIN_H_INCLUDED
#define _F16CINTRIN_H_INCLUDED

extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtsh_ss (unsigned short __S)
{
  __v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
  __v4sf __A = __builtin_ia32_vcvtph2ps (__H);
  return __builtin_ia32_vec_ext_v4sf (__A, 0);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_ps (__m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A); }

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_ps (__m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A); }

#ifdef __OPTIMIZE__
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtss_sh (float __F, const int __I)
{
  __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
  __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
  return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_ph (__m128 __A, const int __I) { return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I); }

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_ph (__m256 __A, const int __I) { return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I); }
#else
#define _cvtss_sh(__F, __I) \
  (__extension__ \
   ({ \
      __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \
      __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \
      (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \
    }))

#define _mm_cvtps_ph(A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) A, (int) (I)))

#define _mm256_cvtps_ph(A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I)))
#endif /* __OPTIMIZE__ */

#endif /* _F16CINTRIN_H_INCLUDED */
#endif /* __F16C__ */
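A round trip through IEEE half precision using the conversions just defined; a hedged sketch assuming -mf16c (the immediate 0 in _cvtss_sh selects round-to-nearest-even):

/* Illustrative only: float -> half -> float (compile with -mf16c).  */
#include <immintrin.h>
#include <stdio.h>

int main (void)
{
  float x = 1.5f;
  unsigned short h = _cvtss_sh (x, 0);   /* 0 = round to nearest even  */
  float y = _cvtsh_ss (h);
  printf ("%f -> 0x%04x -> %f\n", x, h, y);
  return 0;
}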
xsaveoptintrin.h 0000644 00000003205 14711270712 0010014 0 ustar 00

/* Copyright (C) 2012-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
# error "Never use <xsaveoptintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _XSAVEOPTINTRIN_H_INCLUDED
#define _XSAVEOPTINTRIN_H_INCLUDED

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsaveopt (void *__P, long long __M) { return __builtin_ia32_xsaveopt (__P, __M); }

#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsaveopt64 (void *__P, long long __M) { return __builtin_ia32_xsaveopt64 (__P, __M); }
#endif

#endif /* _XSAVEOPTINTRIN_H_INCLUDED */

fma4intrin.h 0000644 00000021433 14711270713 0006776 0 ustar 00

/* Copyright (C) 2007-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _X86INTRIN_H_INCLUDED
# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _FMA4INTRIN_H_INCLUDED
#define _FMA4INTRIN_H_INCLUDED

#ifndef __FMA4__
# error "FMA4 instruction set not enabled"
#else

/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files.  */
#include <ammintrin.h>

/* 128b Floating point multiply/add type instructions.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C); }

/* 256b Floating point multiply/add type instructions.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C); }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); }
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); }

#endif
#endif
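The FMA4 wrappers above are all sign variants of one fused operation; a hedged sketch (assumes -mfma4, i.e. Bulldozer-era AMD hardware; fma4_demo is a hypothetical helper):

/* Illustrative only: fused a*b+c on four floats (compile with -mfma4).  */
#include <x86intrin.h>

__m128 fma4_demo (__m128 a, __m128 b, __m128 c)
{
  return _mm_macc_ps (a, b, c);   /* a * b + c element-wise, one rounding  */
}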
quadmath_weak.h 0000644 00000006071 14711270713 0007537 0 ustar 00

/* GCC Quad-Precision Math Library
   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
   Written by Tobias Burnus <burnus@net-b.de>

   This file is part of the libquadmath library.

   Libquadmath is free software; you can redistribute it and/or modify it under
   the terms of the GNU Library General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option) any
   later version.

   Libquadmath is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
   PARTICULAR PURPOSE.  See the GNU Library General Public License for more
   details.

   You should have received a copy of the GNU Library General Public License along
   with libquadmath; see the file COPYING.LIB.  If not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301,
   USA.  */

#ifndef QUADMATH_WEAK_H
#define QUADMATH_WEAK_H

#include "quadmath.h"

#if SUPPORTS_WEAK
# define __qmath2(name,name2,type) \
  static __typeof(type) name __attribute__ ((__weakref__(#name2)));
# define __qmath_(name) __qmath_ ## name
#else
# define __qmath2(name,name2,type)
# define __qmath_(name) name
#endif

/* __qmath_foo is a weak reference to symbol foo.  */
#define __qmath3(name) __qmath2(__qmath_ ## name,name,name)

/* Prototypes for real functions.  */
__qmath3 (acosq)      __qmath3 (acoshq)     __qmath3 (asinq)      __qmath3 (asinhq)
__qmath3 (atanq)      __qmath3 (atanhq)     __qmath3 (atan2q)     __qmath3 (cbrtq)
__qmath3 (ceilq)      __qmath3 (copysignq)  __qmath3 (coshq)      __qmath3 (cosq)
__qmath3 (erfq)       __qmath3 (erfcq)      __qmath3 (expq)       __qmath3 (expm1q)
__qmath3 (fabsq)      __qmath3 (fdimq)      __qmath3 (finiteq)    __qmath3 (floorq)
__qmath3 (fmaq)       __qmath3 (fmaxq)      __qmath3 (fminq)      __qmath3 (fmodq)
__qmath3 (frexpq)     __qmath3 (hypotq)     __qmath3 (ilogbq)     __qmath3 (isinfq)
__qmath3 (isnanq)     __qmath3 (j0q)        __qmath3 (j1q)        __qmath3 (jnq)
__qmath3 (ldexpq)     __qmath3 (lgammaq)    __qmath3 (llrintq)    __qmath3 (llroundq)
__qmath3 (logq)       __qmath3 (log10q)     __qmath3 (log1pq)     __qmath3 (log2q)
__qmath3 (lrintq)     __qmath3 (lroundq)    __qmath3 (modfq)      __qmath3 (nanq)
__qmath3 (nearbyintq) __qmath3 (nextafterq) __qmath3 (powq)       __qmath3 (remainderq)
__qmath3 (remquoq)    __qmath3 (rintq)      __qmath3 (roundq)     __qmath3 (scalblnq)
__qmath3 (scalbnq)    __qmath3 (signbitq)   __qmath3 (sincosq)    __qmath3 (sinhq)
__qmath3 (sinq)       __qmath3 (sqrtq)      __qmath3 (tanq)       __qmath3 (tanhq)
__qmath3 (tgammaq)    __qmath3 (truncq)     __qmath3 (y0q)        __qmath3 (y1q)
__qmath3 (ynq)

/* Prototypes for complex functions.  */
__qmath3 (cabsq)      __qmath3 (cargq)      __qmath3 (cimagq)     __qmath3 (crealq)
__qmath3 (cacosq)     __qmath3 (cacoshq)    __qmath3 (casinq)     __qmath3 (casinhq)
__qmath3 (catanq)     __qmath3 (catanhq)    __qmath3 (ccosq)      __qmath3 (ccoshq)
__qmath3 (cexpq)      __qmath3 (cexpiq)     __qmath3 (clogq)      __qmath3 (clog10q)
__qmath3 (conjq)      __qmath3 (cpowq)      __qmath3 (cprojq)     __qmath3 (csinq)
__qmath3 (csinhq)     __qmath3 (csqrtq)     __qmath3 (ctanq)      __qmath3 (ctanhq)

/* Prototypes for string <-> flt128 conversion functions.  */
__qmath3 (strtoflt128)
__qmath3 (quadmath_snprintf)

#endif

ammintrin.h 0000644 00000006012 14711270713 0006715 0 ustar 00

/* Copyright (C) 2007-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the AMD Programmers
   Manual Update, version 2.x */

#ifndef _AMMINTRIN_H_INCLUDED
#define _AMMINTRIN_H_INCLUDED

#ifndef __SSE4A__
# error "SSE4A instruction set not enabled"
#else

/* We need definitions from the SSE3, SSE2 and SSE header files */
#include <pmmintrin.h>

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_sd (double * __P, __m128d __Y) { __builtin_ia32_movntsd (__P, (__v2df) __Y); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_ss (float * __P, __m128 __Y) { __builtin_ia32_movntss (__P, (__v4sf) __Y); }

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_si64 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y); }

#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
{
  return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
}
#else
#define _mm_extracti_si64(X, I, L) \
  ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \
				    (unsigned int)(I), (unsigned int)(L)))
#endif

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_si64 (__m128i __X,__m128i __Y) { return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y); }

#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
{
  return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
}
#else
#define _mm_inserti_si64(X, Y, I, L) \
  ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \
				      (__v2di)(__m128i)(Y), \
				      (unsigned int)(I), (unsigned int)(L)))
#endif

#endif /* __SSE4A__ */
#endif /* _AMMINTRIN_H_INCLUDED */
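_mm_stream_ss and _mm_stream_sd above are non-temporal stores that bypass the cache; a hedged usage sketch (assumes -msse4a; stream_fill is a hypothetical helper, and _mm_set1_ps/_mm_sfence come in via the <pmmintrin.h> include chain):

/* Illustrative only: cache-bypassing fill (compile with -msse4a).  */
#include <ammintrin.h>

void stream_fill (float *dst, float v, int n)
{
  __m128 vv = _mm_set1_ps (v);
  for (int i = 0; i < n; i++)
    _mm_stream_ss (&dst[i], vv);   /* store low element, non-temporal  */
  _mm_sfence ();    /* order NT stores before later ordinary accesses  */
}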
stdnoreturn.h 0000644 00000002160 14711270714 0007307 0 ustar 00

/* Copyright (C) 2011-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* ISO C1X: 7.23 _Noreturn <stdnoreturn.h>.  */

#ifndef _STDNORETURN_H
#define _STDNORETURN_H

#ifndef __cplusplus
#define noreturn _Noreturn
#endif

#endif /* stdnoreturn.h */
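What the noreturn macro above buys in practice: the specifier lets the compiler drop unreachable code after a call and warn if the function could fall off the end. An illustrative sketch (die is a hypothetical helper):

/* Illustrative only: a function that never returns (C11).  */
#include <stdnoreturn.h>
#include <stdlib.h>

noreturn void die (int code)
{
  exit (code);   /* noreturn expands to the C11 _Noreturn specifier  */
}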
lwpintrin.h 0000644 00000006214 14711270714 0006752 0 ustar 00

/* Copyright (C) 2007-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _X86INTRIN_H_INCLUDED
# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _LWPINTRIN_H_INCLUDED
#define _LWPINTRIN_H_INCLUDED

#ifndef __LWP__
# error "LWP instruction set not enabled"
#else

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__llwpcb (void *pcbAddress) { __builtin_ia32_llwpcb (pcbAddress); }

extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__slwpcb (void) { return __builtin_ia32_slwpcb (); }

#ifdef __OPTIMIZE__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
{
  __builtin_ia32_lwpval32 (data2, data1, flags);
}

#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
{
  __builtin_ia32_lwpval64 (data2, data1, flags);
}
#endif
#else
#define __lwpval32(D2, D1, F) \
  (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#ifdef __x86_64__
#define __lwpval64(D2, D1, F) \
  (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#endif
#endif

#ifdef __OPTIMIZE__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
{
  return __builtin_ia32_lwpins32 (data2, data1, flags);
}

#ifdef __x86_64__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
{
  return __builtin_ia32_lwpins64 (data2, data1, flags);
}
#endif
#else
#define __lwpins32(D2, D1, F) \
  (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#ifdef __x86_64__
#define __lwpins64(D2, D1, F) \
  (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#endif
#endif

#endif /* __LWP__ */
#endif /* _LWPINTRIN_H_INCLUDED */

stdint.h 0000644 00000000510 14711270715 0006223 0 ustar 00

#ifndef _GCC_WRAP_STDINT_H
#if __STDC_HOSTED__
# if defined __cplusplus && __cplusplus >= 201103L
#  undef __STDC_LIMIT_MACROS
#  define __STDC_LIMIT_MACROS
#  undef __STDC_CONSTANT_MACROS
#  define __STDC_CONSTANT_MACROS
# endif
# include_next <stdint.h>
#else
# include "stdint-gcc.h"
#endif
#define _GCC_WRAP_STDINT_H
#endif
bmi2intrin.h 0000644 00000006263 14711270715 0007006 0 ustar 00

/* Copyright (C) 2011-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __BMI2__
# error "BMI2 instruction set not enabled"
#endif /* __BMI2__ */

#ifndef _BMI2INTRIN_H_INCLUDED
#define _BMI2INTRIN_H_INCLUDED

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32 (unsigned int __X, unsigned int __Y) { return __builtin_ia32_bzhi_si (__X, __Y); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u32 (unsigned int __X, unsigned int __Y) { return __builtin_ia32_pdep_si (__X, __Y); }

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u32 (unsigned int __X, unsigned int __Y) { return __builtin_ia32_pext_si (__X, __Y); }

#ifdef __x86_64__

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u64 (unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bzhi_di (__X, __Y); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u64 (unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pdep_di (__X, __Y); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u64 (unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pext_di (__X, __Y); }

extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long *__P)
{
  unsigned __int128 __res = (unsigned __int128) __X * __Y;
  *__P = (unsigned long long) (__res >> 64);
  return (unsigned long long) __res;
}

#else /* !__x86_64__ */

extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
{
  unsigned long long __res = (unsigned long long) __X * __Y;
  *__P = (unsigned int) (__res >> 32);
  return (unsigned int) __res;
}

#endif /* !__x86_64__ */

#endif /* _BMI2INTRIN_H_INCLUDED */
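_pext gathers the bits selected by a mask down into the low end, and _pdep scatters them back out; a hedged sketch (assumes -mbmi2 and a BMI2-capable CPU):

/* Illustrative only: bit gather/scatter (compile with -mbmi2).  */
#include <x86intrin.h>
#include <stdio.h>

int main (void)
{
  unsigned x    = 0xDEADBEEFu;
  unsigned mask = 0xF0F0F0F0u;
  unsigned got  = _pext_u32 (x, mask);    /* pack the selected nibbles  */
  unsigned back = _pdep_u32 (got, mask);  /* scatter them back in place */
  printf ("%08x -> %04x -> %08x\n", x, got, back);
  return 0;
}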
fxsrintrin.h 0000644 00000003577 14711270715 0007140 0 ustar 00

/* Copyright (C) 2012-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
# error "Never use <fxsrintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _FXSRINTRIN_H_INCLUDED
#define _FXSRINTRIN_H_INCLUDED

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave (void *__P) { return __builtin_ia32_fxsave (__P); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor (void *__P) { return __builtin_ia32_fxrstor (__P); }

#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave64 (void *__P) { return __builtin_ia32_fxsave64 (__P); }

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor64 (void *__P) { return __builtin_ia32_fxrstor64 (__P); }
#endif

#endif /* _FXSRINTRIN_H_INCLUDED */
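_fxsave above requires a 512-byte save area aligned to 16 bytes, per the FXSAVE instruction; a hedged sketch (assumes -mfxsr; checkpoint_fpu is a hypothetical helper):

/* Illustrative only: snapshot and restore x87/SSE state.  */
#include <x86intrin.h>

void checkpoint_fpu (void)
{
  /* FXSAVE/FXRSTOR need a 16-byte-aligned, 512-byte region.  */
  static char area[512] __attribute__ ((aligned (16)));
  _fxsave (area);
  /* ... code that may clobber x87/SSE state ... */
  _fxrstor (area);
}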
cpuid.h 0000644 00000017774 14711270715 0006042 0 ustar 00

/*
 * Copyright (C) 2007-2013 Free Software Foundation, Inc.
 *
 * This file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 3, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Under Section 7 of GPL version 3, you are granted additional
 * permissions described in the GCC Runtime Library Exception, version
 * 3.1, as published by the Free Software Foundation.
 *
 * You should have received a copy of the GNU General Public License and
 * a copy of the GCC Runtime Library Exception along with this program;
 * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

/* %ecx */
#define bit_SSE3	(1 << 0)
#define bit_PCLMUL	(1 << 1)
#define bit_LZCNT	(1 << 5)
#define bit_SSSE3	(1 << 9)
#define bit_FMA		(1 << 12)
#define bit_CMPXCHG16B	(1 << 13)
#define bit_SSE4_1	(1 << 19)
#define bit_SSE4_2	(1 << 20)
#define bit_MOVBE	(1 << 22)
#define bit_POPCNT	(1 << 23)
#define bit_AES		(1 << 25)
#define bit_XSAVE	(1 << 26)
#define bit_OSXSAVE	(1 << 27)
#define bit_AVX		(1 << 28)
#define bit_F16C	(1 << 29)
#define bit_RDRND	(1 << 30)

/* %edx */
#define bit_CMPXCHG8B	(1 << 8)
#define bit_CMOV	(1 << 15)
#define bit_MMX		(1 << 23)
#define bit_FXSAVE	(1 << 24)
#define bit_SSE		(1 << 25)
#define bit_SSE2	(1 << 26)

/* Extended Features */
/* %ecx */
#define bit_LAHF_LM	(1 << 0)
#define bit_ABM		(1 << 5)
#define bit_SSE4a	(1 << 6)
#define bit_PRFCHW	(1 << 8)
#define bit_XOP		(1 << 11)
#define bit_LWP		(1 << 15)
#define bit_FMA4	(1 << 16)
#define bit_TBM		(1 << 21)

/* %edx */
#define bit_MMXEXT	(1 << 22)
#define bit_LM		(1 << 29)
#define bit_3DNOWP	(1 << 30)
#define bit_3DNOW	(1 << 31)

/* Extended Features (%eax == 7) */
#define bit_FSGSBASE	(1 << 0)
#define bit_BMI		(1 << 3)
#define bit_HLE		(1 << 4)
#define bit_AVX2	(1 << 5)
#define bit_BMI2	(1 << 8)
#define bit_RTM		(1 << 11)
#define bit_RDSEED	(1 << 18)
#define bit_ADX		(1 << 19)
/* %ecx */
#define bit_PKU		(1 << 3)
#define bit_OSPKE	(1 << 4)

/* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
#define bit_XSAVEOPT	(1 << 0)

/* Signatures for different CPU implementations as returned in uses
   of cpuid with level 0.  */
#define signature_AMD_ebx	0x68747541
#define signature_AMD_ecx	0x444d4163
#define signature_AMD_edx	0x69746e65

#define signature_CENTAUR_ebx	0x746e6543
#define signature_CENTAUR_ecx	0x736c7561
#define signature_CENTAUR_edx	0x48727561

#define signature_CYRIX_ebx	0x69727943
#define signature_CYRIX_ecx	0x64616574
#define signature_CYRIX_edx	0x736e4978

#define signature_INTEL_ebx	0x756e6547
#define signature_INTEL_ecx	0x6c65746e
#define signature_INTEL_edx	0x49656e69

#define signature_TM1_ebx	0x6e617254
#define signature_TM1_ecx	0x55504361
#define signature_TM1_edx	0x74656d73

#define signature_TM2_ebx	0x756e6547
#define signature_TM2_ecx	0x3638784d
#define signature_TM2_edx	0x54656e69

#define signature_NSC_ebx	0x646f6547
#define signature_NSC_ecx	0x43534e20
#define signature_NSC_edx	0x79622065

#define signature_NEXGEN_ebx	0x4778654e
#define signature_NEXGEN_ecx	0x6e657669
#define signature_NEXGEN_edx	0x72446e65

#define signature_RISE_ebx	0x65736952
#define signature_RISE_ecx	0x65736952
#define signature_RISE_edx	0x65736952

#define signature_SIS_ebx	0x20536953
#define signature_SIS_ecx	0x20536953
#define signature_SIS_edx	0x20536953

#define signature_UMC_ebx	0x20434d55
#define signature_UMC_ecx	0x20434d55
#define signature_UMC_edx	0x20434d55

#define signature_VIA_ebx	0x20414956
#define signature_VIA_ecx	0x20414956
#define signature_VIA_edx	0x20414956

#define signature_VORTEX_ebx	0x74726f56
#define signature_VORTEX_ecx	0x436f5320
#define signature_VORTEX_edx	0x36387865

#if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register.  */
#if __GNUC__ >= 3
#define __cpuid(level, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %k1\n\t" \
	   "cpuid\n\t" \
	   "xchg{l}\t{%%}ebx, %k1\n\t" \
	   : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
	   : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %k1\n\t" \
	   "cpuid\n\t" \
	   "xchg{l}\t{%%}ebx, %k1\n\t" \
	   : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
   nor alternatives in i386 code.  */
#define __cpuid(level, a, b, c, d) \
  __asm__ ("xchgl\t%%ebx, %k1\n\t" \
	   "cpuid\n\t" \
	   "xchgl\t%%ebx, %k1\n\t" \
	   : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
	   : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchgl\t%%ebx, %k1\n\t" \
	   "cpuid\n\t" \
	   "xchgl\t%%ebx, %k1\n\t" \
	   : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
#endif
#elif defined(__x86_64__) && (defined(__code_model_medium__) || defined(__code_model_large__)) && defined(__PIC__)
/* %rbx may be the PIC register.  */
#define __cpuid(level, a, b, c, d) \
  __asm__ ("xchg{q}\t{%%}rbx, %q1\n\t" \
	   "cpuid\n\t" \
	   "xchg{q}\t{%%}rbx, %q1\n\t" \
	   : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
	   : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchg{q}\t{%%}rbx, %q1\n\t" \
	   "cpuid\n\t" \
	   "xchg{q}\t{%%}rbx, %q1\n\t" \
	   : "=a" (a), "=&r" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
#else
#define __cpuid(level, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
	   : "0" (level))

#define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
#endif

/* Return the highest supported input value for the cpuid instruction.
   ext can be either 0x0 or 0x80000000 to return the highest supported
   value for basic or extended cpuid information.  Returns 0 if cpuid
   is not supported; otherwise returns whatever the cpuid instruction
   returns in the eax register.  If sig is non-null, the first four
   bytes of the signature (as found in the ebx register) are stored in
   the location it points to.  */
static __inline unsigned int
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
{
  unsigned int __eax, __ebx, __ecx, __edx;

#ifndef __x86_64__
  /* See if we can use cpuid.  On AMD64 we always can.  */
#if __GNUC__ >= 3
  __asm__ ("pushf{l|d}\n\t"
	   "pushf{l|d}\n\t"
	   "pop{l}\t%0\n\t"
	   "mov{l}\t{%0, %1|%1, %0}\n\t"
	   "xor{l}\t{%2, %0|%0, %2}\n\t"
	   "push{l}\t%0\n\t"
	   "popf{l|d}\n\t"
	   "pushf{l|d}\n\t"
	   "pop{l}\t%0\n\t"
	   "popf{l|d}\n\t"
	   : "=&r" (__eax), "=&r" (__ebx)
	   : "i" (0x00200000));
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
   nor alternatives in i386 code.  */
  __asm__ ("pushfl\n\t"
	   "pushfl\n\t"
	   "popl\t%0\n\t"
	   "movl\t%0, %1\n\t"
	   "xorl\t%2, %0\n\t"
	   "pushl\t%0\n\t"
	   "popfl\n\t"
	   "pushfl\n\t"
	   "popl\t%0\n\t"
	   "popfl\n\t"
	   : "=&r" (__eax), "=&r" (__ebx)
	   : "i" (0x00200000));
#endif

  if (!((__eax ^ __ebx) & 0x00200000))
    return 0;
#endif

  /* Host supports cpuid.  Return highest supported cpuid input value.  */
  __cpuid (__ext, __eax, __ebx, __ecx, __edx);

  if (__sig)
    *__sig = __ebx;

  return __eax;
}

/* Return cpuid data for requested cpuid level, as found in returned
   eax, ebx, ecx and edx registers.  The function checks if cpuid is
   supported and returns 1 for valid cpuid information or 0 for
   unsupported cpuid level.  All pointers are required to be non-null.  */
static __inline int
__get_cpuid (unsigned int __level,
	     unsigned int *__eax, unsigned int *__ebx,
	     unsigned int *__ecx, unsigned int *__edx)
{
  unsigned int __ext = __level & 0x80000000;

  if (__get_cpuid_max (__ext, 0) < __level)
    return 0;

  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}
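Putting __get_cpuid to work with the feature bits defined above, as a hedged sketch (a production AVX check would also test bit_OSXSAVE and the XCR0 state):

/* Illustrative only: runtime AVX detection via <cpuid.h>.  */
#include <cpuid.h>
#include <stdio.h>

int main (void)
{
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) && (ecx & bit_AVX))
    puts ("AVX supported");
  else
    puts ("AVX not supported (or cpuid unavailable)");
  return 0;
}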
quadmath.h 0000644 00000021506 14711270715 0006532 0 ustar 00

/* GCC Quad-Precision Math Library
   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
   Written by Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>

   This file is part of the libquadmath library.

   Libquadmath is free software; you can redistribute it and/or modify it under
   the terms of the GNU Library General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option) any
   later version.

   Libquadmath is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
   PARTICULAR PURPOSE.  See the GNU Library General Public License for more
   details.

   You should have received a copy of the GNU Library General Public License along
   with libquadmath; see the file COPYING.LIB.  If not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301,
   USA.  */

#ifndef QUADMATH_H
#define QUADMATH_H

#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Define the complex type corresponding to __float128
   ("_Complex __float128" is not allowed) */
typedef _Complex float __attribute__((mode(TC))) __complex128;

#ifdef __cplusplus
# define __quadmath_throw throw ()
# define __quadmath_nth(fct) fct throw ()
#else
# define __quadmath_throw __attribute__((__nothrow__))
# define __quadmath_nth(fct) __attribute__((__nothrow__)) fct
#endif

/* Prototypes for real functions */
extern __float128 acosq (__float128) __quadmath_throw;
extern __float128 acoshq (__float128) __quadmath_throw;
extern __float128 asinq (__float128) __quadmath_throw;
extern __float128 asinhq (__float128) __quadmath_throw;
extern __float128 atanq (__float128) __quadmath_throw;
extern __float128 atanhq (__float128) __quadmath_throw;
extern __float128 atan2q (__float128, __float128) __quadmath_throw;
extern __float128 cbrtq (__float128) __quadmath_throw;
extern __float128 ceilq (__float128) __quadmath_throw;
extern __float128 copysignq (__float128, __float128) __quadmath_throw;
extern __float128 coshq (__float128) __quadmath_throw;
extern __float128 cosq (__float128) __quadmath_throw;
extern __float128 erfq (__float128) __quadmath_throw;
extern __float128 erfcq (__float128) __quadmath_throw;
extern __float128 expq (__float128) __quadmath_throw;
extern __float128 expm1q (__float128) __quadmath_throw;
extern __float128 fabsq (__float128) __quadmath_throw;
extern __float128 fdimq (__float128, __float128) __quadmath_throw;
extern int finiteq (__float128) __quadmath_throw;
extern __float128 floorq (__float128) __quadmath_throw;
extern __float128 fmaq (__float128, __float128, __float128) __quadmath_throw;
extern __float128 fmaxq (__float128, __float128) __quadmath_throw;
extern __float128 fminq (__float128, __float128) __quadmath_throw;
extern __float128 fmodq (__float128, __float128) __quadmath_throw;
extern __float128 frexpq (__float128, int *) __quadmath_throw;
extern __float128 hypotq (__float128, __float128) __quadmath_throw;
extern int isinfq (__float128) __quadmath_throw;
extern int ilogbq (__float128) __quadmath_throw;
extern int isnanq (__float128) __quadmath_throw;
extern __float128 j0q (__float128) __quadmath_throw;
extern __float128 j1q (__float128) __quadmath_throw;
extern __float128 jnq (int, __float128) __quadmath_throw;
extern __float128 ldexpq (__float128, int) __quadmath_throw;
extern __float128 lgammaq (__float128) __quadmath_throw;
extern long long int llrintq (__float128) __quadmath_throw;
extern long long int llroundq (__float128) __quadmath_throw;
extern __float128 logq (__float128) __quadmath_throw;
extern __float128 log10q (__float128) __quadmath_throw;
extern __float128 log2q (__float128) __quadmath_throw;
extern __float128 log1pq (__float128) __quadmath_throw;
extern long int lrintq (__float128) __quadmath_throw;
extern long int lroundq (__float128) __quadmath_throw;
extern __float128 modfq (__float128, __float128 *) __quadmath_throw;
extern __float128 nanq (const char *) __quadmath_throw;
extern __float128 nearbyintq (__float128) __quadmath_throw;
extern __float128 nextafterq (__float128, __float128) __quadmath_throw;
extern __float128 powq (__float128, __float128) __quadmath_throw;
extern __float128 remainderq (__float128, __float128) __quadmath_throw;
extern __float128 remquoq (__float128, __float128, int *) __quadmath_throw;
extern __float128 rintq (__float128) __quadmath_throw;
extern __float128 roundq (__float128) __quadmath_throw;
extern __float128 scalblnq (__float128, long int) __quadmath_throw;
extern __float128 scalbnq (__float128, int) __quadmath_throw;
extern int signbitq (__float128) __quadmath_throw;
extern void sincosq (__float128, __float128 *, __float128 *) __quadmath_throw;
extern __float128 sinhq (__float128) __quadmath_throw;
extern __float128 sinq (__float128) __quadmath_throw;
extern __float128 sqrtq (__float128) __quadmath_throw;
extern __float128 tanq (__float128) __quadmath_throw;
extern __float128 tanhq (__float128) __quadmath_throw;
extern __float128 tgammaq (__float128) __quadmath_throw;
extern __float128 truncq (__float128) __quadmath_throw;
extern __float128 y0q (__float128) __quadmath_throw;
extern __float128 y1q (__float128) __quadmath_throw;
extern __float128 ynq (int, __float128) __quadmath_throw;

/* Prototypes for complex functions */
extern __float128 cabsq (__complex128) __quadmath_throw;
extern __float128 cargq (__complex128) __quadmath_throw;
extern __float128 cimagq (__complex128) __quadmath_throw;
extern __float128 crealq (__complex128) __quadmath_throw;
extern __complex128 cacosq (__complex128) __quadmath_throw;
extern __complex128 cacoshq (__complex128) __quadmath_throw;
extern __complex128 casinq (__complex128) __quadmath_throw;
extern __complex128 casinhq (__complex128) __quadmath_throw;
extern __complex128 catanq (__complex128) __quadmath_throw;
extern __complex128 catanhq (__complex128) __quadmath_throw;
extern __complex128 ccosq (__complex128) __quadmath_throw;
extern __complex128 ccoshq (__complex128) __quadmath_throw;
extern __complex128 cexpq (__complex128) __quadmath_throw;
extern __complex128 cexpiq (__float128) __quadmath_throw;
extern __complex128 clogq (__complex128) __quadmath_throw;
extern __complex128 clog10q (__complex128) __quadmath_throw;
extern __complex128 conjq (__complex128) __quadmath_throw;
extern __complex128 cpowq (__complex128, __complex128) __quadmath_throw;
extern __complex128 cprojq (__complex128) __quadmath_throw;
extern __complex128 csinq (__complex128) __quadmath_throw;
extern __complex128 csinhq (__complex128) __quadmath_throw;
extern __complex128 csqrtq (__complex128) __quadmath_throw;
extern __complex128 ctanq (__complex128) __quadmath_throw;
extern __complex128 ctanhq (__complex128) __quadmath_throw;

/* Prototypes for string <-> __float128 conversion functions */
extern __float128 strtoflt128 (const char *, char **) __quadmath_throw;
extern int quadmath_snprintf (char *str, size_t size,
			      const char *format, ...) __quadmath_throw;
/* Macros */
#define FLT128_MAX 1.18973149535723176508575932662800702e4932Q
#define FLT128_MIN 3.36210314311209350626267781732175260e-4932Q
#define FLT128_EPSILON 1.92592994438723585305597794258492732e-34Q
#define FLT128_DENORM_MIN 6.475175119438025110924438958227646552e-4966Q
#define FLT128_MANT_DIG 113
#define FLT128_MIN_EXP (-16381)
#define FLT128_MAX_EXP 16384
#define FLT128_DIG 33
#define FLT128_MIN_10_EXP (-4931)
#define FLT128_MAX_10_EXP 4932

#define HUGE_VALQ __builtin_huge_valq()
/* The following alternative is valid, but brings the warning:
   (floating constant exceeds range of ‘__float128’)  */
/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */

#define M_Eq		2.7182818284590452353602874713526625Q  /* e */
#define M_LOG2Eq	1.4426950408889634073599246810018921Q  /* log_2 e */
#define M_LOG10Eq	0.4342944819032518276511289189166051Q  /* log_10 e */
#define M_LN2q		0.6931471805599453094172321214581766Q  /* log_e 2 */
#define M_LN10q		2.3025850929940456840179914546843642Q  /* log_e 10 */
#define M_PIq		3.1415926535897932384626433832795029Q  /* pi */
#define M_PI_2q		1.5707963267948966192313216916397514Q  /* pi/2 */
#define M_PI_4q		0.7853981633974483096156608458198757Q  /* pi/4 */
#define M_1_PIq		0.3183098861837906715377675267450287Q  /* 1/pi */
#define M_2_PIq		0.6366197723675813430755350534900574Q  /* 2/pi */
#define M_2_SQRTPIq	1.1283791670955125738961589031215452Q  /* 2/sqrt(pi) */
#define M_SQRT2q	1.4142135623730950488016887242096981Q  /* sqrt(2) */
#define M_SQRT1_2q	0.7071067811865475244008443621048490Q  /* 1/sqrt(2) */

#define __quadmath_extern_inline \
  extern inline __attribute__ ((__gnu_inline__))

__quadmath_extern_inline __float128
__quadmath_nth (cimagq (__complex128 __z))
{
  return __imag__ __z;
}

__quadmath_extern_inline __float128
__quadmath_nth (crealq (__complex128 __z))
{
  return __real__ __z;
}

__quadmath_extern_inline __complex128
__quadmath_nth (conjq (__complex128 __z))
{
  return __extension__ ~__z;
}

#ifdef __cplusplus
}
#endif

#endif

bmmintrin.h 0000644 00000002202 14711270715 0006715 0 ustar 00

/* Copyright (C) 2007-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under the terms
   of the GNU General Public License as published by the Free Software Foundation;
   either version 3, or (at your option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional permissions
   described in the GCC Runtime Library Exception, version 3.1, as published by
   the Free Software Foundation.

   You should have received a copy of the GNU General Public License and a copy of
   the GCC Runtime Library Exception along with this program; see the files
   COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _BMMINTRIN_H_INCLUDED
#define _BMMINTRIN_H_INCLUDED

# error "SSE5 instruction set removed from compiler"

#endif /* _BMMINTRIN_H_INCLUDED */
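Tying quadmath.h above together, a hedged sketch of the formatted-output path it declares (link with -lquadmath; the Q length modifier is libquadmath's own):

/* Illustrative only: ~30 significant digits of pi (link -lquadmath).  */
#include <quadmath.h>
#include <stdio.h>

int main (void)
{
  __float128 x = sqrtq (M_PIq * M_PIq);
  char buf[64];
  quadmath_snprintf (buf, sizeof buf, "%.30Qg", x);
  puts (buf);
  return 0;
}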