Intel® OpenMP* Runtime Library
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  * $Revision: 42582 $
4  * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp_atomic.h"
38 #include "kmp.h" // TRUE, asm routines prototypes
39 
40 typedef unsigned char uchar;
41 typedef unsigned short ushort;
42 
560 /*
561  * Global vars
562  */
563 
564 #ifndef KMP_GOMP_COMPAT
565 int __kmp_atomic_mode = 1; // Intel perf
566 #else
567 int __kmp_atomic_mode = 2; // GOMP compatibility
568 #endif /* KMP_GOMP_COMPAT */
569 
570 KMP_ALIGN(128)
571 
572 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
573 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
574 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
575 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
576 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
577 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
578 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
579 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */
580 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
581 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
582 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
583 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
584 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
585 
586 
587 /*
588  2007-03-02:
589  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
590  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
591  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
592  routines in assembler language.
593 */
594 #define KMP_ATOMIC_VOLATILE volatile
595 
596 #if ( KMP_ARCH_X86 )
597 
598  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
599  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
600  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
601  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
602  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
603  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
604 
605  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
606  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
607  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
608  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
609  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
610  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
611 
612  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
613  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
614  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
615  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
616 
617  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
618  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
619  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
620  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
621 
622 #endif
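// Note (editorial sketch of intent, inferred from the definitions above): these
// overloads let the generic "(*lhs) OP (rhs)" bodies in the macros below work
// unchanged when TYPE is one of the alignment-wrapper structs (Quad_a4_t,
// Quad_a16_t, kmp_cmplx128_a4_t, kmp_cmplx128_a16_t): the operation is simply
// forwarded to the wrapped .q member.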
623 
624 /* ------------------------------------------------------------------------ */
625 /* ATOMIC implementation routines */
626 /* one routine for each operation and operand type */
627 /* ------------------------------------------------------------------------ */
628 
629 // All routine declarations look like
630 // void __kmpc_atomic_RTYPE_OP( ident_t*, int*, TYPE *lhs, TYPE rhs );
631 // ------------------------------------------------------------------------
632 
633 #define KMP_CHECK_GTID \
634  if ( gtid == KMP_GTID_UNKNOWN ) { \
635  gtid = __kmp_entry_gtid(); \
636  } // check and get gtid when needed
637 
638 // Beginning of a definition (provides name, parameters, debug trace)
639 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
640 // OP_ID - operation identifier (add, sub, mul, ...)
641 // TYPE - operands' type
642 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
643 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
644 { \
645  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
646  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
647 
648 // ------------------------------------------------------------------------
649 // Lock variables used for critical sections for various size operands
650 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
651 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
652 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
653 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
654 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
655 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
656 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
657 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
658 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
659 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
660 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
661 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
662 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
663 
664 // ------------------------------------------------------------------------
665 // Operation on *lhs, rhs bound by critical section
666 // OP - operator (it's supposed to contain an assignment)
667 // LCK_ID - lock identifier
668 // Note: gtid is not checked here as it should always be valid
669 // 1- and 2-byte operands: a valid gtid is expected; other sizes check it before this macro
670 #define OP_CRITICAL(OP,LCK_ID) \
671  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
672  \
673  (*lhs) OP (rhs); \
674  \
675  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
676 
677 // ------------------------------------------------------------------------
678 // For GNU compatibility, we may need to use a critical section,
679 // even though it is not required by the ISA.
680 //
681 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
682 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
683 // critical section. On Intel(R) 64, all atomic operations are done with fetch
684 // and add or compare and exchange. Therefore, the FLAG parameter to this
685 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
686 // require a critical section, where we predict that they will be implemented
687 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
688 //
689 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
690 // the FLAG parameter should always be 1. If we know that we will be using
691 // a critical section, then we want to make certain that we use the generic
692 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
693 // locks that are specialized based upon the size or type of the data.
694 //
695 // If FLAG is 0, then we are relying on dead code elimination by the build
696 // compiler to get rid of the useless block of code, and save a needless
697 // branch at runtime.
698 //
699 
700 #ifdef KMP_GOMP_COMPAT
701 # define OP_GOMP_CRITICAL(OP,FLAG) \
702  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
703  KMP_CHECK_GTID; \
704  OP_CRITICAL( OP, 0 ); \
705  return; \
706  }
707 # else
708 # define OP_GOMP_CRITICAL(OP,FLAG)
709 #endif /* KMP_GOMP_COMPAT */
710 
711 #if KMP_MIC
712 # define KMP_DO_PAUSE _mm_delay_32( 30 )
713 #else
714 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
715 #endif /* KMP_MIC */
716 
717 // ------------------------------------------------------------------------
718 // Operation on *lhs, rhs using "compare_and_store" routine
719 // TYPE - operands' type
720 // BITS - size in bits, used to distinguish low level calls
721 // OP - operator
722 // Note: temp_val introduced in order to force the compiler to read
723 // *lhs only once (w/o it the compiler reads *lhs twice)
724 #define OP_CMPXCHG(TYPE,BITS,OP) \
725  { \
726  TYPE KMP_ATOMIC_VOLATILE temp_val; \
727  TYPE old_value, new_value; \
728  temp_val = *lhs; \
729  old_value = temp_val; \
730  new_value = old_value OP rhs; \
731  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
732  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
733  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
734  { \
735  KMP_DO_PAUSE; \
736  \
737  temp_val = *lhs; \
738  old_value = temp_val; \
739  new_value = old_value OP rhs; \
740  } \
741  }
742 
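#if 0
/* Illustrative sketch only (not compiled): OP_CMPXCHG( kmp_real32, 32, * ), as used
   by __kmpc_atomic_float4_mul below, expands approximately to the CAS retry loop
   shown here.  KMP_COMPARE_AND_STORE_ACQ32 and VOLATILE_CAST are assumed to come
   from kmp.h and the headers it includes. */
{
    kmp_real32 volatile temp_val;
    kmp_real32 old_value, new_value;
    temp_val = *lhs;                       /* read the target exactly once */
    old_value = temp_val;
    new_value = old_value * rhs;
    while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
                *VOLATILE_CAST(kmp_int32 *) &old_value,
                *VOLATILE_CAST(kmp_int32 *) &new_value ) )
    {
        KMP_DO_PAUSE;                      /* back off, then retry */
        temp_val = *lhs;
        old_value = temp_val;
        new_value = old_value * rhs;
    }
}
#endif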
743 // 2007-06-25:
744 // workaround for C78287 (complex(kind=4) data type)
745 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
746 // The compiler ignores the volatile qualifier of temp_val in the OP_CMPXCHG macro.
747 // This is a compiler problem.
748 // Related tracker is C76005, targeted to 11.0.
749 // I verified the asm of the workaround.
750 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
751  { \
752  char anonym[ ( sizeof( TYPE ) == sizeof( kmp_int##BITS ) ) ? ( 1 ) : ( 0 ) ] = { 1 }; \
753  struct _sss { \
754  TYPE cmp; \
755  kmp_int##BITS *vvv; \
756  }; \
757  struct _sss old_value, new_value; \
758  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
759  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
760  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
761  new_value.cmp = old_value.cmp OP rhs; \
762  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
763  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
764  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
765  { \
766  KMP_DO_PAUSE; \
767  \
768  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
769  new_value.cmp = old_value.cmp OP rhs; \
770  } \
771  }
772 // end of the first part of the workaround for C78287
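// Editorial note (inferred from the macro above): the anonym[] declaration doubles
// as a compile-time size check; if sizeof(TYPE) != sizeof(kmp_int##BITS) the array
// would be declared with length 0 while being initialized with one element, so a
// mismatched instantiation fails to compile.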
773 
774 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
775 
776 // ------------------------------------------------------------------------
777 // X86 or X86_64: no alignment problems ====================================
778 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
779 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
780  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
781  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
782  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
783 }
784 // -------------------------------------------------------------------------
785 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
786 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
787  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
788  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
789  KMP_TEST_THEN_ADD_REAL##BITS( lhs, OP rhs ); \
790 }
791 // -------------------------------------------------------------------------
792 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
793 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
794  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
795  OP_CMPXCHG(TYPE,BITS,OP) \
796 }
797 // -------------------------------------------------------------------------
798 // workaround for C78287 (complex(kind=4) data type)
799 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
800 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
801  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
802  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
803 }
804 // end of the second part of the workaround for C78287
805 
806 #else
807 // -------------------------------------------------------------------------
808 // Code for other architectures that don't handle unaligned accesses.
809 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
810 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
811  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
812  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
813  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
814  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
815  } else { \
816  KMP_CHECK_GTID; \
817  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
818  } \
819 }
820 // -------------------------------------------------------------------------
821 #define ATOMIC_FLOAT_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
822 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
823  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
824  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
825  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
826  } else { \
827  KMP_CHECK_GTID; \
828  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
829  } \
830 }
831 // -------------------------------------------------------------------------
832 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
833 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
834  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
835  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
836  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
837  } else { \
838  KMP_CHECK_GTID; \
839  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
840  } \
841 }
842 // -------------------------------------------------------------------------
843 // workaround for C78287 (complex(kind=4) data type)
844 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
845 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
846  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
847  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
848  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
849  } else { \
850  KMP_CHECK_GTID; \
851  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
852  } \
853 }
854 // end of the second part of the workaround for C78287
855 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
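// Note on the MASK parameter (derived from the macros above): MASK is the
// hexadecimal alignment mask for the operand size, e.g. MASK 7 tests
// ((kmp_uintptr_t) lhs & 0x7), i.e. 8-byte alignment for 8-byte operands;
// on non-x86 targets unaligned addresses fall back to the per-size critical section.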
856 
857 // Routines for ATOMIC 4-byte operands addition and subtraction
858 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
859 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
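#if 0
/* Illustrative sketch only (not compiled): approximately what the
   ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) line above expands to
   on IA-32/Intel(R) 64 when KMP_GOMP_COMPAT is defined.  GOMP_FLAG is 0 here, so
   the critical-section branch is dead code that the compiler eliminates. */
void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
    if ( (0) && (__kmp_atomic_mode == 2) ) {            /* dead code: GOMP_FLAG == 0 */
        if ( gtid == KMP_GTID_UNKNOWN ) { gtid = __kmp_entry_gtid(); }
        __kmp_acquire_atomic_lock( &__kmp_atomic_lock, gtid );
        (*lhs) += (rhs);
        __kmp_release_atomic_lock( &__kmp_atomic_lock, gtid );
        return;
    }
    KMP_TEST_THEN_ADD32( lhs, + rhs );                  /* lock-free fetch-and-add */
}
#endif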
860 
861 #if KMP_MIC
862 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
863 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
864 #else
865 ATOMIC_FLOAT_ADD( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
866 ATOMIC_FLOAT_ADD( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
867 #endif // KMP_MIC
868 
869 // Routines for ATOMIC 8-byte operands addition and subtraction
870 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
871 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
872 
873 #if KMP_MIC
874 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
875 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
876 #else
877 ATOMIC_FLOAT_ADD( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
878 ATOMIC_FLOAT_ADD( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
879 #endif // KMP_MIC
880 
881 // ------------------------------------------------------------------------
882 // Entries definition for integer operands
883 // TYPE_ID - operands type and size (fixed4, float4)
884 // OP_ID - operation identifier (add, sub, mul, ...)
885 // TYPE - operand type
886 // BITS - size in bits, used to distinguish low level calls
887 // OP - operator (used in critical section)
888 // LCK_ID - lock identifier, used to possibly distinguish lock variable
889 // MASK - used for alignment check
890 
891 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
892 // ------------------------------------------------------------------------
893 // Routines for ATOMIC integer operands, other operators
894 // ------------------------------------------------------------------------
895 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
896 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
897 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
898 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
899 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
900 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
901 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
902 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
903 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
904 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
905 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
906 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
907 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
908 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
909 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
910 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
911 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
912 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
913 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
914 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
915 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
916 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
917 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
918 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
919 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
920 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
921 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
922 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
923 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
924 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
925 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
926 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
927 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
928 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
929 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
930 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
931 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
932 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
933 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
934 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
935 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
936 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
937 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
938 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
939 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
940 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
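#if 0
/* Usage sketch (hypothetical, not compiled): a direct call equivalent to what a
   compiler conceptually emits for "#pragma omp atomic" on "x *= y" with float
   operands; kmp_real32 is assumed to be float, and the ident_t pointer and gtid
   would normally be supplied by the compiler/runtime. */
static void example_caller( ident_t *loc, int gtid, float *x, float y )
{
    __kmpc_atomic_float4_mul( loc, gtid, x, y );   /* atomically performs *x = *x * y */
}
#endif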
941 
942 
943 /* ------------------------------------------------------------------------ */
944 /* Routines for C/C++ Reduction operators && and || */
945 /* ------------------------------------------------------------------------ */
946 
947 // ------------------------------------------------------------------------
948 // Need separate macros for &&, || because there is no combined assignment
949 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
950 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
951 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
952  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
953  OP_CRITICAL( = *lhs OP, LCK_ID ) \
954 }
955 
956 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
957 
958 // ------------------------------------------------------------------------
959 // X86 or X86_64: no alignment problems ===================================
960 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
961 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
962  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
963  OP_CMPXCHG(TYPE,BITS,OP) \
964 }
965 
966 #else
967 // ------------------------------------------------------------------------
968 // Code for other architectures that don't handle unaligned accesses.
969 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
970 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
971  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
972  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
973  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
974  } else { \
975  KMP_CHECK_GTID; \
976  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
977  } \
978 }
979 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
980 
981 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
982 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
983 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
984 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
985 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
986 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
987 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
988 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
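#if 0
// Illustrative note: C has no "&&=" / "||=" compound assignment, so these routines
// pass "= *lhs OP" as the OP argument.  For __kmpc_atomic_fixed4_andl the lock-free
// path computes new_value = old_value && rhs inside OP_CMPXCHG, while the
// critical-section fallback (unaligned addresses on non-x86 targets) expands
// roughly to:
//     __kmp_acquire_atomic_lock( &__kmp_atomic_lock_4i, gtid );
//     (*lhs) = *lhs && (rhs);
//     __kmp_release_atomic_lock( &__kmp_atomic_lock_4i, gtid );
#endif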
989 
990 
991 /* ------------------------------------------------------------------------- */
992 /* Routines for Fortran operators that have no C counterpart: */
993 /* MAX, MIN, .EQV., .NEQV. */
994 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
995 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
996 /* ------------------------------------------------------------------------- */
997 
998 // -------------------------------------------------------------------------
999 // MIN and MAX need separate macros
1000 // OP - operator used to check whether any action is still needed
1001 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
1002  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1003  \
1004  if ( *lhs OP rhs ) { /* still need actions? */ \
1005  *lhs = rhs; \
1006  } \
1007  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1008 
1009 // -------------------------------------------------------------------------
1010 #ifdef KMP_GOMP_COMPAT
1011 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
1012  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
1013  KMP_CHECK_GTID; \
1014  MIN_MAX_CRITSECT( OP, 0 ); \
1015  return; \
1016  }
1017 #else
1018 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
1019 #endif /* KMP_GOMP_COMPAT */
1020 
1021 // -------------------------------------------------------------------------
1022 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1023  { \
1024  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1025  TYPE old_value; \
1026  temp_val = *lhs; \
1027  old_value = temp_val; \
1028  while ( old_value OP rhs && /* still need actions? */ \
1029  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1030  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1031  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
1032  { \
1033  KMP_CPU_PAUSE(); \
1034  temp_val = *lhs; \
1035  old_value = temp_val; \
1036  } \
1037  }
1038 
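#if 0
/* Illustrative sketch only (not compiled): MIN_MAX_CMPXCHG( kmp_int32, 32, < ), as
   used by __kmpc_atomic_fixed4_max below, expands approximately to this loop; the
   CAS is retried only while rhs is still greater than the currently stored value. */
{
    kmp_int32 volatile temp_val;
    kmp_int32 old_value;
    temp_val = *lhs;
    old_value = temp_val;
    while ( old_value < rhs &&                     /* still need the update? */
            ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
                  *VOLATILE_CAST(kmp_int32 *) &old_value,
                  *VOLATILE_CAST(kmp_int32 *) &rhs ) )
    {
        KMP_CPU_PAUSE();
        temp_val = *lhs;
        old_value = temp_val;
    }
}
#endif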
1039 // -------------------------------------------------------------------------
1040 // 1-byte, 2-byte operands - use critical section
1041 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1042 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1043  if ( *lhs OP rhs ) { /* need actions? */ \
1044  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1045  MIN_MAX_CRITSECT(OP,LCK_ID) \
1046  } \
1047 }
1048 
1049 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1050 
1051 // -------------------------------------------------------------------------
1052 // X86 or X86_64: no alignment problems ====================================
1053 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1054 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1055  if ( *lhs OP rhs ) { \
1056  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1057  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1058  } \
1059 }
1060 
1061 #else
1062 // -------------------------------------------------------------------------
1063 // Code for other architectures that don't handle unaligned accesses.
1064 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1065 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1066  if ( *lhs OP rhs ) { \
1067  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1068  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1069  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1070  } else { \
1071  KMP_CHECK_GTID; \
1072  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1073  } \
1074  } \
1075 }
1076 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1077 
1078 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1079 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1080 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1081 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1082 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1083 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1084 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1085 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1086 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1087 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1088 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1089 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1090 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1091 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1092 #if ( KMP_ARCH_X86 )
1093  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1094  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1095 #endif
1096 // ------------------------------------------------------------------------
1097 // Need separate macros for .EQV. because a complement (~) is required
1098 // OP is ignored for critical sections; ^=~ is used instead
1099 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1100 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1101  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1102  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1103 }
1104 
1105 // ------------------------------------------------------------------------
1106 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1107 // ------------------------------------------------------------------------
1108 // X86 or X86_64: no alignment problems ===================================
1109 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1110 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1111  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1112  OP_CMPXCHG(TYPE,BITS,OP) \
1113 }
1114 // ------------------------------------------------------------------------
1115 #else
1116 // ------------------------------------------------------------------------
1117 // Code for other architectures that don't handle unaligned accesses.
1118 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1119 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1120  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1121  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1122  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1123  } else { \
1124  KMP_CHECK_GTID; \
1125  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1126  } \
1127 }
1128 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1129 
1130 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1131 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1132 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1133 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1134 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1135 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1136 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1137 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1138 
1139 
1140 // ------------------------------------------------------------------------
1141 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1142 // TYPE_ID, OP_ID, TYPE - detailed above
1143 // OP - operator
1144 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1145 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1146 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1147  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1148  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1149 }
1150 
1151 /* ------------------------------------------------------------------------- */
1152 // routines for long double type
1153 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1154 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1155 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1156 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
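#if 0
/* Illustrative sketch only (not compiled): approximate expansion of
   ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) above, with
   KMP_GOMP_COMPAT defined.  Extended types always take a lock-based path. */
void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double *lhs, long double rhs )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
    if ( (1) && (__kmp_atomic_mode == 2) ) {            /* GOMP compat: common lock */
        if ( gtid == KMP_GTID_UNKNOWN ) { gtid = __kmp_entry_gtid(); }
        __kmp_acquire_atomic_lock( &__kmp_atomic_lock, gtid );
        (*lhs) += (rhs);
        __kmp_release_atomic_lock( &__kmp_atomic_lock, gtid );
        return;
    }
    __kmp_acquire_atomic_lock( &__kmp_atomic_lock_10r, gtid );   /* long double lock */
    (*lhs) += (rhs);
    __kmp_release_atomic_lock( &__kmp_atomic_lock_10r, gtid );
}
#endif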
1157 // routines for _Quad type
1158 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1159 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1160 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1161 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1162 #if ( KMP_ARCH_X86 )
1163  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1164  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1165  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1166  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1167 #endif
1168 // routines for complex types
1169 
1170 // workaround for C78287 (complex(kind=4) data type)
1171 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1172 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1173 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1174 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1175 // end of the workaround for C78287
1176 
1177 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1178 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1179 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1180 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1181 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1182 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1183 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1184 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1185 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1186 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1187 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1188 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1189 #if ( KMP_ARCH_X86 )
1190  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1191  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1192  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1193  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1194 #endif
1195 
1196 #if OMP_40_ENABLED
1197 
1198 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1199 // Supported only on IA-32 architecture and Intel(R) 64
1200 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1201 
1202 // ------------------------------------------------------------------------
1203 // Operation on *lhs, rhs bound by critical section
1204 // OP - operator (it's supposed to contain an assignment)
1205 // LCK_ID - lock identifier
1206 // Note: gtid is not checked here as it should always be valid
1207 // 1- and 2-byte operands: a valid gtid is expected; other sizes check it before this macro
1208 #define OP_CRITICAL_REV(OP,LCK_ID) \
1209  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1210  \
1211  (*lhs) = (rhs) OP (*lhs); \
1212  \
1213  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1214 
1215 #ifdef KMP_GOMP_COMPAT
1216 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1217  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1218  KMP_CHECK_GTID; \
1219  OP_CRITICAL_REV( OP, 0 ); \
1220  return; \
1221  }
1222 #else
1223 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1224 #endif /* KMP_GOMP_COMPAT */
1225 
1226 
1227 // Beginning of a definition (provides name, parameters, debug trace)
1228 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
1229 // OP_ID - operation identifier (add, sub, mul, ...)
1230 // TYPE - operands' type
1231 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1232 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1233 { \
1234  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1235  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1236 
1237 // ------------------------------------------------------------------------
1238 // Operation on *lhs, rhs using "compare_and_store" routine
1239 // TYPE - operands' type
1240 // BITS - size in bits, used to distinguish low level calls
1241 // OP - operator
1242 // Note: temp_val introduced in order to force the compiler to read
1243 // *lhs only once (w/o it the compiler reads *lhs twice)
1244 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1245  { \
1246  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1247  TYPE old_value, new_value; \
1248  temp_val = *lhs; \
1249  old_value = temp_val; \
1250  new_value = rhs OP old_value; \
1251  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1252  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1253  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1254  { \
1255  KMP_DO_PAUSE; \
1256  \
1257  temp_val = *lhs; \
1258  old_value = temp_val; \
1259  new_value = rhs OP old_value; \
1260  } \
1261  }
1262 
1263 // -------------------------------------------------------------------------
1264 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1265 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1266  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1267  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1268 }
1269 
1270 // ------------------------------------------------------------------------
1271 // Entries definition for integer operands
1272 // TYPE_ID - operands type and size (fixed4, float4)
1273 // OP_ID - operation identifier (add, sub, mul, ...)
1274 // TYPE - operand type
1275 // BITS - size in bits, used to distinguish low level calls
1276 // OP - operator (used in critical section)
1277 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1278 
1279 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1280 // ------------------------------------------------------------------------
1281 // Routines for ATOMIC integer operands, other operators
1282 // ------------------------------------------------------------------------
1283 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1284 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1285 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1286 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1287 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1288 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1289 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1290 
1291 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1292 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1293 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1294 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1295 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1296 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1297 
1298 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1299 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1300 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1301 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1302 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1303 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1304 
1305 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1306 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1307 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1308 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1309 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1310 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1311 
1312 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1313 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1314 
1315 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1316 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1317 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
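#if 0
/* Usage sketch (hypothetical, not compiled): for OpenMP 4.0 code such as
       #pragma omp atomic
       x = y / x;                    // double x, y
   a compiler conceptually emits a call like
       __kmpc_atomic_float8_div_rev( &loc, gtid, &x, y );
   whose CAS loop (OP_CMPXCHG_REV above) retries  new_value = rhs / old_value,
   i.e. the reversed operand order relative to __kmpc_atomic_float8_div. */
#endif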
1318 
1319 // ------------------------------------------------------------------------
1320 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1321 // TYPE_ID, OP_ID, TYPE - detailed above
1322 // OP - operator
1323 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1324 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1325 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1326  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1327  OP_CRITICAL_REV(OP,LCK_ID) \
1328 }
1329 
1330 /* ------------------------------------------------------------------------- */
1331 // routines for long double type
1332 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1333 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1334 // routines for _Quad type
1335 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1336 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1337 #if ( KMP_ARCH_X86 )
1338  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1339  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1340 #endif
1341 
1342 // routines for complex types
1343 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1344 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1345 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1346 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1347 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1348 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1349 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1350 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1351 #if ( KMP_ARCH_X86 )
1352  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1353  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1354 #endif
1355 
1356 
1357 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1358 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1359 
1360 #endif //OMP_40_ENABLED
1361 
1362 
1363 /* ------------------------------------------------------------------------ */
1364 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1365 /* Note: in order to reduce the total number of type combinations */
1366 /* it is assumed that the compiler converts the RHS to the longest floating */
1367 /* type, that is _Quad, before calling any of these routines */
1368 /* Conversion to _Quad will be done by the compiler during calculation, */
1369 /* conversion back to TYPE - before the assignment, like: */
1370 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1371 /* Performance penalty expected because of SW emulation use */
1372 /* ------------------------------------------------------------------------ */
1373 
1374 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1375 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1376 { \
1377  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1378  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1379 
1380 // -------------------------------------------------------------------------
1381 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1382 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1383  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1384  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1385 }
1386 
1387 // -------------------------------------------------------------------------
1388 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1389 // -------------------------------------------------------------------------
1390 // X86 or X86_64: no alignment problems ====================================
1391 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1392 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1393  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1394  OP_CMPXCHG(TYPE,BITS,OP) \
1395 }
1396 // -------------------------------------------------------------------------
1397 #else
1398 // ------------------------------------------------------------------------
1399 // Code for other architectures that don't handle unaligned accesses.
1400 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1401 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1402  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1403  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1404  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1405  } else { \
1406  KMP_CHECK_GTID; \
1407  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1408  } \
1409 }
1410 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1411 
1412 // RHS=float8
1413 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1414 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1415 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1416 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1417 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1418 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1419 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1420 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1421 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1422 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1423 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1424 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1425 
1426 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1427 
1428 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1429 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1430 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1431 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1432 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1433 
1434 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1435 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1436 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1437 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1438 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1439 
1440 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1441 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1442 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1443 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1444 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1445 
1446 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1447 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1448 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1449 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1450 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1451 
1452 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1453 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1454 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1455 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1456 
1457 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1458 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1459 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1460 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1461 
1462 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1463 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1464 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1465 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
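#if 0
// Illustrative note (derived from the macros above): inside __kmpc_atomic_fixed4_add_fp
// the CAS body produced by OP_CMPXCHG( kmp_int32, 32, + ) becomes
//     new_value = old_value + rhs;
// where rhs is _Quad, so old_value is promoted to _Quad for the addition and the
// result is converted back to kmp_int32 on assignment -- the
//     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )
// pattern described earlier, at the cost of software-emulated _Quad arithmetic.
#endif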
1466 
1467 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1468 // ------------------------------------------------------------------------
1469 // X86 or X86_64: no alignment problems ====================================
1470 // workaround for C78287 (complex(kind=4) data type)
1471 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1472 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1473  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1474  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1475 }
1476 // end of the second part of the workaround for C78287
1477 #else
1478 // ------------------------------------------------------------------------
1479 // Code for other architectures that don't handle unaligned accesses.
1480 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1481 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1482  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1483  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1484  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1485  } else { \
1486  KMP_CHECK_GTID; \
1487  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1488  } \
1489 }
1490 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1491 
1492 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1493 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1494 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1495 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1496 
1497 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1498 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1499 
1501 // ------------------------------------------------------------------------
1502 // Atomic READ routines
1503 // ------------------------------------------------------------------------
1504 
1505 // ------------------------------------------------------------------------
1506 // Beginning of a definition (provides name, parameters, debug trace)
1507 // TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned fixed)
1508 // OP_ID - operation identifier (add, sub, mul, ...)
1509 // TYPE - operands' type
1510 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1511 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1512 { \
1513  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1514  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1515 
1516 // ------------------------------------------------------------------------
1517 // Atomic read of *loc using the "compare_and_store_ret" routine
1518 // TYPE - operand's type
1519 // BITS - size in bits, used to distinguish low level calls
1520 // OP - operator
1521 // Note: temp_val introduced in order to force the compiler to read
1522 // *loc only once (w/o it the compiler reads *loc twice)
1523 // TODO: check if it is still necessary
1524 // Return the old value regardless of the result of the "compare & swap" operation
1525 
1526 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1527  { \
1528  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1529  union f_i_union { \
1530  TYPE f_val; \
1531  kmp_int##BITS i_val; \
1532  }; \
1533  union f_i_union old_value; \
1534  temp_val = *loc; \
1535  old_value.f_val = temp_val; \
1536  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1537  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1538  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1539  new_value = old_value.f_val; \
1540  return new_value; \
1541  }
1542 
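// An illustrative, simplified expansion of OP_CMPXCHG_READ for a 32-bit real
// operand (casts and volatile qualifiers omitted). The compare value and the
// exchange value are the same, so *loc is either left untouched or rewritten
// with the value it already holds; KMP_COMPARE_AND_STORE_RET32 simply returns
// the value currently stored at loc, giving an atomic read of non-integer data:
//
//     union f_i_union { kmp_real32 f_val; kmp_int32 i_val; } old_value;
//     old_value.f_val = *loc;
//     old_value.i_val = KMP_COMPARE_AND_STORE_RET32( (kmp_int32 *) loc,
//                                                    old_value.i_val,
//                                                    old_value.i_val );
//     return old_value.f_val;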
1543 // -------------------------------------------------------------------------
1544 // Read of *loc bound by critical section
1545 // OP - operator (it's supposed to contain an assignment)
1546 // LCK_ID - lock identifier
1547 // Note: don't check gtid as it should always be valid
1548 // 1- and 2-byte operands: a valid gtid is expected; other sizes must check it before this macro
1549 #define OP_CRITICAL_READ(OP,LCK_ID) \
1550  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1551  \
1552  new_value = (*loc); \
1553  \
1554  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1555 
1556 // -------------------------------------------------------------------------
1557 #ifdef KMP_GOMP_COMPAT
1558 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1559  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1560  KMP_CHECK_GTID; \
1561  OP_CRITICAL_READ( OP, 0 ); \
1562  return new_value; \
1563  }
1564 #else
1565 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1566 #endif /* KMP_GOMP_COMPAT */
1567 
1568 // -------------------------------------------------------------------------
1569 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1570 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1571  TYPE new_value; \
1572  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1573  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1574  return new_value; \
1575 }
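// For example, ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) below
// generates, in a simplified sketch (assert and trace omitted; GOMP_FLAG is 0 here,
// so there is no GOMP-compat path), an atomic fetch-and-add of zero, which returns
// the current value of *loc without modifying it:
//
//     kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 *loc )
//     {
//         return KMP_TEST_THEN_ADD32( loc, + 0 );
//     }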
1576 // -------------------------------------------------------------------------
1577 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1578 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1579  TYPE new_value; \
1580  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1581  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1582 }
1583 // ------------------------------------------------------------------------
1584 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1585 // TYPE_ID, OP_ID, TYPE - detailed above
1586 // OP - operator
1587 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1588 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1589 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1590  TYPE new_value; \
1591  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1592  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1593  return new_value; \
1594 }
1595 
1596 // ------------------------------------------------------------------------
1597 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1598 // Let's return the read value through the additional parameter.
1599 
1600 #if ( KMP_OS_WINDOWS )
1601 
1602 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1603  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1604  \
1605  (*out) = (*loc); \
1606  \
1607  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1608 // ------------------------------------------------------------------------
1609 #ifdef KMP_GOMP_COMPAT
1610 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1611  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1612  KMP_CHECK_GTID; \
1613  OP_CRITICAL_READ_WRK( OP, 0 ); \
1614  }
1615 #else
1616 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1617 #endif /* KMP_GOMP_COMPAT */
1618 // ------------------------------------------------------------------------
1619 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1620 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1621 { \
1622  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1623  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1624 
1625 // ------------------------------------------------------------------------
1626 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1627 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1628  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1629  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1630 }
1631 
1632 #endif // KMP_OS_WINDOWS
1633 
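// On Windows* OS the cmplx4 reader generated by ATOMIC_CRITICAL_READ_WRK has,
// in a simplified sketch (assert, trace and GOMP-compat path omitted), the
// following shape; the value is handed back through "out" rather than returned,
// and ATOMIC_LOCK8c names the lock selected by the 8c lock id:
//
//     void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 *out, ident_t *id_ref, int gtid, kmp_cmplx32 *loc )
//     {
//         __kmp_acquire_atomic_lock( & ATOMIC_LOCK8c, gtid );
//         (*out) = (*loc);
//         __kmp_release_atomic_lock( & ATOMIC_LOCK8c, gtid );
//     }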
1634 // ------------------------------------------------------------------------
1635 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1636 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1637 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1638 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1639 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1640 
1641 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1642 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1643 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1644 
1645 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1646 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1647 
1648 // Fix for CQ220361 on Windows* OS
1649 #if ( KMP_OS_WINDOWS )
1650  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1651 #else
1652  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1653 #endif
1654 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1655 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1656 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1657 #if ( KMP_ARCH_X86 )
1658  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1659  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1660 #endif
1661 
1662 
1663 // ------------------------------------------------------------------------
1664 // Atomic WRITE routines
1665 // ------------------------------------------------------------------------
1666 
1667 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1668 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1669  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1670  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1671 }
1672 // ------------------------------------------------------------------------
1673 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1674 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1675  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1676  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1677 }
1678 
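// Assuming ATOMIC_BEGIN produces the usual ( ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs )
// prototype (it is defined earlier in this file), ATOMIC_XCHG_FLOAT_WR( float4, ... ) below
// reduces to a single atomic exchange; simplified sketch with the assert, trace and
// GOMP-compat path omitted:
//
//     void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 *lhs, kmp_real32 rhs )
//     {
//         KMP_XCHG_REAL32( lhs, rhs );   // atomically stores rhs into *lhs
//     }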
1679 
1680 // ------------------------------------------------------------------------
1681 // Operation on *lhs, rhs using "compare_and_store" routine
1682 // TYPE - operands' type
1683 // BITS - size in bits, used to distinguish low level calls
1684 // OP - operator
1685 // Note: temp_val introduced in order to force the compiler to read
1686 // *lhs only once (w/o it the compiler reads *lhs twice)
1687 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1688  { \
1689  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1690  TYPE old_value, new_value; \
1691  temp_val = *lhs; \
1692  old_value = temp_val; \
1693  new_value = rhs; \
1694  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1695  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1696  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1697  { \
1698  KMP_CPU_PAUSE(); \
1699  \
1700  temp_val = *lhs; \
1701  old_value = temp_val; \
1702  new_value = rhs; \
1703  } \
1704  }
1705 
1706 // -------------------------------------------------------------------------
1707 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1708 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1709  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1710  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1711 }
1712 
1713 // ------------------------------------------------------------------------
1714 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1715 // TYPE_ID, OP_ID, TYPE - detailed above
1716 // OP - operator
1717 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1718 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1719 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1720  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1721  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1722 }
1723 // -------------------------------------------------------------------------
1724 
1725 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1726 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1727 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1728 #if ( KMP_ARCH_X86 )
1729  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1730 #else
1731  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1732 #endif
1733 
1734 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1735 #if ( KMP_ARCH_X86 )
1736  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1737 #else
1738  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1739 #endif
1740 
1741 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1742 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1743 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1744 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1745 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1746 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1747 #if ( KMP_ARCH_X86 )
1748  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1749  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1750 #endif
1751 
1752 
1753 // ------------------------------------------------------------------------
1754 // Atomic CAPTURE routines
1755 // ------------------------------------------------------------------------
1756 
1757 // Beginning of a definition (provides name, parameters, debug trace)
1758 // TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned fixed)
1759 // OP_ID - operation identifier (add, sub, mul, ...)
1760 // TYPE - operands' type
1761 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1762 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1763 { \
1764  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1765  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1766 
1767 // -------------------------------------------------------------------------
1768 // Operation on *lhs, rhs bound by critical section
1769 // OP - operator (it's supposed to contain an assignment)
1770 // LCK_ID - lock identifier
1771 // Note: don't check gtid as it should always be valid
1772 // 1- and 2-byte operands: a valid gtid is expected; other sizes must check it before this macro
1773 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1774  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1775  \
1776  if( flag ) { \
1777  (*lhs) OP rhs; \
1778  new_value = (*lhs); \
1779  } else { \
1780  new_value = (*lhs); \
1781  (*lhs) OP rhs; \
1782  } \
1783  \
1784  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1785  return new_value;
1786 
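// The "flag" argument of the capture routines selects which value is captured:
//   flag != 0 :  { x = x OP expr; v = x; }  -> the updated value is returned
//   flag == 0 :  { v = x; x = x OP expr; }  -> the original value is returned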
1787 // ------------------------------------------------------------------------
1788 #ifdef KMP_GOMP_COMPAT
1789 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1790  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1791  KMP_CHECK_GTID; \
1792  OP_CRITICAL_CPT( OP##=, 0 ); \
1793  }
1794 #else
1795 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1796 #endif /* KMP_GOMP_COMPAT */
1797 
1798 // ------------------------------------------------------------------------
1799 // Operation on *lhs, rhs using "compare_and_store" routine
1800 // TYPE - operands' type
1801 // BITS - size in bits, used to distinguish low level calls
1802 // OP - operator
1803 // Note: temp_val introduced in order to force the compiler to read
1804 // *lhs only once (w/o it the compiler reads *lhs twice)
1805 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1806  { \
1807  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1808  TYPE old_value, new_value; \
1809  temp_val = *lhs; \
1810  old_value = temp_val; \
1811  new_value = old_value OP rhs; \
1812  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1813  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1814  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1815  { \
1816  KMP_CPU_PAUSE(); \
1817  \
1818  temp_val = *lhs; \
1819  old_value = temp_val; \
1820  new_value = old_value OP rhs; \
1821  } \
1822  if( flag ) { \
1823  return new_value; \
1824  } else \
1825  return old_value; \
1826  }
1827 
1828 // -------------------------------------------------------------------------
1829 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1830 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1831  TYPE new_value; \
1832  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1833  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1834 }
1835 
1836 // -------------------------------------------------------------------------
1837 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1838 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1839  TYPE old_value, new_value; \
1840  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1841  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1842  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1843  if( flag ) { \
1844  return old_value OP rhs; \
1845  } else \
1846  return old_value; \
1847 }
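// For example, ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) below
// generates, in a simplified sketch (assert and trace omitted; GOMP_FLAG is 0 here):
//
//     kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, kmp_int32 rhs, int flag )
//     {
//         kmp_int32 old_value = KMP_TEST_THEN_ADD32( lhs, + rhs ); // value before the add
//         return flag ? old_value + rhs : old_value;               // updated vs. original value
//     }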
1848 // -------------------------------------------------------------------------
1849 #define ATOMIC_FLOAT_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1850 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1851  TYPE old_value, new_value; \
1852  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1853  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1854  old_value = KMP_TEST_THEN_ADD_REAL##BITS( lhs, OP rhs ); \
1855  if( flag ) { \
1856  return old_value OP rhs; \
1857  } else \
1858  return old_value; \
1859 }
1860 // -------------------------------------------------------------------------
1861 
1862 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1863 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1864 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1865 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1866 
1867 #if KMP_MIC
1868 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1869 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1870 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1871 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1872 #else
1873 ATOMIC_FLOAT_ADD_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1874 ATOMIC_FLOAT_ADD_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1875 ATOMIC_FLOAT_ADD_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1876 ATOMIC_FLOAT_ADD_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1877 #endif // KMP_MIC
1878 
1879 // ------------------------------------------------------------------------
1880 // Entries definition for integer operands
1881 // TYPE_ID - operands type and size (fixed4, float4)
1882 // OP_ID - operation identifier (add, sub, mul, ...)
1883 // TYPE - operand type
1884 // BITS - size in bits, used to distinguish low level calls
1885 // OP - operator (used in critical section)
1886 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1887 // ------------------------------------------------------------------------
1888 // Routines for ATOMIC integer operands, other operators
1889 // ------------------------------------------------------------------------
1890 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1891 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1892 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1893 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1894 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1895 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1896 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1897 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1898 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1899 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1900 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1901 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1902 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1903 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1904 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1905 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1906 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1907 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1908 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1909 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1910 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1911 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1912 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1913 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1914 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1915 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1916 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1917 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1918 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1919 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1920 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1921 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1922 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1923 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1924 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1925 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1926 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1927 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1928 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1929 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1930 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1931 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1932 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1933 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1934 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1935 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1936 
1937 // ------------------------------------------------------------------------
1938 // Routines for C/C++ Reduction operators && and ||
1939 // ------------------------------------------------------------------------
1940 
1941 // -------------------------------------------------------------------------
1942 // Operation on *lhs, rhs bound by critical section
1943 // OP - operator (it's supposed to contain an assignment)
1944 // LCK_ID - lock identifier
1945 // Note: don't check gtid as it should always be valid
1946 // 1- and 2-byte operands: a valid gtid is expected; other sizes must check it before this macro
1947 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1948  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1949  \
1950  if( flag ) { \
1951  new_value OP rhs; \
1952  } else \
1953  new_value = (*lhs); \
1954  \
1955  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1956 
1957 // ------------------------------------------------------------------------
1958 #ifdef KMP_GOMP_COMPAT
1959 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
1960  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1961  KMP_CHECK_GTID; \
1962  OP_CRITICAL_L_CPT( OP, 0 ); \
1963  return new_value; \
1964  }
1965 #else
1966 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1967 #endif /* KMP_GOMP_COMPAT */
1968 
1969 // ------------------------------------------------------------------------
1970 // Need separate macros for &&, || because there is no combined assignment
1971 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1972 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1973  TYPE new_value; \
1974  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
1975  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1976 }
1977 
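// In the GOMP-compat critical path the operator string "= *lhs OP" passed above
// turns the statement "new_value OP rhs;" of OP_CRITICAL_L_CPT into, e.g. for andl_cpt,
//
//     new_value = *lhs && rhs;
//
// which is why the logical operators need their own macro: there is no "&&=" / "||="
// compound assignment to reuse.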
1978 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1979 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
1980 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
1981 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
1982 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
1983 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
1984 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
1985 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
1986 
1987 
1988 // -------------------------------------------------------------------------
1989 // Routines for Fortran operators that have no direct C counterpart:
1990 // MAX, MIN, .EQV., .NEQV.
1991 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
1992 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
1993 // -------------------------------------------------------------------------
1994 
1995 // -------------------------------------------------------------------------
1996 // MIN and MAX need separate macros
1997 // OP - operator used to check whether any action is still needed
1998 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
1999  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2000  \
2001  if ( *lhs OP rhs ) { /* still need actions? */ \
2002  old_value = *lhs; \
2003  *lhs = rhs; \
2004  if ( flag ) \
2005  new_value = rhs; \
2006  else \
2007  new_value = old_value; \
2008  } \
2009  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2010  return new_value; \
2011 
2012 // -------------------------------------------------------------------------
2013 #ifdef KMP_GOMP_COMPAT
2014 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
2015  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2016  KMP_CHECK_GTID; \
2017  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
2018  }
2019 #else
2020 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2021 #endif /* KMP_GOMP_COMPAT */
2022 
2023 // -------------------------------------------------------------------------
2024 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2025  { \
2026  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2027  /*TYPE old_value; */ \
2028  temp_val = *lhs; \
2029  old_value = temp_val; \
2030  while ( old_value OP rhs && /* still need actions? */ \
2031  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2032  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2033  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2034  { \
2035  KMP_CPU_PAUSE(); \
2036  temp_val = *lhs; \
2037  old_value = temp_val; \
2038  } \
2039  if( flag ) \
2040  return rhs; \
2041  else \
2042  return old_value; \
2043  }
2044 
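// For max_cpt the operator passed in is "<", so the CAS loop above only keeps
// trying while ( old_value < rhs ), i.e. while rhs would still be the new maximum.
// Simplified sketch of MIN_MAX_CMPXCHG_CPT for fixed4/max_cpt (casts omitted):
//
//     kmp_int32 old_value = *lhs;
//     while ( old_value < rhs &&
//             ! KMP_COMPARE_AND_STORE_ACQ32( lhs, old_value, rhs ) )
//     {
//         KMP_CPU_PAUSE();
//         old_value = *lhs;
//     }
//     if ( flag ) return rhs; else return old_value;
//
// The enclosing MIN_MAX_COMPXCHG_CPT macro below only enters this block when
// ( *lhs OP rhs ) holds on entry; otherwise it returns *lhs unchanged.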
2045 // -------------------------------------------------------------------------
2046 // 1-byte, 2-byte operands - use critical section
2047 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2048 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2049  TYPE new_value, old_value; \
2050  if ( *lhs OP rhs ) { /* need actions? */ \
2051  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2052  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2053  } \
2054  return *lhs; \
2055 }
2056 
2057 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2058 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2059  TYPE new_value, old_value; \
2060  if ( *lhs OP rhs ) { \
2061  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2062  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2063  } \
2064  return *lhs; \
2065 }
2066 
2067 
2068 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2069 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2070 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2071 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2072 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2073 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2074 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2075 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2076 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2077 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2078 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2079 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2080 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2081 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2082 #if ( KMP_ARCH_X86 )
2083  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2084  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2085 #endif
2086 
2087 // ------------------------------------------------------------------------
2088 #ifdef KMP_GOMP_COMPAT
2089 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2090  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2091  KMP_CHECK_GTID; \
2092  OP_CRITICAL_CPT( OP, 0 ); \
2093  }
2094 #else
2095 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2096 #endif /* KMP_GOMP_COMPAT */
2097 // ------------------------------------------------------------------------
2098 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2099 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2100  TYPE new_value; \
2101  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2102  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2103 }
2104 
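// .NEQV. maps onto plain bitwise xor (see the neqv_cpt entries below), while .EQV.
// is passed the operator pair "^~": OP_CMPXCHG_CPT then computes
//
//     new_value = old_value ^ ~rhs;     // bitwise complement of xor, i.e. XNOR
//
// and the GOMP-compat path receives "^=~" so the critical section performs
// (*lhs) ^= ~rhs for the same effect.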
2105 // ------------------------------------------------------------------------
2106 
2107 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2108 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2109 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2110 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2111 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2112 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2113 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2114 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2115 
2116 // ------------------------------------------------------------------------
2117 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2118 // TYPE_ID, OP_ID, TYPE - detailed above
2119 // OP - operator
2120 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2121 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2122 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2123  TYPE new_value; \
2124  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2125  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2126 }
2127 
2128 // ------------------------------------------------------------------------
2129 
2130 // Workaround for cmplx4. Regular routines with return value don't work
2131 // on Win_32e. Let's return captured values through the additional parameter.
2132 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2133  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2134  \
2135  if( flag ) { \
2136  (*lhs) OP rhs; \
2137  (*out) = (*lhs); \
2138  } else { \
2139  (*out) = (*lhs); \
2140  (*lhs) OP rhs; \
2141  } \
2142  \
2143  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2144  return;
2145 // ------------------------------------------------------------------------
2146 
2147 #ifdef KMP_GOMP_COMPAT
2148 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2149  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2150  KMP_CHECK_GTID; \
2151  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2152  }
2153 #else
2154 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2155 #endif /* KMP_GOMP_COMPAT */
2156 // ------------------------------------------------------------------------
2157 
2158 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2159 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2160 { \
2161  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2162  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2163 // ------------------------------------------------------------------------
2164 
2165 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2166 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2167  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2168  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2169 }
2170 // The end of workaround for cmplx4
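// For example, ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, ... ) below generates,
// in a simplified sketch (assert, trace and GOMP-compat path omitted; ATOMIC_LOCK8c
// names the lock selected by the 8c lock id):
//
//     void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid,
//                                        kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                        kmp_cmplx32 *out, int flag )
//     {
//         __kmp_acquire_atomic_lock( & ATOMIC_LOCK8c, gtid );
//         if ( flag ) { (*lhs) += rhs; (*out) = (*lhs); }
//         else        { (*out) = (*lhs); (*lhs) += rhs; }
//         __kmp_release_atomic_lock( & ATOMIC_LOCK8c, gtid );
//     }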
2171 
2172 /* ------------------------------------------------------------------------- */
2173 // routines for long double type
2174 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2175 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2176 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2177 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2178 // routines for _Quad type
2179 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2180 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2181 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2182 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2183 #if ( KMP_ARCH_X86 )
2184  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2185  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2186  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2187  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2188 #endif
2189 
2190 // routines for complex types
2191 
2192 // cmplx4 routines to return void
2193 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2194 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2195 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2196 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2197 
2198 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2199 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2200 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2201 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2202 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2203 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2204 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2205 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2206 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2207 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2208 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2209 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2210 #if ( KMP_ARCH_X86 )
2211  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2212  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2213  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2214  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2215 #endif
2216 
2217 #if OMP_40_ENABLED
2218 
2219 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } or { x = expr binop x; v = x; } for non-commutative operations.
2220 // Supported only on IA-32 architecture and Intel(R) 64
2221 
2222 // -------------------------------------------------------------------------
2223 // Operation on *lhs, rhs bound by critical section
2224 // OP - operator (it's supposed to contain an assignment)
2225 // LCK_ID - lock identifier
2226 // Note: don't check gtid as it should always be valid
2227 // 1- and 2-byte operands: a valid gtid is expected; other sizes must check it before this macro
2228 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2229  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2230  \
2231  if( flag ) { \
2232  /*temp_val = (*lhs);*/\
2233  (*lhs) = (rhs) OP (*lhs); \
2234  new_value = (*lhs); \
2235  } else { \
2236  new_value = (*lhs);\
2237  (*lhs) = (rhs) OP (*lhs); \
2238  } \
2239  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2240  return new_value;
2241 
2242 // ------------------------------------------------------------------------
2243 #ifdef KMP_GOMP_COMPAT
2244 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2245  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2246  KMP_CHECK_GTID; \
2247  OP_CRITICAL_CPT_REV( OP, 0 ); \
2248  }
2249 #else
2250 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2251 #endif /* KMP_GOMP_COMPAT */
2252 
2253 // ------------------------------------------------------------------------
2254 // Operation on *lhs, rhs using "compare_and_store" routine
2255 // TYPE - operands' type
2256 // BITS - size in bits, used to distinguish low level calls
2257 // OP - operator
2258 // Note: temp_val introduced in order to force the compiler to read
2259 // *lhs only once (w/o it the compiler reads *lhs twice)
2260 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2261  { \
2262  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2263  TYPE old_value, new_value; \
2264  temp_val = *lhs; \
2265  old_value = temp_val; \
2266  new_value = rhs OP old_value; \
2267  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2268  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2269  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2270  { \
2271  KMP_CPU_PAUSE(); \
2272  \
2273  temp_val = *lhs; \
2274  old_value = temp_val; \
2275  new_value = rhs OP old_value; \
2276  } \
2277  if( flag ) { \
2278  return new_value; \
2279  } else \
2280  return old_value; \
2281  }
2282 
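// The "_cpt_rev" routines implement the reversed operand order x = expr binop x:
// the CAS loop above computes new_value = rhs OP old_value (e.g. for sub_cpt_rev,
// new_value = rhs - *lhs) instead of old_value OP rhs, and "flag" selects the
// captured value exactly as in the non-reversed capture routines.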
2283 // -------------------------------------------------------------------------
2284 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2285 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2286  TYPE new_value; \
2287  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2288  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2289  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2290 }
2291 
2292 
2293 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2294 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2295 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2296 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2297 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2298 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2299 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2300 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2301 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2302 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2303 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2304 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2305 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2306 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2307 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2308 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2309 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2310 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2311 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2312 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2313 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2314 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2315 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2316 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2317 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2318 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2319 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2320 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2321 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2322 
2323 
2324 // ------------------------------------------------------------------------
2325 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2326 // TYPE_ID, OP_ID, TYPE - detailed above
2327 // OP - operator
2328 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2329 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2330 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2331  TYPE new_value; \
2332  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2333  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2334  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2335  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2336 }
2337 
2338 
2339 /* ------------------------------------------------------------------------- */
2340 // routines for long double type
2341 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2342 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2343 // routines for _Quad type
2344 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2345 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2346 #if ( KMP_ARCH_X86 )
2347  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2348  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2349 #endif
2350 
2351 // routines for complex types
2352 
2353 // ------------------------------------------------------------------------
2354 
2355 // Workaround for cmplx4. Regular routines with return value don't work
2356 // on Win_32e. Let's return captured values through the additional parameter.
2357 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2358  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2359  \
2360  if( flag ) { \
2361  (*lhs) = (rhs) OP (*lhs); \
2362  (*out) = (*lhs); \
2363  } else { \
2364  (*out) = (*lhs); \
2365  (*lhs) = (rhs) OP (*lhs); \
2366  } \
2367  \
2368  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2369  return;
2370 // ------------------------------------------------------------------------
2371 
2372 #ifdef KMP_GOMP_COMPAT
2373 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2374  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2375  KMP_CHECK_GTID; \
2376  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2377  }
2378 #else
2379 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2380 #endif /* KMP_GOMP_COMPAT */
2381 // ------------------------------------------------------------------------
2382 
2383 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2384 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2385  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2386  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2387 }
2388 // The end of workaround for cmplx4
2389 
2390 
2391 // !!! TODO: check if we need to return void for cmplx4 routines
2392 // cmplx4 routines to return void
2393 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2394 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2395 
2396 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2397 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2398 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2399 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2400 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2401 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2402 #if ( KMP_ARCH_X86 )
2403  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2404  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2405 #endif
2406 
2407 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2408 
2409 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2410 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2411 { \
2412  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2413  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2414 
2415 #define CRITICAL_SWP(LCK_ID) \
2416  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2417  \
2418  old_value = (*lhs); \
2419  (*lhs) = rhs; \
2420  \
2421  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2422  return old_value;
2423 
2424 // ------------------------------------------------------------------------
2425 #ifdef KMP_GOMP_COMPAT
2426 #define GOMP_CRITICAL_SWP(FLAG) \
2427  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2428  KMP_CHECK_GTID; \
2429  CRITICAL_SWP( 0 ); \
2430  }
2431 #else
2432 #define GOMP_CRITICAL_SWP(FLAG)
2433 #endif /* KMP_GOMP_COMPAT */
2434 
2435 
2436 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2437 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2438  TYPE old_value; \
2439  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2440  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2441  return old_value; \
2442 }
2443 // ------------------------------------------------------------------------
2444 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2445 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2446  TYPE old_value; \
2447  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2448  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2449  return old_value; \
2450 }
2451 
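// For example, ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) below maps the
// capture-write { v = x; x = expr; } onto a single atomic exchange; simplified sketch
// (assert, trace and GOMP-compat path omitted):
//
//     kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs )
//     {
//         return KMP_XCHG_FIXED32( lhs, rhs );   // returns the value previously held by *lhs
//     }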
2452 // ------------------------------------------------------------------------
2453 #define CMPXCHG_SWP(TYPE,BITS) \
2454  { \
2455  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2456  TYPE old_value, new_value; \
2457  temp_val = *lhs; \
2458  old_value = temp_val; \
2459  new_value = rhs; \
2460  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2461  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2462  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2463  { \
2464  KMP_CPU_PAUSE(); \
2465  \
2466  temp_val = *lhs; \
2467  old_value = temp_val; \
2468  new_value = rhs; \
2469  } \
2470  return old_value; \
2471  }
2472 
2473 // -------------------------------------------------------------------------
2474 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2475 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2476  TYPE old_value; \
2477  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2478  CMPXCHG_SWP(TYPE,BITS) \
2479 }
2480 
2481 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2482 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2483 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2484 
2485 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2486 
2487 #if ( KMP_ARCH_X86 )
2488  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2489  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2490 #else
2491  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2492  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2493 #endif
2494 
2495 // ------------------------------------------------------------------------
2496 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2497 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2498 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2499  TYPE old_value; \
2500  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2501  CRITICAL_SWP(LCK_ID) \
2502 }
2503 
2504 // ------------------------------------------------------------------------
2505 
2506 // !!! TODO: check if we need to return void for cmplx4 routines
2507 // Workaround for cmplx4. Regular routines with return value don't work
2508 // on Win_32e. Let's return captured values through the additional parameter.
2509 
2510 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2511 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2512 { \
2513  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2514  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2515 
2516 
2517 #define CRITICAL_SWP_WRK(LCK_ID) \
2518  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2519  \
2520  tmp = (*lhs); \
2521  (*lhs) = (rhs); \
2522  (*out) = tmp; \
2523  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2524  return;
2525 
2526 // ------------------------------------------------------------------------
2527 
2528 #ifdef KMP_GOMP_COMPAT
2529 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2530  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2531  KMP_CHECK_GTID; \
2532  CRITICAL_SWP_WRK( 0 ); \
2533  }
2534 #else
2535 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2536 #endif /* KMP_GOMP_COMPAT */
2537 // ------------------------------------------------------------------------
2538 
2539 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2540 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2541  TYPE tmp; \
2542  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2543  CRITICAL_SWP_WRK(LCK_ID) \
2544 }
2545 // The end of workaround for cmplx4
2546 
2547 
2548 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2549 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2550 // cmplx4 routine to return void
2551 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2552 
2553 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2554 
2555 
2556 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2557 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2558 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2559 #if ( KMP_ARCH_X86 )
2560  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2561  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2562 #endif
2563 
2564 
2565 // End of OpenMP 4.0 Capture
2566 
2567 #endif //OMP_40_ENABLED
2568 
2569 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2570 
2571 
2572 #undef OP_CRITICAL
2573 
2574 /* ------------------------------------------------------------------------ */
2575 /* Generic atomic routines */
2576 /* ------------------------------------------------------------------------ */
2577 
2578 void
2579 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2580 {
2581  KMP_DEBUG_ASSERT( __kmp_init_serial );
2582 
2583  if (
2584 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2585  FALSE /* must use lock */
2586 #else
2587  TRUE
2588 #endif
2589  )
2590  {
2591  kmp_int8 old_value, new_value;
2592 
2593  old_value = *(kmp_int8 *) lhs;
2594  (*f)( &new_value, &old_value, rhs );
2595 
2596  /* TODO: Should this be acquire or release? */
2597  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2598  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2599  {
2600  KMP_CPU_PAUSE();
2601 
2602  old_value = *(kmp_int8 *) lhs;
2603  (*f)( &new_value, &old_value, rhs );
2604  }
2605 
2606  return;
2607  }
2608  else {
2609  //
2610  // All 1-byte data is of integer data type.
2611  //
2612 
2613 #ifdef KMP_GOMP_COMPAT
2614  if ( __kmp_atomic_mode == 2 ) {
2615  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2616  }
2617  else
2618 #endif /* KMP_GOMP_COMPAT */
2619  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2620 
2621  (*f)( lhs, lhs, rhs );
2622 
2623 #ifdef KMP_GOMP_COMPAT
2624  if ( __kmp_atomic_mode == 2 ) {
2625  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2626  }
2627  else
2628 #endif /* KMP_GOMP_COMPAT */
2629  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2630  }
2631 }
2632 
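// Illustrative (hypothetical) use of the generic 1-byte entry point: the compiler
// supplies a helper that combines the two operands, f( dest, lhs, rhs ) computing
// *dest = *lhs OP *rhs.  The helper name, "loc_info" and "gtid" below are examples
// only, not part of the library interface:
//
//     static void op_orb_1( void *out, void *lhs, void *rhs )
//     {
//         *(char *) out = (char)( *(char *) lhs | *(char *) rhs );
//     }
//     ...
//     __kmpc_atomic_1( &loc_info, gtid, &x, &y, op_orb_1 );   // atomic x |= y on a char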
2633 void
2634 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2635 {
2636  if (
2637 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2638  FALSE /* must use lock */
2639 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2640  TRUE /* no alignment problems */
2641 #else
2642  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2643 #endif
2644  )
2645  {
2646  kmp_int16 old_value, new_value;
2647 
2648  old_value = *(kmp_int16 *) lhs;
2649  (*f)( &new_value, &old_value, rhs );
2650 
2651  /* TODO: Should this be acquire or release? */
2652  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2653  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2654  {
2655  KMP_CPU_PAUSE();
2656 
2657  old_value = *(kmp_int16 *) lhs;
2658  (*f)( &new_value, &old_value, rhs );
2659  }
2660 
2661  return;
2662  }
2663  else {
2664  //
2665  // All 2-byte data is of integer data type.
2666  //
2667 
2668 #ifdef KMP_GOMP_COMPAT
2669  if ( __kmp_atomic_mode == 2 ) {
2670  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2671  }
2672  else
2673 #endif /* KMP_GOMP_COMPAT */
2674  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2675 
2676  (*f)( lhs, lhs, rhs );
2677 
2678 #ifdef KMP_GOMP_COMPAT
2679  if ( __kmp_atomic_mode == 2 ) {
2680  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2681  }
2682  else
2683 #endif /* KMP_GOMP_COMPAT */
2684  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2685  }
2686 }
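/*
 * Illustration only: on architectures without the x86 guarantees, the test
 * above just checks the low address bits.  A standalone sketch of the same
 * idea (the helper name is hypothetical):
 */
#if 0
#include <stdint.h>

static int example_is_aligned( const void *p, uintptr_t size )
{
    /* size is a power of two (2, 4, 8); all low bits of the address must be 0 */
    return ( (uintptr_t) p & ( size - 1 ) ) == 0;
}
/* For an 8-byte aligned buffer b: example_is_aligned( b, 2 ) is 1, while
   example_is_aligned( (char *) b + 1, 2 ) is 0 -- the misaligned case takes the
   __kmp_atomic_lock_2i fallback path instead of the compare-and-store loop. */
#endif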
2687 
2688 void
2689 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2690 {
2691  KMP_DEBUG_ASSERT( __kmp_init_serial );
2692 
2693  if (
2694  //
2695  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2696  // GOMP compatibility is broken if this routine is called for floats.
2697  //
2698 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2699  TRUE /* no alignment problems */
2700 #else
2701  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2702 #endif
2703  )
2704  {
2705  kmp_int32 old_value, new_value;
2706 
2707  old_value = *(kmp_int32 *) lhs;
2708  (*f)( &new_value, &old_value, rhs );
2709 
2710  /* TODO: Should this be acquire or release? */
2711  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2712  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2713  {
2714  KMP_CPU_PAUSE();
2715 
2716  old_value = *(kmp_int32 *) lhs;
2717  (*f)( &new_value, &old_value, rhs );
2718  }
2719 
2720  return;
2721  }
2722  else {
2723  //
2724  // Use __kmp_atomic_lock_4i for all 4-byte data,
2725  // even if it isn't of integer data type.
2726  //
2727 
2728 #ifdef KMP_GOMP_COMPAT
2729  if ( __kmp_atomic_mode == 2 ) {
2730  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2731  }
2732  else
2733 #endif /* KMP_GOMP_COMPAT */
2734  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2735 
2736  (*f)( lhs, lhs, rhs );
2737 
2738 #ifdef KMP_GOMP_COMPAT
2739  if ( __kmp_atomic_mode == 2 ) {
2740  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2741  }
2742  else
2743 #endif /* KMP_GOMP_COMPAT */
2744  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2745  }
2746 }
2747 
2748 void
2749 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2750 {
2751  KMP_DEBUG_ASSERT( __kmp_init_serial );
2752  if (
2753 
2754 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2755  FALSE /* must use lock */
2756 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2757  TRUE /* no alignment problems */
2758 #else
2759  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2760 #endif
2761  )
2762  {
2763  kmp_int64 old_value, new_value;
2764 
2765  old_value = *(kmp_int64 *) lhs;
2766  (*f)( &new_value, &old_value, rhs );
2767  /* TODO: Should this be acquire or release? */
2768  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2769  *(kmp_int64 *) &old_value,
2770  *(kmp_int64 *) &new_value ) )
2771  {
2772  KMP_CPU_PAUSE();
2773 
2774  old_value = *(kmp_int64 *) lhs;
2775  (*f)( &new_value, &old_value, rhs );
2776  }
2777 
2778  return;
2779  } else {
2780  //
2781  // Use __kmp_atomic_lock_8i for all 8-byte data,
2782  // even if it isn't of integer data type.
2783  //
2784 
2785 #ifdef KMP_GOMP_COMPAT
2786  if ( __kmp_atomic_mode == 2 ) {
2787  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2788  }
2789  else
2790 #endif /* KMP_GOMP_COMPAT */
2791  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2792 
2793  (*f)( lhs, lhs, rhs );
2794 
2795 #ifdef KMP_GOMP_COMPAT
2796  if ( __kmp_atomic_mode == 2 ) {
2797  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2798  }
2799  else
2800 #endif /* KMP_GOMP_COMPAT */
2801  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2802  }
2803 }
2804 
2805 void
2806 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2807 {
2808  KMP_DEBUG_ASSERT( __kmp_init_serial );
2809 
2810 #ifdef KMP_GOMP_COMPAT
2811  if ( __kmp_atomic_mode == 2 ) {
2812  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2813  }
2814  else
2815 #endif /* KMP_GOMP_COMPAT */
2816  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2817 
2818  (*f)( lhs, lhs, rhs );
2819 
2820 #ifdef KMP_GOMP_COMPAT
2821  if ( __kmp_atomic_mode == 2 ) {
2822  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2823  }
2824  else
2825 #endif /* KMP_GOMP_COMPAT */
2826  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2827 }
2828 
2829 void
2830 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2831 {
2832  KMP_DEBUG_ASSERT( __kmp_init_serial );
2833 
2834 #ifdef KMP_GOMP_COMPAT
2835  if ( __kmp_atomic_mode == 2 ) {
2836  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2837  }
2838  else
2839 #endif /* KMP_GOMP_COMPAT */
2840  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2841 
2842  (*f)( lhs, lhs, rhs );
2843 
2844 #ifdef KMP_GOMP_COMPAT
2845  if ( __kmp_atomic_mode == 2 ) {
2846  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2847  }
2848  else
2849 #endif /* KMP_GOMP_COMPAT */
2850  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2851 }
2852 
2853 void
2854 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2855 {
2856  KMP_DEBUG_ASSERT( __kmp_init_serial );
2857 
2858 #ifdef KMP_GOMP_COMPAT
2859  if ( __kmp_atomic_mode == 2 ) {
2860  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2861  }
2862  else
2863 #endif /* KMP_GOMP_COMPAT */
2864  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2865 
2866  (*f)( lhs, lhs, rhs );
2867 
2868 #ifdef KMP_GOMP_COMPAT
2869  if ( __kmp_atomic_mode == 2 ) {
2870  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2871  }
2872  else
2873 #endif /* KMP_GOMP_COMPAT */
2874  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2875 }
2876 
2877 void
2878 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2879 {
2880  KMP_DEBUG_ASSERT( __kmp_init_serial );
2881 
2882 #ifdef KMP_GOMP_COMPAT
2883  if ( __kmp_atomic_mode == 2 ) {
2884  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2885  }
2886  else
2887 #endif /* KMP_GOMP_COMPAT */
2888  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2889 
2890  (*f)( lhs, lhs, rhs );
2891 
2892 #ifdef KMP_GOMP_COMPAT
2893  if ( __kmp_atomic_mode == 2 ) {
2894  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2895  }
2896  else
2897 #endif /* KMP_GOMP_COMPAT */
2898  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2899 }
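/*
 * Summary of the lock-only generic routines above (there is no
 * compare-and-store fast path for these sizes):
 *     __kmpc_atomic_10  ->  __kmp_atomic_lock_10r
 *     __kmpc_atomic_16  ->  __kmp_atomic_lock_16c
 *     __kmpc_atomic_20  ->  __kmp_atomic_lock_20c
 *     __kmpc_atomic_32  ->  __kmp_atomic_lock_32c
 * In GOMP compatibility mode (__kmp_atomic_mode == 2) they all serialize on the
 * single __kmp_atomic_lock instead.
 */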
2900 
2901 // AC: these are the same two routines as GOMP_atomic_start/end, but they are called by our compiler;
2902 // duplicated so that third-party names are not used in pure Intel code.
2903 // TODO: consider adding a GTID parameter after consultation with Ernesto/Xinmin.
2904 void
2905 __kmpc_atomic_start(void)
2906 {
2907  int gtid = __kmp_entry_gtid();
2908  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2909  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2910 }
2911 
2912 
2913 void
2914 __kmpc_atomic_end(void)
2915 {
2916  int gtid = __kmp_get_gtid();
2917  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2918  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2919 }
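/*
 * Illustration only: a compiler that cannot map an atomic construct onto one of
 * the typed entry points can bracket the update with these two calls.  A
 * minimal sketch (the global and the function name are hypothetical):
 */
#if 0
extern long double example_x;

static void example_guarded_update( long double v )
{
    __kmpc_atomic_start();    /* acquires __kmp_atomic_lock */
    example_x += v;           /* guarded, non-atomic update */
    __kmpc_atomic_end();      /* releases __kmp_atomic_lock */
}
#endif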
2920 
2921 /* ------------------------------------------------------------------------ */
2922 /* ------------------------------------------------------------------------ */
2927 // end of file