LCOV - code coverage report
Current view: top level - mm/boehm-gc/libatomic_ops/src/atomic_ops/sysdeps/gcc - x86.h (source / functions)
Test:         coverage.info
Date:         2017-07-14 10:03:36

                Hit   Total   Coverage
Lines:            2       5     40.0 %
Functions:        1       2     50.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
       3             :  * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
       4             :  * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
       5             :  *
       6             :  *
       7             :  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
       8             :  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
       9             :  *
      10             :  * Permission is hereby granted to use or copy this program
      11             :  * for any purpose,  provided the above notices are retained on all copies.
      12             :  * Permission to modify the code and to distribute modified code is granted,
      13             :  * provided the above notices are retained, and a notice that the code was
      14             :  * modified is included with the above copyright notice.
      15             :  *
      16             :  * Some of the machine specific code was borrowed from our GC distribution.
      17             :  */
      18             : 
      19             : /* The following really assume we have a 486 or better.  Unfortunately  */
      20             : /* gcc doesn't define a suitable feature test macro based on command    */
      21             : /* line options.                                                        */
      22             : /* We should perhaps test dynamically.                                  */
      23             : 
      24             : #include "../all_aligned_atomic_load_store.h"
      25             : 
      26             : #include "../test_and_set_t_is_char.h"
      27             : 
      28             : #if defined(__SSE2__) && !defined(AO_USE_PENTIUM4_INSTRS)
      29             :   /* "mfence" is a part of SSE2 set (introduced on Intel Pentium 4).    */
      30             : # define AO_USE_PENTIUM4_INSTRS
      31             : #endif
      32             : 
      33             : #if defined(AO_USE_PENTIUM4_INSTRS)
      34             :   AO_INLINE void
      35             :   AO_nop_full(void)
      36             :   {
      37             :     __asm__ __volatile__("mfence" : : : "memory");
      38             :   }
      39             : # define AO_HAVE_nop_full
      40             : 
      41             : #else
      42             :   /* We could use the cpuid instruction.  But that seems to be slower   */
      43             :   /* than the default implementation based on test_and_set_full.  Thus  */
      44             :   /* we omit that bit of misinformation here.                           */
      45             : #endif /* !AO_USE_PENTIUM4_INSTRS */
      46             : 
      47             : /* As far as we can tell, the lfence and sfence instructions are not    */
      48             : /* currently needed or useful for cached memory accesses.               */
      49             : 
      50             : /* Really only works for 486 and later */
      51             : #ifndef AO_PREFER_GENERALIZED
      52             :   AO_INLINE AO_t
      53             :   AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
      54             :   {
      55             :     AO_t result;
      56             : 
      57             :     __asm__ __volatile__ ("lock; xadd %0, %1" :
      58             :                         "=r" (result), "=m" (*p) : "0" (incr), "m" (*p)
      59             :                         : "memory");
      60             :     return result;
      61             :   }
      62             : # define AO_HAVE_fetch_and_add_full
      63             : #endif /* !AO_PREFER_GENERALIZED */
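
A minimal usage sketch of the fetch-and-add primitive defined above: a shared event counter. The counter and function names are invented for illustration; the only library facilities assumed are AO_t and AO_fetch_and_add_full, which returns the value held before the addition.

    #include <atomic_ops.h>

    static volatile AO_t event_count;           /* zero-initialized */

    AO_t record_event(void)
    {
      /* The pre-increment value doubles as a unique ticket number:    */
      /* 0 for the first event, 1 for the second, and so on.           */
      return AO_fetch_and_add_full(&event_count, 1);
    }
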
      64             : 
      65             : AO_INLINE unsigned char
      66             : AO_char_fetch_and_add_full (volatile unsigned char *p, unsigned char incr)
      67             : {
      68             :   unsigned char result;
      69             : 
      70             :   __asm__ __volatile__ ("lock; xaddb %0, %1" :
      71             :                         "=q" (result), "=m" (*p) : "0" (incr), "m" (*p)
      72             :                         : "memory");
      73             :   return result;
      74             : }
      75             : #define AO_HAVE_char_fetch_and_add_full
      76             : 
      77             : AO_INLINE unsigned short
      78             : AO_short_fetch_and_add_full (volatile unsigned short *p, unsigned short incr)
      79             : {
      80             :   unsigned short result;
      81             : 
      82             :   __asm__ __volatile__ ("lock; xaddw %0, %1" :
      83             :                         "=r" (result), "=m" (*p) : "0" (incr), "m" (*p)
      84             :                         : "memory");
      85             :   return result;
      86             : }
      87             : #define AO_HAVE_short_fetch_and_add_full
      88             : 
      89             : #ifndef AO_PREFER_GENERALIZED
      90             :   /* Really only works for 486 and later */
      91             :   AO_INLINE void
      92             :   AO_and_full (volatile AO_t *p, AO_t value)
      93             :   {
      94             :     __asm__ __volatile__ ("lock; and %1, %0" :
      95             :                         "=m" (*p) : "r" (value), "m" (*p)
      96             :                         : "memory");
      97             :   }
      98             : # define AO_HAVE_and_full
      99             : 
     100             :   AO_INLINE void
     101             :   AO_or_full (volatile AO_t *p, AO_t value)
     102             :   {
     103             :     __asm__ __volatile__ ("lock; or %1, %0" :
     104             :                         "=m" (*p) : "r" (value), "m" (*p)
     105             :                         : "memory");
     106             :   }
     107             : # define AO_HAVE_or_full
     108             : 
     109             :   AO_INLINE void
     110             :   AO_xor_full (volatile AO_t *p, AO_t value)
     111             :   {
     112             :     __asm__ __volatile__ ("lock; xor %1, %0" :
     113             :                         "=m" (*p) : "r" (value), "m" (*p)
     114             :                         : "memory");
     115             :   }
     116             : # define AO_HAVE_xor_full
     117             : 
     118             :   /* AO_store_full could be implemented directly using "xchg" but it    */
     119             :   /* could be generalized efficiently as an ordinary store accomplished */
     120             :   /* with AO_nop_full ("mfence" instruction).                           */
     121             : #endif /* !AO_PREFER_GENERALIZED */
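
A short sketch of the typical use of the lock-prefixed and/or primitives above: setting and clearing bits in a shared flag word. The flag name and bit assignment are made up for the example.

    #include <atomic_ops.h>

    #define FLAG_DIRTY ((AO_t)1 << 0)           /* example bit only */

    static volatile AO_t status_flags;

    void mark_dirty(void)  { AO_or_full(&status_flags, FLAG_DIRTY); }
    void clear_dirty(void) { AO_and_full(&status_flags, ~FLAG_DIRTY); }
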
     122             : 
     123             : AO_INLINE AO_TS_VAL_t
     124           0 : AO_test_and_set_full(volatile AO_TS_t *addr)
     125             : {
     126             :   unsigned char oldval;
     127             :   /* Note: the "xchg" instruction does not need a "lock" prefix */
     128           0 :   __asm__ __volatile__ ("xchgb %0, %1"
     129             :                         : "=q" (oldval), "=m" (*addr)
     130             :                         : "0" ((unsigned char)0xff), "m" (*addr)
     131             :                         : "memory");
     132           0 :   return (AO_TS_VAL_t)oldval;
     133             : }
     134             : #define AO_HAVE_test_and_set_full
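
The usual application of AO_test_and_set_full is a spinlock; the sketch below follows the pattern documented for libatomic_ops (AO_TS_INITIALIZER, AO_TS_SET, and AO_CLEAR are part of the library's test-and-set interface). The wrapper function is invented for illustration.

    #include <atomic_ops.h>

    static AO_TS_t lock = AO_TS_INITIALIZER;

    void with_lock(void (*critical_section)(void))
    {
      while (AO_test_and_set_full(&lock) == AO_TS_SET) {
        /* busy-wait: the lock is already held by someone else */
      }
      critical_section();
      AO_CLEAR(&lock);                          /* releasing store */
    }
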
     135             : 
     136             : #ifndef AO_GENERALIZE_ASM_BOOL_CAS
     137             :   /* Returns nonzero if the comparison succeeded.       */
     138             :   AO_INLINE int
     139       30475 :   AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
     140             :   {
     141             : #   ifdef AO_USE_SYNC_CAS_BUILTIN
     142       30475 :       return (int)__sync_bool_compare_and_swap(addr, old, new_val
     143             :                                                /* empty protection list */);
     144             :                 /* Note: an empty list of variables protected by the    */
     145             :                 /* memory barrier should mean all globally accessible   */
     146             :                 /* variables are protected.                             */
     147             : #   else
     148             :       char result;
     149             :       __asm__ __volatile__ ("lock; cmpxchg %3, %0; setz %1"
     150             :                         : "=m" (*addr), "=a" (result)
     151             :                         : "m" (*addr), "r" (new_val), "a" (old)
     152             :                         : "memory");
     153             :       return (int)result;
     154             : #   endif
     155             :   }
     156             : # define AO_HAVE_compare_and_swap_full
     157             : #endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
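
A sketch of the usual retry loop around the boolean CAS above, here maintaining a running maximum. A failed CAS only means another thread updated the location first, so the loop re-reads and tries again. The variable names are invented.

    #include <atomic_ops.h>

    static volatile AO_t high_water;

    void note_value(AO_t v)
    {
      AO_t cur;
      do {
        cur = AO_load(&high_water);
        if (v <= cur)
          return;                               /* current maximum already covers v */
      } while (!AO_compare_and_swap_full(&high_water, cur, v));
    }
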
     158             : 
     159             : AO_INLINE AO_t
     160             : AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
     161             :                                AO_t new_val)
     162             : {
     163             : # ifdef AO_USE_SYNC_CAS_BUILTIN
     164             :     return __sync_val_compare_and_swap(addr, old_val, new_val
     165             :                                        /* empty protection list */);
     166             : # else
     167             :     AO_t fetched_val;
     168             :     __asm__ __volatile__ ("lock; cmpxchg %3, %4"
     169             :                         : "=a" (fetched_val), "=m" (*addr)
     170             :                         : "a" (old_val), "r" (new_val), "m" (*addr)
     171             :                         : "memory");
     172             :     return fetched_val;
     173             : # endif
     174             : }
     175             : #define AO_HAVE_fetch_compare_and_swap_full
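
The value-returning CAS avoids a separate reload inside a retry loop, since a failed attempt already hands back the value that won the race. The sketch below uses it for an increment that saturates at a cap; the counter name and limit are made up for the example.

    #include <atomic_ops.h>

    #define REFCOUNT_MAX ((AO_t)1000)           /* arbitrary example cap */

    static volatile AO_t refcount;

    AO_t ref_acquire(void)
    {
      AO_t cur = AO_load(&refcount);
      for (;;) {
        AO_t prev;
        if (cur >= REFCOUNT_MAX)
          return cur;                           /* saturated: leave unchanged */
        prev = AO_fetch_compare_and_swap_full(&refcount, cur, cur + 1);
        if (prev == cur)
          return cur + 1;                       /* our increment was installed */
        cur = prev;                             /* reuse the returned value, no reload */
      }
    }
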
     176             : 
     177             : #if !defined(__x86_64__) && !defined(AO_USE_SYNC_CAS_BUILTIN)
     178             : # include "../standard_ao_double_t.h"
     179             : 
     180             :   /* Reading or writing a quadword aligned on a 64-bit boundary is      */
     181             :   /* always carried out atomically on at least a Pentium according to   */
     182             :   /* Chapter 8.1.1 of Volume 3A Part 1 of Intel processor manuals.      */
     183             : # define AO_ACCESS_double_CHECK_ALIGNED
     184             : # include "../loadstore/double_atomic_load_store.h"
     185             : 
     186             :   /* Returns nonzero if the comparison succeeded.       */
     187             :   /* Really requires at least a Pentium.                */
     188             :   AO_INLINE int
     189             :   AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
     190             :                                          AO_t old_val1, AO_t old_val2,
     191             :                                          AO_t new_val1, AO_t new_val2)
     192             :   {
     193             :     char result;
     194             : #   ifdef __PIC__
     195             :       AO_t saved_ebx;
     196             : 
     197             :       /* If PIC is turned on, we cannot use ebx as it is reserved for the */
     198             :       /* GOT pointer.  We should save and restore ebx.  The proposed      */
     199             :       /* solution is not so efficient as the older alternatives using     */
     200             :       /* push ebx or edi as new_val1 (w/o clobbering edi and temporary    */
     201             :       /* local variable usage) but it is more portable (it works even if  */
     202             :       /* ebx is not used as GOT pointer, and it works for the buggy GCC   */
     203             :       /* releases that incorrectly evaluate memory operands offset in the */
     204             :       /* inline assembly after push).                                     */
     205             : #     ifdef __OPTIMIZE__
     206             :         __asm__ __volatile__("mov %%ebx, %2\n\t" /* save ebx */
     207             :                              "lea %0, %%edi\n\t" /* in case addr is in ebx */
     208             :                              "mov %7, %%ebx\n\t" /* load new_val1 */
     209             :                              "lock; cmpxchg8b (%%edi)\n\t"
     210             :                              "mov %2, %%ebx\n\t" /* restore ebx */
     211             :                              "setz %1"
     212             :                         : "=m" (*addr), "=a" (result), "=m" (saved_ebx)
     213             :                         : "m" (*addr), "d" (old_val2), "a" (old_val1),
     214             :                           "c" (new_val2), "m" (new_val1)
     215             :                         : "%edi", "memory");
     216             : #     else
     217             :         /* A less-efficient code manually preserving edi if GCC invoked */
     218             :         /* with -O0 option (otherwise it fails while finding a register */
     219             :         /* in class 'GENERAL_REGS').                                    */
     220             :         AO_t saved_edi;
     221             :         __asm__ __volatile__("mov %%edi, %3\n\t" /* save edi */
     222             :                              "mov %%ebx, %2\n\t" /* save ebx */
     223             :                              "lea %0, %%edi\n\t" /* in case addr is in ebx */
     224             :                              "mov %8, %%ebx\n\t" /* load new_val1 */
     225             :                              "lock; cmpxchg8b (%%edi)\n\t"
     226             :                              "mov %2, %%ebx\n\t" /* restore ebx */
     227             :                              "mov %3, %%edi\n\t" /* restore edi */
     228             :                              "setz %1"
     229             :                         : "=m" (*addr), "=a" (result),
     230             :                           "=m" (saved_ebx), "=m" (saved_edi)
     231             :                         : "m" (*addr), "d" (old_val2), "a" (old_val1),
     232             :                           "c" (new_val2), "m" (new_val1) : "memory");
     233             : #     endif
     234             : #   else
     235             :       /* For non-PIC mode, this operation could be simplified (and be   */
     236             :       /* faster) by using ebx as new_val1 (GCC would refuse to compile  */
     237             :       /* such code for PIC mode).                                       */
     238             :       __asm__ __volatile__ ("lock; cmpxchg8b %0; setz %1"
     239             :                         : "=m" (*addr), "=a" (result)
     240             :                         : "m" (*addr), "d" (old_val2), "a" (old_val1),
     241             :                           "c" (new_val2), "b" (new_val1)
     242             :                         : "memory");
     243             : #   endif
     244             :     return (int) result;
     245             :   }
     246             : # define AO_HAVE_compare_double_and_swap_double_full
     247             : 
     248             : # define AO_T_IS_INT
     249             : 
     250             : #elif defined(__ILP32__) || !defined(__x86_64__)
     251             : # include "../standard_ao_double_t.h"
     252             : 
     253             :   /* Reading or writing a quadword aligned on a 64-bit boundary is      */
     254             :   /* always carried out atomically (requires at least a Pentium).       */
     255             : # define AO_ACCESS_double_CHECK_ALIGNED
     256             : # include "../loadstore/double_atomic_load_store.h"
     257             : 
     258             :   /* X32 has native support for 64-bit integer operations (AO_double_t  */
     259             :   /* is a 64-bit integer and we could use 64-bit cmpxchg).              */
     260             :   /* This primitive is used by compare_double_and_swap_double_full.     */
     261             :   AO_INLINE int
     262             :   AO_double_compare_and_swap_full(volatile AO_double_t *addr,
     263             :                                   AO_double_t old_val, AO_double_t new_val)
     264             :   {
     265             :     /* It is safe to use __sync CAS built-in here.      */
     266             :     return __sync_bool_compare_and_swap(&addr->AO_whole,
     267             :                                         old_val.AO_whole, new_val.AO_whole
     268             :                                         /* empty protection list */);
     269             :   }
     270             : # define AO_HAVE_double_compare_and_swap_full
     271             : 
     272             : # define AO_T_IS_INT
     273             : 
     274             : #else /* 64-bit */
     275             : 
     276             :   AO_INLINE unsigned int
     277             :   AO_int_fetch_and_add_full (volatile unsigned int *p, unsigned int incr)
     278             :   {
     279             :     unsigned int result;
     280             : 
     281             :     __asm__ __volatile__ ("lock; xaddl %0, %1"
     282             :                         : "=r" (result), "=m" (*p)
     283             :                         : "0" (incr), "m" (*p)
     284             :                         : "memory");
     285             :     return result;
     286             :   }
     287             : # define AO_HAVE_int_fetch_and_add_full
     288             : 
     289             :   /* The Intel and AMD Architecture Programmer Manuals state roughly    */
     290             :   /* the following:                                                     */
     291             :   /* - CMPXCHG16B (with a LOCK prefix) can be used to perform 16-byte   */
     292             :   /* atomic accesses in 64-bit mode (with certain alignment             */
     293             :   /* restrictions);                                                     */
     294             :   /* - SSE instructions that access data larger than a quadword (like   */
     295             :   /* MOVDQA) may be implemented using multiple memory accesses;         */
     296             :   /* - LOCK prefix causes an invalid-opcode exception when used with    */
     297             :   /* 128-bit media (SSE) instructions.                                  */
     298             :   /* Thus, currently, the only way to implement lock-free double_load   */
     299             :   /* and double_store on x86_64 is to use CMPXCHG16B (if available).    */
     300             : 
     301             : /* TODO: Test some gcc macro to detect presence of cmpxchg16b. */
     302             : 
     303             : # ifdef AO_CMPXCHG16B_AVAILABLE
     304             : #   include "../standard_ao_double_t.h"
     305             : 
     306             :     /* NEC LE-IT: older AMD Opterons are missing this instruction.      */
     307             :     /* On these machines SIGILL will be thrown.                         */
     308             :     /* Define AO_WEAK_DOUBLE_CAS_EMULATION to have an emulated (lock    */
     309             :     /* based) version available.                                        */
     310             :     /* HB: Changed this to not define either by default.  There are     */
     311             :     /* enough machines and tool chains around on which cmpxchg16b       */
     312             :     /* doesn't work.  And the emulation is unsafe by our usual rules.   */
     313             :     /* However both are clearly useful in certain cases.                */
     314             :     AO_INLINE int
     315             :     AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
     316             :                                            AO_t old_val1, AO_t old_val2,
     317             :                                            AO_t new_val1, AO_t new_val2)
     318             :     {
     319             :       char result;
     320             :       __asm__ __volatile__("lock; cmpxchg16b %0; setz %1"
     321             :                         : "=m"(*addr), "=a"(result)
     322             :                         : "m"(*addr), "d" (old_val2), "a" (old_val1),
     323             :                           "c" (new_val2), "b" (new_val1)
     324             :                         : "memory");
     325             :       return (int) result;
     326             :     }
     327             : #   define AO_HAVE_compare_double_and_swap_double_full
     328             : 
     329             : # elif defined(AO_WEAK_DOUBLE_CAS_EMULATION)
     330             : #   include "../standard_ao_double_t.h"
     331             : 
     332             :     /* This one provides spinlock based emulation of CAS implemented in */
     333             :     /* atomic_ops.c.  We probably do not want to do this here, since it */
     334             :     /* is not atomic with respect to other kinds of updates of *addr.   */
     335             :     /* On the other hand, this may be a useful facility on occasion.    */
     336             :     int AO_compare_double_and_swap_double_emulation(
     337             :                                                 volatile AO_double_t *addr,
     338             :                                                 AO_t old_val1, AO_t old_val2,
     339             :                                                 AO_t new_val1, AO_t new_val2);
     340             : 
     341             :     AO_INLINE int
     342             :     AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
     343             :                                            AO_t old_val1, AO_t old_val2,
     344             :                                            AO_t new_val1, AO_t new_val2)
     345             :     {
     346             :       return AO_compare_double_and_swap_double_emulation(addr,
     347             :                                 old_val1, old_val2, new_val1, new_val2);
     348             :     }
     349             : #   define AO_HAVE_compare_double_and_swap_double_full
     350             : # endif /* AO_WEAK_DOUBLE_CAS_EMULATION && !AO_CMPXCHG16B_AVAILABLE */
     351             : 
     352             : #endif /* x86_64 && !ILP32 */
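
Where a double-width CAS is available (the cmpxchg8b path for 32-bit builds or the cmpxchg16b path above), it lets a two-word value such as a (version, payload) pair be updated atomically, which is the standard ABA-avoidance trick. The sketch below assumes the AO_val1/AO_val2 accessors provided by standard_ao_double_t.h; the slot name is invented for illustration.

    #include <atomic_ops.h>

    #ifdef AO_HAVE_compare_double_and_swap_double_full
    static volatile AO_double_t slot;           /* AO_val1 = version, AO_val2 = payload */

    void publish(AO_t payload)
    {
      AO_t ver, old_payload;
      do {
        ver = slot.AO_val1;                     /* plain snapshot: a torn read only  */
        old_payload = slot.AO_val2;             /* makes the CAS below fail and retry */
      } while (!AO_compare_double_and_swap_double_full(&slot, ver, old_payload,
                                                       ver + 1, payload));
    }
    #endif
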
     353             : 
     354             : /* Real X86 implementations, except for some old 32-bit WinChips,       */
     355             : /* appear to enforce ordering between memory operations, EXCEPT that    */
     356             : /* a later read can pass earlier writes, presumably due to the visible  */
     357             : /* presence of store buffers.                                           */
     358             : /* We ignore both the WinChips and the fact that the official specs     */
     359             : /* seem to be much weaker (and arguably too weak to be usable).         */
     360             : #include "../ordered_except_wr.h"
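
The store-load reordering described above is exactly the case where AO_nop_full (mfence) is needed. A Dekker-style handshake makes the point: without the barrier between the store and the load, both threads can observe the other's flag as 0 and enter together. The flag and function names below are invented for the sketch; thread 1 performs the mirror image with the roles of flag0 and flag1 swapped.

    #include <atomic_ops.h>

    static volatile AO_t flag0, flag1;

    int thread0_may_enter(void)
    {
      AO_store(&flag0, 1);                      /* announce intent */
      AO_nop_full();                            /* keep the load below after the store */
      return AO_load(&flag1) == 0;              /* enter only if the peer has not announced */
    }
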

Generated by: LCOV version 1.11