Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 1 | |
/*
 * Copyright (C) Igor Sysoev
 */
| 5 | |
| 6 | |
/*
 * NGX_SMP_LOCK is pasted in front of the atomic instructions below.
 * On SMP builds it expands to the x86 "lock" prefix; otherwise it
 * expands to nothing.
 */

#if (NGX_SMP)
#define NGX_SMP_LOCK  "lock;"
#else
#define NGX_SMP_LOCK
#endif
| 12 | |
| 13 | |
| 14 | /* |
| 15 | * "cmpxchgl r, [m]": |
| 16 | * |
| 17 | * if (eax == [m]) { |
| 18 | * zf = 1; |
| 19 | * [m] = r; |
| 20 | * } else { |
| 21 | * zf = 0; |
| 22 | * eax = [m]; |
| 23 | * } |
| 24 | * |
Igor Sysoev | fb0f94f | 2005-11-15 12:26:51 +0000 | [diff] [blame] | 25 | * |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 26 | * The "r" means the general register. |
| 27 | * The "=a" and "a" are the %eax register. |
| 28 | * Although we can return result in any register, we use "a" because it is |
| 29 | * used in cmpxchgl anyway. The result is actually in %al but not in %eax, |
| 30 | * however, as the code is inlined gcc can test %al as well as %eax, |
| 31 | * and icc adds "movzbl %al, %eax" by itself. |
| 32 | * |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 33 | * The "cc" means that flags were changed. |
| 34 | */ |
| 35 | |
| 36 | static ngx_inline ngx_atomic_uint_t |
| 37 | ngx_atomic_cmp_set(ngx_atomic_t *lock, ngx_atomic_uint_t old, |
| 38 | ngx_atomic_uint_t set) |
| 39 | { |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 40 | u_char res; |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 41 | |
| 42 | __asm__ volatile ( |
| 43 | |
| 44 | NGX_SMP_LOCK |
| 45 | " cmpxchgl %3, %1; " |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 46 | " sete %0; " |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 47 | |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 48 | : "=a" (res) : "m" (*lock), "a" (old), "r" (set) : "cc", "memory"); |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 49 | |
| 50 | return res; |
| 51 | } |
| 52 | |
| 53 | |
| 54 | /* |
| 55 | * "xaddl r, [m]": |
| 56 | * |
| 57 | * temp = [m]; |
| 58 | * [m] += r; |
| 59 | * r = temp; |
| 60 | * |
| 61 | * |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 62 | * The "+r" means the general register. |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 63 | * The "cc" means that flags were changed. |
| 64 | */ |
| 65 | |
| 66 | |
Igor Sysoev | a961a37 | 2005-12-16 15:00:41 +0000 | [diff] [blame] | 67 | #if !(( __GNUC__ == 2 && __GNUC_MINOR__ <= 7 ) || ( __INTEL_COMPILER >= 800 )) |
| 68 | |
| 69 | /* |
| 70 | * icc 8.1 and 9.0 compile broken code with -march=pentium4 option: |
| 71 | * ngx_atomic_fetch_add() always return the input "add" value, |
| 72 | * so we use the gcc 2.7 version. |
| 73 | * |
| 74 | * icc 8.1 and 9.0 with -march=pentiumpro option or icc 7.1 compile |
| 75 | * correct code. |
| 76 | */ |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 77 | |
| 78 | static ngx_inline ngx_atomic_int_t |
| 79 | ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add) |
| 80 | { |
| 81 | __asm__ volatile ( |
| 82 | |
| 83 | NGX_SMP_LOCK |
| 84 | " xaddl %0, %1; " |
| 85 | |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 86 | : "+r" (add) : "m" (*value) : "cc", "memory"); |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 87 | |
| 88 | return add; |
| 89 | } |
| 90 | |
| 91 | |
Igor Sysoev | a961a37 | 2005-12-16 15:00:41 +0000 | [diff] [blame] | 92 | #else |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 93 | |
| 94 | /* |
Igor Sysoev | a234c0f | 2007-01-15 17:49:33 +0000 | [diff] [blame] | 95 | * gcc 2.7 does not support "+r", so we have to use the fixed |
| 96 | * %eax ("=a" and "a") and this adds two superfluous instructions in the end |
| 97 | * of code, something like this: "mov %eax, %edx / mov %edx, %eax". |
Igor Sysoev | fd595b5 | 2005-10-12 13:45:55 +0000 | [diff] [blame] | 98 | */ |
| 99 | |
| 100 | static ngx_inline ngx_atomic_int_t |
| 101 | ngx_atomic_fetch_add(ngx_atomic_t *value, ngx_atomic_int_t add) |
| 102 | { |
| 103 | ngx_atomic_uint_t old; |
| 104 | |
| 105 | __asm__ volatile ( |
| 106 | |
| 107 | NGX_SMP_LOCK |
| 108 | " xaddl %2, %1; " |
| 109 | |
| 110 | : "=a" (old) : "m" (*value), "a" (add) : "cc", "memory"); |
| 111 | |
| 112 | return old; |
| 113 | } |
| 114 | |
| 115 | #endif |
Igor Sysoev | af6d700 | 2005-10-19 12:31:31 +0000 | [diff] [blame] | 116 | |
| 117 | |
/*
 * on x86 the write operations are performed in program order, so we only
 * need to prevent gcc from reordering memory accesses across this point:
 * the empty asm statement with the "memory" clobber emits no instructions
 */

#define ngx_memory_barrier()    __asm__ volatile ("" ::: "memory")
Igor Sysoev | 818adbf | 2006-02-08 15:28:30 +0000 | [diff] [blame] | 124 | |
/*
 * old "as" does not support the "pause" opcode,
 * so its encoding is emitted as raw bytes
 */
#define ngx_cpu_pause()         __asm__ (".byte 0xf3, 0x90")