| #include "fio.h" |
| |
| #define HWD_BITS 64 |
| |
| static uint64_t next(void) { return fio_rand64(); } |
| |
| /* |
| * Copyright (C) 2004-2016 David Blackman. |
| * Copyright (C) 2017-2018 David Blackman and Sebastiano Vigna. |
| * |
| * This program is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * This program is distributed in the hope that it will be useful, but |
| * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, see <http://www.gnu.org/licenses/>. |
| * |
| */ |
| |
| #include <assert.h> |
| #include <fcntl.h> |
| #include <float.h> |
| #include <inttypes.h> |
| #include <math.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <time.h> |
| #include <unistd.h> |
| #ifdef HWD_MMAP |
| #include <sys/mman.h> |
| #endif |
| |
| /* |
| HWD 1.1 (2018-05-24) |
| |
| This code implements the Hamming-weight dependency test based on z9 |
| from gjrand 4.2.1.0 and described in detail in |
| |
| David Blackman and Sebastiano Vigna, "Scrambled linear pseudorandom number |
| generators", 2018. |
| |
| Please refer to the paper for details about the test. |
| |
| To compile, you must define: |
| |
| - HWD_BITS, which is the width of the word tested (parameter w in the paper); |
| must be 32, 64, or 128. |
| - HWD_PRNG_BITS, which is the number of bits output by the PRNG, and it is by |
| default HWD_BITS. Presently legal combinations are 32/32, 32/64, |
| 64/64 and 128/64. |
| - Optionally HWD_DIM, which defines the length of the signatures examined |
| (parameter k in the paper). Valid values are between 1 and 19; |
| the default value is 8. |
| - Optionally, HWD_NOPOPCOUNT, if your compiler does not support gcc's |
| builtins. |
| - Optionally, HWD_NUMCATS, if you want to override the default number |
| of categories. Valid values are between 1 and HWD_DIM; the default value |
| is HWD_DIM/2 + 1. |
| - Optionally, HWD_MMAP if you want to allocate memory in huge pages using |
| mmap(). |
| |
| You must insert the code for your PRNG, providing a suitable next() |
| method (returning a uint32_t or a uint64_t, depending on HWD_PRNG_BITS) |
| at the HERE comment below. You may additionally initialize his state in |
| the main() if necessary. |
| */ |
| |
| #ifndef HWD_DIM |
| // This must be at most 19 |
| #define DIM (8) |
| #else |
| #define DIM (HWD_DIM) |
| #endif |
| |
| #ifndef HWD_NUMCATS |
| // This must be at most DIM |
| #define NUMCATS (DIM / 2 + 1) |
| #else |
| #define NUMCATS (HWD_NUMCATS) |
| #endif |
| |
| // Number of bits used for the sum in cs[] (small counters/sums). |
| #define SUM_BITS (19) |
| |
| // Compile-time computation of 3^DIM |
| #define SIZE \ |
| ((DIM >= 1 ? UINT64_C(3) : UINT64_C(1)) * (DIM >= 2 ? 3 : 1) * \ |
| (DIM >= 3 ? 3 : 1) * (DIM >= 4 ? 3 : 1) * (DIM >= 5 ? 3 : 1) * \ |
| (DIM >= 6 ? 3 : 1) * (DIM >= 7 ? 3 : 1) * (DIM >= 8 ? 3 : 1) * \ |
| (DIM >= 9 ? 3 : 1) * (DIM >= 10 ? 3 : 1) * (DIM >= 11 ? 3 : 1) * \ |
| (DIM >= 12 ? 3 : 1) * (DIM >= 13 ? 3 : 1) * (DIM >= 14 ? 3 : 1) * \ |
| (DIM >= 15 ? 3 : 1) * (DIM >= 16 ? 3 : 1) * (DIM >= 17 ? 3 : 1) * \ |
| (DIM >= 18 ? 3 : 1) * (DIM >= 19 ? 3 : 1)) |
| |
| // Fast division by 3; works up to DIM = 19. |
| #define DIV3(x) ((x)*UINT64_C(1431655766) >> 32) |
| |
| #ifndef HWD_PRNG_BITS |
| #define HWD_PRNG_BITS HWD_BITS |
| #endif |
| |
| // batch_size values MUST be even. P is the probability of a 1 trit. |
| |
| #if HWD_BITS == 32 |
| |
| #define P (0.40338510414585471153) |
| const int64_t batch_size[] = {-1, |
| UINT64_C(16904), |
| UINT64_C(37848), |
| UINT64_C(88680), |
| UINT64_C(213360), |
| UINT64_C(520784), |
| UINT64_C(1280664), |
| UINT64_C(3160976), |
| UINT64_C(7815952), |
| UINT64_C(19342248), |
| UINT64_C(47885112), |
| UINT64_C(118569000), |
| UINT64_C(293614056), |
| UINT64_C(727107408), |
| UINT64_C(1800643824), |
| UINT64_C(4459239480), |
| UINT64_C(11043223056), |
| UINT64_C(27348419104), |
| UINT64_C(67728213816), |
| UINT64_C(167728896072)}; |
| |
| #if HWD_PRNG_BITS == 64 |
| static uint64_t next(void); |
| #define TEST_ITERATIONS(b) ((b) / 2) |
| #elif HWD_PRNG_BITS == 32 |
| #define TEST_ITERATIONS(b) (b) |
| static uint32_t next(void); |
| #else |
| #error "Test 32-bit test supports PRNG of size 32 or 64" |
| #endif |
| |
| #elif HWD_BITS == 64 |
| |
| #define P (0.46769122397215788544) |
| const int64_t batch_size[] = {-1, |
| UINT64_C(14744), |
| UINT64_C(28320), |
| UINT64_C(56616), |
| UINT64_C(116264), |
| UINT64_C(242784), |
| UINT64_C(512040), |
| UINT64_C(1086096), |
| UINT64_C(2311072), |
| UINT64_C(4926224), |
| UINT64_C(10510376), |
| UINT64_C(22435504), |
| UINT64_C(47903280), |
| UINT64_C(102294608), |
| UINT64_C(218459240), |
| UINT64_C(466556056), |
| UINT64_C(996427288), |
| UINT64_C(2128099936), |
| UINT64_C(4545075936), |
| UINT64_C(9707156552)}; |
| |
| #if HWD_PRNG_BITS == 64 |
| #define TEST_ITERATIONS(b) (b) |
| static uint64_t next(void); |
| #else |
| #error "Test 64-bit test supports PRNGs of size 64" |
| #endif |
| |
| #elif HWD_BITS == 128 |
| |
| #define P (0.46373128592889397439) |
| const int64_t batch_size[] = {-1, |
| UINT64_C(14856), |
| UINT64_C(28792), |
| UINT64_C(58088), |
| UINT64_C(120392), |
| UINT64_C(253680), |
| UINT64_C(539816), |
| UINT64_C(1155104), |
| UINT64_C(2479360), |
| UINT64_C(5330680), |
| UINT64_C(11471256), |
| UINT64_C(24696808), |
| UINT64_C(53183328), |
| UINT64_C(114541856), |
| UINT64_C(246706584), |
| UINT64_C(531387952), |
| UINT64_C(1144590984), |
| UINT64_C(2465432776), |
| UINT64_C(5310537968), |
| UINT64_C(11438933136)}; |
| |
| #if HWD_PRNG_BITS == 64 |
| #define TEST_ITERATIONS(b) (b) |
| static uint64_t next(void); |
| #else |
| #error "Test 128-bit test supports PRNG of size 64" |
| #endif |
| |
| #else |
| #error "Please define HWD_BITS as 32, 64, or 128" |
| #endif |
| |
| #if HWD_BITS == 64 || HWD_BITS == 128 |
| |
| #define WTYPE uint64_t |
| #ifdef HWD_NO_POPCOUNT |
| static inline int popcount64(uint64_t x) { |
| x = x - ((x >> 1) & 0x5555555555555555); |
| x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); |
| x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f; |
| x = x + (x >> 8); |
| x = x + (x >> 16); |
| x = x + (x >> 32); |
| return x & 0x7f; |
| } |
| #else |
| #define popcount64(x) __builtin_popcountll(x) |
| #endif |
| |
| #else /* HWD_BITS == 32 */ |
| |
| #define WTYPE uint32_t |
| #ifdef HWD_NO_POPCOUNT |
| static inline int popcount32(uint32_t x) { |
| x = x - ((x >> 1) & 0x55555555); |
| x = (x & 0x33333333) + ((x >> 2) & 0x33333333); |
| x = (x + (x >> 4)) & 0x0f0f0f0f; |
| x = x + (x >> 8); |
| x = x + (x >> 16); |
| return x & 0x7f; |
| } |
| #else |
| #define popcount32(x) __builtin_popcount((uint32_t)x) |
| #endif |
| |
| #endif |
| |
| /* Probability that the smallest of n numbers in [0..1) is <= x . */ |
| static double pco_scale(double x, double n) { |
| if (x >= 1.0 || x <= 0.0) |
| return x; |
| |
| /* This is the result we want: return 1.0 - pow(1.0 - x, n); except the |
| important cases are with x very small so this method gives better |
| accuracy. */ |
| |
| return -expm1(log1p(-x) * n); |
| } |
| |
| /* The idea of the test is based around Hamming weights. We calculate the |
| average number of bits per BITS-bit word and how it depends on the |
| weights of the previous DIM words. There are SIZE different categories |
| for the previous words. For each one accumulate number of samples |
| (get_count(cs[j]) and count_sum[j].c) and number of bits per sample |
| (get_sum(cs[j]) and count_sum[j].s) . |
| |
| To increase cache hits, we pack a 13-bit unsigned counter (upper bits) |
| and a and a 19-bit unsigned sum of Hamming weights (lower bits) into a |
| uint32_t. It would make sense to use bitfields, but in this way |
| update_cs() can update both fields with a single sum. */ |
| |
| static inline int get_count(uint32_t cs) { return cs >> SUM_BITS; } |
| |
| static inline int get_sum(uint32_t cs) { return cs & ((1 << SUM_BITS) - 1); } |
| |
| /* We add bc to the sum field of *p then add 1 to the count field. */ |
| static inline void update_cs(int bc, uint32_t *p) { |
| *p += bc + (1 << SUM_BITS); |
| } |
| |
| #ifdef HWD_MMAP |
| // "Small" counters/sums |
| static uint32_t *cs; |
| |
| // "Large" counters/sums |
| static struct { |
| uint64_t c; |
| int64_t s; |
| } * count_sum; |
| #else |
| // "Small" counters/sums |
| static uint32_t cs[SIZE]; |
| |
| // "Large" counters/sums |
| static struct { |
| uint64_t c; |
| int64_t s; |
| } count_sum[SIZE]; |
| #endif |
| |
| #if HWD_BITS == 128 |
| |
| /* Keeps track of the sum of values, as is in this case it is not |
| guaranteed not to overflow (but probability is infinitesimal if the |
| source is random). */ |
| static int64_t tot_sums; |
| |
| /* Copy accumulated numbers out of cs[] into count_sum, then zero the ones |
| in cs[]. We have to check explicitly that values do not overflow. */ |
| |
| static void desat(const int64_t next_batch_size) { |
| int64_t c = 0, s = 0; |
| |
| for (int i = 0; i < SIZE; i++) { |
| const int32_t st = cs[i]; |
| |
| const int count = get_count(st); |
| const int sum = get_sum(st); |
| |
| c += count; |
| s += sum; |
| |
| count_sum[i].c += count; |
| /* In cs[] the total Hamming weight is stored as actual weight. In |
| count_sum, it is stored as difference from expected average |
| Hamming weight, hence (BITS/2) * count */ |
| count_sum[i].s += sum - (HWD_BITS / 2) * count; |
| cs[i] = 0; |
| } |
| |
| if (c != next_batch_size || s != tot_sums) { |
| fprintf(stderr, "Counters or values overflowed. Seriously non-random.\n"); |
| printf("p = %.3g\n", 1e-100); |
| exit(0); |
| } |
| } |
| |
| #else |
| |
| /* Copy accumulated numbers out of cs[] into count_sum, then zero the ones |
| in cs[]. Note it is impossible for totals to overflow unless counts do. */ |
| |
| static void desat(const int64_t next_batch_size) { |
| int64_t c = 0; |
| |
| for (uint64_t i = 0; i < SIZE; i++) { |
| const int32_t st = cs[i]; |
| const int count = get_count(st); |
| |
| c += count; |
| |
| count_sum[i].c += count; |
| /* In cs[] the total Hamming weight is stored as actual weight. In |
| count_sum, it is stored as difference from expected average |
| Hamming weight, hence (BITS/2) * ct */ |
| count_sum[i].s += get_sum(st) - (HWD_BITS / 2) * count; |
| cs[i] = 0; |
| } |
| |
| if (c != next_batch_size) { |
| fprintf(stderr, "Counters overflowed. Seriously non-random.\n"); |
| printf("p = %.3g\n", 1e-100); |
| exit(0); |
| } |
| } |
| |
| #endif |
| |
| /* sig is the last signature from the previous call. At each step it |
| contains an index into cs[], derived from the Hamming weights of the |
| previous DIM numbers. Considered as a base 3 number, the most |
| significant digit is the most recent trit. n is the batch size. */ |
| |
| #if HWD_BITS == 32 |
| |
| static inline uint32_t scan_batch(uint32_t sig, int64_t n, uint32_t *ts) { |
| uint32_t t = ts ? *ts : 0; |
| int bc; |
| |
| for (int64_t i = 0; i < n; i++) { |
| #if HWD_PRNG_BITS == 64 |
| const uint64_t w64 = next(); |
| uint32_t w32 = w64 >> 32; |
| if (ts) { |
| bc = popcount32(w32 ^ w32 << 1 ^ t); |
| t = w32 >> 31; |
| } else |
| bc = popcount32(w32); |
| |
| update_cs(bc, cs + sig); |
| sig = DIV3(sig) + ((bc >= 15) + (bc >= 18)) * (SIZE / 3); |
| |
| w32 = w64; |
| |
| if (ts) { |
| bc = popcount32(w32 ^ w32 << 1 ^ t); |
| t = w32 >> 31; |
| } else |
| bc = popcount32(w32); |
| |
| update_cs(bc, cs + sig); |
| sig = DIV3(sig) + ((bc >= 15) + (bc >= 18)) * (SIZE / 3); |
| #else |
| const uint32_t w = next(); |
| if (ts) { |
| bc = popcount32(w ^ w << 1 ^ t); |
| t = w >> 31; |
| } else |
| bc = popcount32(w); |
| |
| update_cs(bc, cs + sig); |
| sig = DIV3(sig) + ((bc >= 15) + (bc >= 18)) * (SIZE / 3); |
| #endif |
| } |
| |
| if (ts) |
| *ts = t; |
| /* return the current signature so it can be passed back in on the next batch |
| */ |
| return sig; |
| } |
| |
| #elif HWD_BITS == 64 |
| |
| static inline uint32_t scan_batch(uint32_t sig, int64_t n, uint64_t *ts) { |
| uint64_t t = ts ? *ts : 0; |
| int bc; |
| |
| for (int64_t i = 0; i < n; i++) { |
| const uint64_t w = next(); |
| |
| if (ts) { |
| bc = popcount64(w ^ w << 1 ^ t); |
| t = w >> 63; |
| } else |
| bc = popcount64(w); |
| |
| update_cs(bc, cs + sig); |
| sig = DIV3(sig) + ((bc >= 30) + (bc >= 35)) * (SIZE / 3); |
| } |
| |
| if (ts) |
| *ts = t; |
| /* return the current signature so it can be passed back in on the next batch |
| */ |
| return sig; |
| } |
| |
| #else |
| |
| static inline uint32_t scan_batch(uint32_t sig, int64_t n, uint64_t *ts) { |
| uint64_t t = ts ? *ts : 0; |
| int bc; |
| tot_sums = 0; // In this case we have to keep track of the values, too |
| |
| for (int64_t i = 0; i < n; i++) { |
| const uint64_t w0 = next(); |
| const uint64_t w1 = next(); |
| |
| if (ts) { |
| bc = popcount64(w0 ^ w0 << 1 ^ t); |
| bc += popcount64(w1 ^ (w1 << 1) ^ (w0 >> 63)); |
| t = w1 >> 63; |
| } else |
| bc = popcount64(w0) + popcount64(w1); |
| |
| tot_sums += bc; |
| update_cs(bc, cs + sig); |
| sig = DIV3(sig) + ((bc >= 61) + (bc >= 68)) * (SIZE / 3); |
| } |
| |
| if (ts) |
| *ts = t; |
| /* return the current signature so it can be passed back in on the next batch |
| */ |
| return sig; |
| } |
| |
| #endif |
| |
| /* Now we're out of the the accumulate phase, which is the inside loop. |
| Next is analysis. */ |
| |
| /* Mostly a debugging printf, though it can tell you a bit about the |
| structure of a prng when it fails. Print sig out in base 3, least |
| significant digits first. This means the most recent trit is the |
| rightmost. */ |
| |
| static void print_sig(uint32_t sig) { |
| for (uint64_t i = DIM; i > 0; i--) { |
| putchar(sig % 3 + '0'); |
| sig /= 3; |
| } |
| } |
| |
| #ifndef M_SQRT1_2 |
| /* 1.0/sqrt(2.0) */ |
| #define M_SQRT1_2 0.70710678118654752438 |
| #endif |
| /* 1.0/sqrt(3.0) */ |
| #define CORRECT3 0.57735026918962576451 |
| /* 1.0/sqrt(6.0) */ |
| #define CORRECT6 0.40824829046386301636 |
| |
| /* This is a transform similar in spirit to the Walsh-Hadamard transform |
| (see the paper). It's ortho-normal. So with independent normal |
| distribution mean 0 standard deviation 1 in, we get independent normal |
| distribution mean 0 standard deviation 1 out, except maybe for element 0. |
| And of course, for certain kinds of bad prngs when the null hypthosis is |
| false, some of these numbers will get extreme. */ |
| |
| static void mix3(double *ct, int sig) { |
| double *p1 = ct + sig, *p2 = p1 + sig; |
| double a, b, c; |
| |
| for (int i = 0; i < sig; i++) { |
| a = ct[i]; |
| b = p1[i]; |
| c = p2[i]; |
| ct[i] = (a + b + c) * CORRECT3; |
| p1[i] = (a - c) * M_SQRT1_2; |
| p2[i] = (2 * b - a - c) * CORRECT6; |
| } |
| |
| sig = DIV3(sig); |
| if (sig) { |
| mix3(ct, sig); |
| mix3(p1, sig); |
| mix3(p2, sig); |
| } |
| } |
| |
| /* categorise sig based on nonzero ternary digits. */ |
| static int cat(uint32_t sig) { |
| int r = 0; |
| |
| while (sig) { |
| r += (sig % 3) != 0; |
| sig /= 3; |
| } |
| |
| return (r >= NUMCATS ? NUMCATS : r) - 1; |
| } |
| |
| /* Apply the transform; then, compute, log and return the resulting p-value. */ |
| |
| #ifdef HWD_MMAP |
| static double *norm; |
| #else |
| static double norm[SIZE]; // This might be large |
| #endif |
| |
| static double compute_pvalue(const bool trans) { |
| const double db = HWD_BITS * 0.25; |
| |
| for (uint64_t i = 0; i < SIZE; i++) { |
| /* copy the bit count totals from count_sum[i].s to norm[i] with |
| normalisation. We expect mean 0 standard deviation 1 db is the |
| expected variance for Hamming weight of BITS-bit words. |
| count_sum[i].c is number of samples */ |
| if (count_sum[i].c == 0) |
| norm[i] = 0.0; |
| else |
| norm[i] = count_sum[i].s / sqrt(count_sum[i].c * db); |
| } |
| |
| /* The transform. The wonderful transform. After this we expect still |
| normalised to mean 0 stdev 1 under the null hypothesis. (But not for |
| element 0 which we will ignore.) */ |
| mix3(norm, SIZE / 3); |
| |
| double overall_pvalue = DBL_MAX; |
| |
| /* To make the test more sensitive (see the paper) we split the |
| elements of norm into NUMCAT categories. These are based only on the |
| index into norm, not the content. We go though norm[], decide which |
| category each one is in, and record the signature (sig[]) and the |
| absolute value (sigma[]) For the most extreme value in each |
| category. Also a count (cat_count[]) of how many were in each |
| category. */ |
| |
| double sigma[NUMCATS]; |
| uint32_t sig[NUMCATS], cat_count[NUMCATS] = {}; |
| for (int i = 0; i < NUMCATS; i++) |
| sigma[i] = DBL_MIN; |
| |
| for (uint64_t i = 1; i < SIZE; i++) { |
| const int c = cat(i); |
| cat_count[c]++; |
| const double x = fabs(norm[i]); |
| if (x > sigma[c]) { |
| sig[c] = i; |
| sigma[c] = x; |
| } |
| } |
| |
| /* For each category, calculate a p-value, put the lowest into |
| overall_pvalue, and print something out. */ |
| for (int i = 0; i < NUMCATS; i++) { |
| printf("mix3 extreme = %.5f (sig = ", sigma[i]); |
| print_sig(sig[i]); |
| /* convert absolute value of approximate normal into p-value. */ |
| double pvalue = erfc(M_SQRT1_2 * sigma[i]); |
| /* Ok, that's the lowest p-value cherry picked out of a choice of |
| cat_count[i] of them. Must correct for that. */ |
| pvalue = pco_scale(pvalue, cat_count[i]); |
| printf(") weight %s%d (%" PRIu32 "), p-value = %.3g\n", |
| i == NUMCATS - 1 ? ">=" : "", i + 1, cat_count[i], pvalue); |
| if (pvalue < overall_pvalue) |
| overall_pvalue = pvalue; |
| } |
| |
| printf("bits per word = %d (analyzing %s); min category p-value = %.3g\n\n", |
| HWD_BITS, trans ? "transitions" : "bits", overall_pvalue); |
| /* again, we're cherry picking worst of NUMCATS, so correct it again. */ |
| return pco_scale(overall_pvalue, NUMCATS); |
| } |
| |
| static time_t tstart; |
| static double low_pvalue = DBL_MIN; |
| |
| /* This is the call made when we want to print some analysis. This will be |
| done multiple times if --progress is used. */ |
| static void analyze(int64_t pos, bool trans, bool final) { |
| |
| if (pos < 2 * pow(2.0 / (1.0 - P), DIM)) |
| printf("WARNING: p-values are unreliable, you have to wait (insufficient " |
| "data for meaningful answer)\n"); |
| |
| const double pvalue = compute_pvalue(trans); |
| const time_t tm = time(0); |
| |
| printf("processed %.3g bytes in %.3g seconds (%.4g GB/s, %.4g TB/h). %s\n", |
| (double)pos, (double)(tm - tstart), pos * 1E-9 / (double)(tm - tstart), |
| pos * (3600 * 1E-12) / (double)(tm - tstart), ctime(&tm)); |
| |
| if (final) |
| printf("final\n"); |
| printf("p = %.3g\n", pvalue); |
| |
| if (pvalue < low_pvalue) |
| exit(0); |
| |
| if (!final) |
| printf("------\n\n"); |
| } |
| |
| static int64_t progsize[] = { |
| 100000000, 125000000, 150000000, 175000000, 200000000, 250000000, 300000000, |
| 400000000, 500000000, 600000000, 700000000, 850000000, 0}; |
| |
| /* We use the all-one signature (the most probable) as initial signature. */ |
| static int64_t pos; |
| static uint32_t last_sig = (SIZE - 1) / 2; |
| static WTYPE ts; |
| static int64_t next_progr = 100000000; // progsize[0] |
| static int progr_index; |
| |
| static void run_test(const int64_t n, const bool trans, const bool progress) { |
| |
| WTYPE *const p = trans ? &ts : NULL; |
| |
| while (n < 0 || pos < n) { |
| int64_t next_batch_size = batch_size[DIM]; |
| if (n >= 0 && (n - pos) / (HWD_BITS / 8) < next_batch_size) |
| next_batch_size = (n - pos) / (HWD_BITS / 8) & ~UINT64_C(7); |
| |
| if (next_batch_size == 0) |
| break; |
| /* TEST_ITERATIONS() corrects batch_size depending on HWD_BITS and |
| * HWD_PRNG_BITS */ |
| last_sig = scan_batch(last_sig, TEST_ITERATIONS(next_batch_size), p); |
| desat(next_batch_size); |
| pos += next_batch_size * (HWD_BITS / 8); |
| |
| if (progress && pos >= next_progr) { |
| analyze(pos, trans, false); |
| progsize[progr_index++] *= 10; |
| next_progr = progsize[progr_index]; |
| if (next_progr == 0) { |
| progr_index = 0; |
| next_progr = progsize[0]; |
| } |
| } |
| } |
| |
| analyze(pos, trans, true); |
| } |
| |
| int main(int argc, char **argv) { |
| double dn; |
| int64_t n = -1; |
| bool trans = false, progress = false; |
| |
| #ifdef HWD_MMAP |
| fprintf(stderr, "Allocating memory via mmap()... "); |
| // (SIZE + 1) is necessary for a correct memory alignment. |
| cs = mmap( |
| (void *)(0x0UL), |
| (SIZE + 1) * sizeof *cs + SIZE * sizeof *norm + SIZE * sizeof *count_sum, |
| PROT_READ | PROT_WRITE, |
| MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | (30 << MAP_HUGE_SHIFT), 0, 0); |
| if (cs == MAP_FAILED) { |
| fprintf(stderr, "Failed.\n"); |
| exit(1); |
| } |
| fprintf(stderr, "OK.\n"); |
| norm = (void *)(cs + SIZE + 1); |
| count_sum = (void *)(norm + SIZE); |
| #endif |
| |
| tstart = time(0); |
| |
| for (int i = 1; i < argc; i++) { |
| if (strcmp(argv[i], "--progress") == 0) |
| progress = true; |
| else if (strcmp(argv[i], "-t") == 0) |
| trans = true; |
| else if (sscanf(argv[i], "%lf", &dn) == 1) |
| n = (int64_t)dn; |
| else if (sscanf(argv[i], "--low-pv=%lf", &low_pvalue) == 1) { |
| } else { |
| fprintf(stderr, "Optional arg must be --progress or -t or " |
| "--low-pv=number or numeric\n"); |
| exit(1); |
| } |
| } |
| |
| if (n <= 0) |
| progress = true; |
| |
| run_test(n, trans, progress); |
| |
| exit(0); |
| } |