| /* |
| datagen.c - compressible data generator test tool |
| Copyright (C) Yann Collet 2012-2015 |
| |
| GPL v2 License |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License along |
| with this program; if not, write to the Free Software Foundation, Inc., |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| |
| You can contact the author at : |
| - LZ4 source repository : http://code.google.com/p/lz4 |
| - LZ4 source mirror : https://github.com/Cyan4973/lz4 |
| - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c |
| */ |
| |
| /************************************** |
| Remove Visual warning messages |
| **************************************/ |
| #define _CRT_SECURE_NO_WARNINGS // fgets |
| |
| |
| /************************************** |
| Includes |
| **************************************/ |
| #include <stdio.h> // fgets, sscanf |
| #include <string.h> // strcmp |
| |
| |
| /************************************** |
| Basic Types |
| **************************************/ |
| #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ |
| # include <stdint.h> |
| typedef uint8_t BYTE; |
| typedef uint16_t U16; |
| typedef uint32_t U32; |
| typedef int32_t S32; |
| typedef uint64_t U64; |
| #else |
| typedef unsigned char BYTE; |
| typedef unsigned short U16; |
| typedef unsigned int U32; |
| typedef signed int S32; |
| typedef unsigned long long U64; |
| #endif |
| |
| |
| /************************************** |
| Constants |
| **************************************/ |
| #ifndef LZ4_VERSION |
| # define LZ4_VERSION "r125" |
| #endif |
| |
| #define KB *(1 <<10) |
| #define MB *(1 <<20) |
| #define GB *(1U<<30) |
| |
| #define CDG_SIZE_DEFAULT (64 KB) |
| #define CDG_SEED_DEFAULT 0 |
| #define CDG_COMPRESSIBILITY_DEFAULT 50 |
| #define PRIME1 2654435761U |
| #define PRIME2 2246822519U |
| |
| |
| /************************************** |
| Macros |
| **************************************/ |
| #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) |
| #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } |
| |
| |
| /************************************** |
| Local Parameters |
| **************************************/ |
| static unsigned no_prompt = 0; |
| static char* programName; |
| static unsigned displayLevel = 2; |
| |
| |
| /********************************************************* |
| functions |
| *********************************************************/ |
| |
| #define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) |
| static unsigned int CDG_rand(U32* src) |
| { |
| U32 rand32 = *src; |
| rand32 *= PRIME1; |
| rand32 += PRIME2; |
| rand32 = CDG_rotl32(rand32, 13); |
| *src = rand32; |
| return rand32; |
| } |
| |
| |
| #define CDG_RAND15BITS ((CDG_rand(seed) >> 3) & 32767) |
| #define CDG_RANDLENGTH ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15) |
| #define CDG_RANDCHAR (((CDG_rand(seed) >> 9) & 63) + '0') |
| static void CDG_generate(U64 size, U32* seed, double proba) |
| { |
| BYTE fullbuff[32 KB + 128 KB + 1]; |
| BYTE* buff = fullbuff + 32 KB; |
| U64 total=0; |
| U32 P32 = (U32)(32768 * proba); |
| U32 pos=1; |
| U32 genBlockSize = 128 KB; |
| |
| // Build initial prefix |
| fullbuff[0] = CDG_RANDCHAR; |
| while (pos<32 KB) |
| { |
| // Select : Literal (char) or Match (within 32K) |
| if (CDG_RAND15BITS < P32) |
| { |
| // Copy (within 64K) |
| U32 d; |
| int ref; |
| int length = CDG_RANDLENGTH + 4; |
| U32 offset = CDG_RAND15BITS + 1; |
| if (offset > pos) offset = pos; |
| ref = pos - offset; |
| d = pos + length; |
| while (pos < d) fullbuff[pos++] = fullbuff[ref++]; |
| } |
| else |
| { |
| // Literal (noise) |
| U32 d = pos + CDG_RANDLENGTH; |
| while (pos < d) fullbuff[pos++] = CDG_RANDCHAR; |
| } |
| } |
| |
| // Generate compressible data |
| pos = 0; |
| while (total < size) |
| { |
| if (size-total < 128 KB) genBlockSize = (U32)(size-total); |
| total += genBlockSize; |
| buff[genBlockSize] = 0; |
| pos = 0; |
| while (pos<genBlockSize) |
| { |
| // Select : Literal (char) or Match (within 32K) |
| if (CDG_RAND15BITS < P32) |
| { |
| // Copy (within 64K) |
| int ref; |
| U32 d; |
| int length = CDG_RANDLENGTH + 4; |
| U32 offset = CDG_RAND15BITS + 1; |
| if (pos + length > genBlockSize ) length = genBlockSize - pos; |
| ref = pos - offset; |
| d = pos + length; |
| while (pos < d) buff[pos++] = buff[ref++]; |
| } |
| else |
| { |
| // Literal (noise) |
| U32 d; |
| int length = CDG_RANDLENGTH; |
| if (pos + length > genBlockSize) length = genBlockSize - pos; |
| d = pos + length; |
| while (pos < d) buff[pos++] = CDG_RANDCHAR; |
| } |
| } |
| // output datagen |
| pos=0; |
| for (;pos+512<=genBlockSize;pos+=512) |
| printf("%512.512s", buff+pos); |
| for (;pos<genBlockSize;pos++) printf("%c", buff[pos]); |
| // Regenerate prefix |
| memcpy(fullbuff, buff + 96 KB, 32 KB); |
| } |
| } |
| |
| |
| int CDG_usage(void) |
| { |
| DISPLAY( "Compressible data generator\n"); |
| DISPLAY( "Usage :\n"); |
| DISPLAY( " %s [size] [args]\n", programName); |
| DISPLAY( "\n"); |
| DISPLAY( "Arguments :\n"); |
| DISPLAY( " -g# : generate # data (default:%i)\n", CDG_SIZE_DEFAULT); |
| DISPLAY( " -s# : Select seed (default:%i)\n", CDG_SEED_DEFAULT); |
| DISPLAY( " -p# : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT); |
| DISPLAY( " -h : display help and exit\n"); |
| return 0; |
| } |
| |
| |
| int main(int argc, char** argv) |
| { |
| int argNb; |
| int proba = CDG_COMPRESSIBILITY_DEFAULT; |
| U64 size = CDG_SIZE_DEFAULT; |
| U32 seed = CDG_SEED_DEFAULT; |
| |
| // Check command line |
| programName = argv[0]; |
| for(argNb=1; argNb<argc; argNb++) |
| { |
| char* argument = argv[argNb]; |
| |
| if(!argument) continue; // Protection if argument empty |
| |
| // Decode command (note : aggregated commands are allowed) |
| if (*argument=='-') |
| { |
| if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; } |
| |
| argument++; |
| while (*argument!=0) |
| { |
| switch(*argument) |
| { |
| case 'h': |
| return CDG_usage(); |
| case 'g': |
| argument++; |
| size=0; |
| while ((*argument>='0') && (*argument<='9')) |
| { |
| size *= 10; |
| size += *argument - '0'; |
| argument++; |
| } |
| if (*argument=='K') { size <<= 10; argument++; } |
| if (*argument=='M') { size <<= 20; argument++; } |
| if (*argument=='G') { size <<= 30; argument++; } |
| if (*argument=='B') { argument++; } |
| break; |
| case 's': |
| argument++; |
| seed=0; |
| while ((*argument>='0') && (*argument<='9')) |
| { |
| seed *= 10; |
| seed += *argument - '0'; |
| argument++; |
| } |
| break; |
| case 'p': |
| argument++; |
| proba=0; |
| while ((*argument>='0') && (*argument<='9')) |
| { |
| proba *= 10; |
| proba += *argument - '0'; |
| argument++; |
| } |
| if (proba<0) proba=0; |
| if (proba>100) proba=100; |
| break; |
| case 'v': |
| displayLevel = 4; |
| argument++; |
| break; |
| default: ; |
| } |
| } |
| |
| } |
| } |
| |
| // Get Seed |
| DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION); |
| DISPLAYLEVEL(3, "Seed = %u \n", seed); |
| if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba); |
| |
| CDG_generate(size, &seed, ((double)proba) / 100); |
| |
| return 0; |
| } |