blob: 6cd822e0ec6e5605d4e106e27aba17a55fc73300 [file] [log] [blame] [raw]
#ifndef H_FIO_STRING_H
/*
Copyright: Boaz Segev, 2018
License: MIT
*/
/**
* A Dynamic String C library for ease of use and binary strings.
*
* This is different from the fio.h library in the sense that it does NOT
* include a built-in reference counter.
*
* The string is a simple byte string which is compatible with binary data (NUL
* is a valid byte).
*
* Example use:
*
* fio_str_s str = FIO_STR_INIT; // container on the stack.
* fio_str_write(&str, "hello", 5); // add / remove / read data...
* printf("String: %s", fio_str_data(&str)); // print data
* fio_str_free(&str) // free the data - NOT the container.
*
* Should work with both 32bit and 64bit architectures.
*/
#define H_FIO_STRING_H
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#if defined(__unix__) || defined(__APPLE__) || defined(__linux__)
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#ifndef FIO_FUNC
#define FIO_FUNC static __attribute__((unused))
#endif
#ifndef FIO_ASSERT_ALLOC
/** Tests for an allocation failure. The behavior can be overridden. */
#define FIO_ASSERT_ALLOC(ptr) \
if (!(ptr)) { \
perror("FATAL ERROR: no memory (for string allocation)"); \
exit(errno); \
}
#endif
/* *****************************************************************************
String API - Initialization and Destruction
***************************************************************************** */
/**
* The `fio_str_s` type should be considered opaque.
*
* The type's attributes should be accessed ONLY through the accessor functions:
* `fio_str_state`, `fio_str_len`, `fio_str_data`, `fio_str_capa`, etc'.
*
* Note: when the `small` flag is present, the structure is ignored and used as
* raw memory for a small String (no aditional allocation). This changes the
* String's behavior drastically and requires that the accessor functions be
* used.
*/
typedef struct {
uint8_t small; /* Flag indicating the String is small and self-contained */
uint8_t frozen; /* Flag indicating the String is frozen (don't edit) */
uint8_t reserved[sizeof(size_t) - (sizeof(uint8_t) * 2)]; /* padding */
size_t capa; /* Known capacity for longer Strings */
size_t len; /* String length for longer Strings */
char *data; /* Data for longer Strings */
} fio_str_s;
/**
* This value should be used for initialization. For example:
*
* // on the stack
* fio_str_s str = FIO_STR_INIT;
*
* // or on the heap
* fio_str_s *str = malloc(sizeof(*str);
* *str = FIO_STR_INIT;
*
* Remember to cleanup:
*
* // on the stack
* fio_str_free(&str);
*
* // or on the heap
* fio_str_free(str);
* free(str);
*/
#define FIO_STR_INIT ((fio_str_s){.data = NULL, .small = 1})
/**
* This macro allows the container to be initialized with existing data, as long
* as it's memory was allocated using `malloc`.
*
* The `capacity` value should exclude the NUL character (if exists).
*/
#define FIO_STR_INIT_EXISTING(buffer, length, capacity) \
((fio_str_s){.data = (buffer), .len = (length), .capa = (capacity)})
/**
* Frees the String's resources and _reinitializes the container_.
*
* Note: if the container isn't allocated on the stack, it should be freed
* separately using `free(s)`.
*/
inline FIO_FUNC void fio_str_free(fio_str_s *s);
/* *****************************************************************************
String API - String state (data pointers, length, capacity, etc')
***************************************************************************** */
/** String state information. */
typedef struct {
size_t capa; /* Buffer capacity. */
size_t len; /* String length. */
char *data; /* String's first byte. */
} fio_str_state_s;
/** Returns the String's complete state (capacity, length and pointer). */
inline FIO_FUNC fio_str_state_s fio_str_state(const fio_str_s *s);
/** Returns the String's length in bytes. */
inline FIO_FUNC size_t fio_str_len(fio_str_s *s);
/** Returns a pointer (`char *`) to the String's content. */
inline FIO_FUNC char *fio_str_data(fio_str_s *s);
/** Returns a byte pointer (`uint8_t *`) to the String's unsigned content. */
#define fio_str_bytes(s) ((uint8_t *)fio_str_data((s)))
/** Returns the String's existing capacity (total used & available memory). */
inline FIO_FUNC size_t fio_str_capa(fio_str_s *s);
/**
* Sets the new String size without reallocating any memory (limited by
* existing capacity).
*
* Returns the updated state of the String.
*
* Note: When shrinking, any existing data beyond the new size may be corrupted.
*/
inline FIO_FUNC fio_str_state_s fio_str_resize(fio_str_s *s, size_t size);
/**
* Clears the string (retaining the existing capacity).
*/
#define fio_str_clear(s) fio_str_resize((s), 0)
/* *****************************************************************************
String API - Memory management
***************************************************************************** */
/**
* Performs a best attempt at minimizing memory consumption.
*
* Actual effects depend on the underlying memory allocator and it's
* implementation. Not all allocators will free any memory.
*/
inline FIO_FUNC void fio_str_compact(fio_str_s *s);
/**
* Requires the String to have at least `needed` capacity. Returns the current
* state of the String.
*/
FIO_FUNC fio_str_state_s fio_str_capa_assert(fio_str_s *s, size_t needed);
/* *****************************************************************************
String API - UTF-8 State
***************************************************************************** */
/** Returns 1 if the String is UTF-8 valid and 0 if not. */
inline FIO_FUNC size_t fio_str_utf8_valid(fio_str_s *s);
/** Returns the String's length in UTF-8 characters. */
FIO_FUNC size_t fio_str_utf8_len(fio_str_s *s);
/**
* Takes a UTF-8 character selection information (UTF-8 position and length) and
* updates the same variables so they reference the raw byte slice information.
*
* If the String isn't UTF-8 valid up to the requested selection, than `pos`
* will be updated to `-1` otherwise values are always positive.
*
* The returned `len` value may be shorter than the original if there wasn't
* enough data left to accomodate the requested length. When a `len` value of
* `0` is returned, this means that `pos` marks the end of the String.
*
* Returns -1 on error and 0 on success.
*/
FIO_FUNC int fio_str_utf8_select(fio_str_s *s, intptr_t *pos, size_t *len);
/**
* Advances the `ptr` by one utf-8 character, placing the value of the UTF-8
* character into the i32 variable (which must be a signed integer with 32bits
* or more). On error, `i32` will be equal to `-1` and `ptr` will not step
* forwards.
*
* The `end` value is only used for overflow protection.
*
* This helper macro is used internally but left exposed for external use.
*/
#define FIO_STR_UTF8_CODE_POINT(ptr, end, i32)
/* *****************************************************************************
String API - Content Manipulation and Review
***************************************************************************** */
/**
* Writes data at the end of the String (similar to `fio_str_insert` with the
* argument `pos == -1`).
*/
inline FIO_FUNC fio_str_state_s fio_str_write(fio_str_s *s, const void *src,
size_t src_len);
/**
* Appens the `src` String to the end of the `dest` String.
*
* If `src` is empty, the resulting Strings will be equal.
*/
inline FIO_FUNC fio_str_state_s fio_str_concat(fio_str_s *dest,
fio_str_s const *src);
/**
* Replaces the data in the String - replacing `old_len` bytes starting at
* `start_pos`, with the data at `src` (`src_len` bytes long).
*
* Negative `start_pos` values are calculated backwards, `-1` == end of String.
*
* When `old_len` is zero, the function will insert the data at `start_pos`.
*
* If `src_len == 0` than `src` will be ignored and the data marked for
* replacement will be erased.
*/
inline FIO_FUNC fio_str_state_s fio_str_replace(fio_str_s *s,
intptr_t start_pos,
size_t old_len, const void *src,
size_t src_len);
/**
* Writes to the String using a vprintf like interface.
*
* Data is written to the end of the String.
*/
FIO_FUNC fio_str_state_s fio_str_vprintf(fio_str_s *s, const char *format,
va_list argv);
/**
* Writes to the String using a printf like interface.
*
* Data is written to the end of the String.
*/
FIO_FUNC fio_str_state_s fio_str_printf(fio_str_s *s, const char *format, ...);
/**
* Opens the file `filename` and pastes it's contents (or a slice ot it) at the
* end of the String. If `limit == 0`, than the data will be read until EOF.
*
* If the file can't be located, opened or read, or if `start_at` is beyond
* the EOF position, NULL is returned in the state's `data` field.
*
* Works on POSIX only.
*/
inline FIO_FUNC fio_str_state_s fio_str_fread(fio_str_s *s,
const char *filename,
intptr_t start_at,
intptr_t limit);
/**
* Prevents further manipulations to the String's content.
*/
inline FIO_FUNC void fio_str_freeze(fio_str_s *s);
/**
* Binary comparison returns `1` if both strings are equal and `0` if not.
*/
inline FIO_FUNC int fio_str_iseq(const fio_str_s *str1, const fio_str_s *str2);
/* *****************************************************************************
IMPLEMENTATION
***************************************************************************** */
/* *****************************************************************************
Implementation - String state (data pointers, length, capacity, etc')
***************************************************************************** */
/* the capacity when the string is stored in the container itself */
#define FIO_STR_SMALL_CAPA \
(sizeof(fio_str_s) - (size_t)(&((fio_str_s *)0)->reserved))
typedef struct {
uint8_t small;
uint8_t frozen;
char data[1];
} fio_str__small_s;
/** Returns the String's state (capacity, length and pointer). */
inline FIO_FUNC fio_str_state_s fio_str_state(const fio_str_s *s) {
if (!s)
return (fio_str_state_s){.capa = 0};
return (s->small || !s->data)
? (fio_str_state_s){.capa =
(s->frozen ? 0 : (FIO_STR_SMALL_CAPA - 1)),
.len = (size_t)(s->small >> 1),
.data = ((fio_str__small_s *)s)->data}
: (fio_str_state_s){.capa = (s->frozen ? 0 : s->capa),
.len = s->len,
.data = s->data};
}
/**
* Frees the String's resources and reinitializes the container.
*
* Note: if the container isn't allocated on the stack, it should be freed
* separately using `free(s)`.
*/
inline FIO_FUNC void fio_str_free(fio_str_s *s) {
if (!s->small)
free(s->data);
*s = FIO_STR_INIT;
}
/** Returns the String's length in bytes. */
inline FIO_FUNC size_t fio_str_len(fio_str_s *s) {
return (s->small || !s->data) ? (s->small >> 1) : s->len;
}
/** Returns a pointer (`char *`) to the String's content. */
inline FIO_FUNC char *fio_str_data(fio_str_s *s) {
return (s->small || !s->data) ? (((fio_str__small_s *)s)->data) : s->data;
}
/** Returns the String's existing capacity (allocated memory). */
inline FIO_FUNC size_t fio_str_capa(fio_str_s *s) {
return (s->small || !s->data) ? (FIO_STR_SMALL_CAPA - 1) : s->capa;
}
/**
* Sets the new String size without reallocating any memory (limited by
* existing capacity).
*
* Returns the updated state of the String.
*
* Note: When shrinking, any existing data beyond the new size may be corrupted.
*/
inline FIO_FUNC fio_str_state_s fio_str_resize(fio_str_s *s, size_t size) {
if (!s || s->frozen) {
return fio_str_state(s);
}
fio_str_capa_assert(s, size);
if (s->small || !s->data) {
s->small = (uint8_t)(((size << 1) | 1) & 0xFF);
((fio_str__small_s *)s)->data[size] = 0;
return (fio_str_state_s){.capa = (FIO_STR_SMALL_CAPA - 1),
.len = size,
.data = ((fio_str__small_s *)s)->data};
}
s->len = size;
s->data[size] = 0;
return (fio_str_state_s){.capa = s->capa, .len = size, .data = s->data};
}
/* *****************************************************************************
Implementation - Memory management
***************************************************************************** */
/**
* Rounds up allocated capacity to the closest 2 words byte boundary (leaving 1
* byte space for the NUL byte).
*
* This shouldn't effect actual allocation size and should only minimize the
* effects of the memory allocator's alignment rounding scheme.
*
* To clarify:
*
* Memory allocators are required to allocate memory on the minimal alignment
* required by the largest type (`long double`), which usually results in memory
* allocations using this alignment as a minimal spacing.
*
* For example, on 64 bit architectures, it's likely that `malloc(18)` will
* allocate the same amount of memory as `malloc(32)` due to alignment.
*
* In fact, on some allocators (i.e., jemalloc), spacing increases for larger
* allocations - meaning the allocator will round up to more than 16 bytes, as
* noted here: http://jemalloc.net/jemalloc.3.html#size_classes
*
* Note that this increased spacing, doesn't occure with facil.io's `fio_mem.h`
* allocator, since it uses 16 byte alignment right up until allocations are
* routed directly to `mmap` (due to their size, usually over 12KB).
*/
#define ROUND_UP_CAPA_2WORDS(num) \
(((num + 1) & (sizeof(long double) - 1)) \
? ((num + 1) | (sizeof(long double) - 1)) \
: (num))
/**
* Requires the String to have at least `needed` capacity. Returns the current
* state of the String.
*/
FIO_FUNC fio_str_state_s fio_str_capa_assert(fio_str_s *s, size_t needed) {
if (!s)
return (fio_str_state_s){.capa = 0};
char *tmp;
if (s->small || !s->data) {
goto is_small;
}
if (needed > s->capa) {
needed = ROUND_UP_CAPA_2WORDS(needed);
tmp = (char *)realloc(s->data, needed + 1);
FIO_ASSERT_ALLOC(tmp);
s->capa = needed;
s->data = tmp;
s->data[needed] = 0;
}
return (fio_str_state_s){
.capa = (s->frozen ? 0 : s->capa), .len = s->len, .data = s->data};
is_small:
/* small string (string data is within the container) */
if (needed < FIO_STR_SMALL_CAPA) {
return (fio_str_state_s){.capa = (s->frozen ? 0 : (FIO_STR_SMALL_CAPA - 1)),
.len = (size_t)(s->small >> 1),
.data = ((fio_str__small_s *)s)->data};
}
needed = ROUND_UP_CAPA_2WORDS(needed);
tmp = (char *)malloc(needed + 1);
FIO_ASSERT_ALLOC(tmp);
const size_t existing_len = (size_t)((s->small >> 1) & 0xFF);
if (existing_len) {
memcpy(tmp, ((fio_str__small_s *)s)->data, existing_len + 1);
} else {
tmp[0] = 0;
}
*s = (fio_str_s){
.small = 0,
.capa = needed,
.len = existing_len,
.data = tmp,
};
return (fio_str_state_s){
.capa = (s->frozen ? 0 : needed), .len = existing_len, .data = s->data};
}
/** Performs a best attempt at minimizing memory consumption. */
inline FIO_FUNC void fio_str_compact(fio_str_s *s) {
if (!s || (s->small || !s->data))
return;
char *tmp;
if (s->len < FIO_STR_SMALL_CAPA)
goto shrink2small;
tmp = realloc(s->data, s->len + 1);
FIO_ASSERT_ALLOC(tmp);
s->data = tmp;
s->capa = s->len;
return;
shrink2small:
/* move the string into the container */
tmp = s->data;
size_t len = s->len;
*s = (fio_str_s){.small = (uint8_t)(((len << 1) | 1) & 0xFF),
.frozen = s->frozen};
if (len) {
memcpy(((fio_str__small_s *)s)->data, tmp, len + 1);
}
free(tmp);
}
/* *****************************************************************************
Implementation - UTF-8 State
***************************************************************************** */
/**
* Maps the last 5 bits in a byte (0b11111xxx) to a UTF-8 codepoint length.
*
* Codepoint length 0 == error.
*
* The first valid length can be any value between 1 to 4.
*
* An intermidiate (second, third or forth) valid length must be 5.
*
* To map was populated using the following Ruby script:
*
* map = []; 32.times { map << 0 }; (0..0b1111).each {|i| map[i] = 1} ;
* (0b10000..0b10111).each {|i| map[i] = 5} ;
* (0b11000..0b11011).each {|i| map[i] = 2} ;
* (0b11100..0b11101).each {|i| map[i] = 3} ;
* map[0b11110] = 4; map;
*/
static uint8_t fio_str_utf8_map[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5,
5, 5, 2, 2, 2, 2, 3, 3, 4, 0};
#undef FIO_STR_UTF8_CODE_POINT
/**
* Advances the `ptr` by one utf-8 character, placing the value of the UTF-8
* character into the i32 variable (which must be a signed integer with 32bits
* or more). On error, `i32` will be equal to `-1` and `ptr` will not step
* forwards.
*
* The `end` value is only used for overflow protection.
*/
#define FIO_STR_UTF8_CODE_POINT(ptr, end, i32) \
do { \
switch (fio_str_utf8_map[((uint8_t *)(ptr))[0] >> 3]) { \
case 1: \
(i32) = ((uint8_t *)(ptr))[0]; \
++(ptr); \
break; \
case 2: \
if (((ptr) + 2 > (end)) || \
fio_str_utf8_map[((uint8_t *)(ptr))[1] >> 3] != 5) { \
(i32) = -1; \
break; \
} \
(i32) = \
((((uint8_t *)(ptr))[0] & 31) << 6) | (((uint8_t *)(ptr))[1] & 63); \
(ptr) += 2; \
break; \
case 3: \
if (((ptr) + 3 > (end)) || \
fio_str_utf8_map[((uint8_t *)(ptr))[1] >> 3] != 5 || \
fio_str_utf8_map[((uint8_t *)(ptr))[2] >> 3] != 5) { \
(i32) = -1; \
break; \
} \
(i32) = ((((uint8_t *)(ptr))[0] & 15) << 12) | \
((((uint8_t *)(ptr))[1] & 63) << 6) | \
(((uint8_t *)(ptr))[2] & 63); \
(ptr) += 3; \
break; \
case 4: \
if (((ptr) + 4 > (end)) || \
fio_str_utf8_map[((uint8_t *)(ptr))[1] >> 3] != 5 || \
fio_str_utf8_map[((uint8_t *)(ptr))[2] >> 3] != 5 || \
fio_str_utf8_map[((uint8_t *)(ptr))[3] >> 3] != 5) { \
(i32) = -1; \
break; \
} \
(i32) = ((((uint8_t *)(ptr))[0] & 7) << 18) | \
((((uint8_t *)(ptr))[1] & 63) << 12) | \
((((uint8_t *)(ptr))[2] & 63) << 6) | \
(((uint8_t *)(ptr))[3] & 63); \
(ptr) += 4; \
break; \
default: \
(i32) = -1; \
break; \
} \
} while (0);
/** Returns 1 if the String is UTF-8 valid and 0 if not. */
inline FIO_FUNC size_t fio_str_utf8_valid(fio_str_s *s) {
if (!s)
return 0;
fio_str_state_s state = fio_str_state(s);
if (!state.len)
return 1;
char *const end = state.data + state.len;
int32_t c = 0;
do {
FIO_STR_UTF8_CODE_POINT(state.data, end, c);
} while (c > 0 && state.data < end);
return state.data == end && c >= 0;
}
/** Returns the String's length in UTF-8 characters. */
FIO_FUNC size_t fio_str_utf8_len(fio_str_s *s) {
fio_str_state_s state = fio_str_state(s);
if (!state.len)
return 0;
char *end = state.data + state.len;
size_t utf8len = 0;
int32_t c = 0;
do {
++utf8len;
FIO_STR_UTF8_CODE_POINT(state.data, end, c);
} while (c > 0 && state.data < end);
if (state.data != end || c == -1) {
/* invalid */
return 0;
}
return utf8len;
}
/**
* Takes a UTF-8 character selection information (UTF-8 position and length) and
* updates the same variables so they reference the raw byte slice information.
*
* If the String isn't UTF-8 valid up to the requested selection, than `pos`
* will be updated to `-1` otherwise values are always positive.
*
* The returned `len` value may be shorter than the original if there wasn't
* enough data left to accomodate the requested length. When a `len` value of
* `0` is returned, this means that `pos` marks the end of the String.
*
* Returns -1 on error and 0 on success.
*/
FIO_FUNC int fio_str_utf8_select(fio_str_s *s, intptr_t *pos, size_t *len) {
fio_str_state_s state = fio_str_state(s);
if (!state.data)
goto error;
if (!state.len || *pos == -1)
goto at_end;
int32_t c = 0;
char *p = state.data;
char *const end = state.data + state.len;
size_t start;
if (*pos) {
if ((*pos) > 0) {
start = *pos;
while (start && p < end && c >= 0) {
FIO_STR_UTF8_CODE_POINT(p, end, c);
--start;
}
if (c == -1)
goto error;
if (start || p >= end)
goto at_end;
*pos = p - state.data;
} else {
/* walk backwards */
p = state.data + state.len - 1;
c = 0;
++*pos;
do {
switch (fio_str_utf8_map[((uint8_t *)p)[0] >> 3]) {
case 5:
++c;
break;
case 4:
if (c != 3)
goto error;
c = 0;
++(*pos);
break;
case 3:
if (c != 2)
goto error;
c = 0;
++(*pos);
break;
case 2:
if (c != 1)
goto error;
c = 0;
++(*pos);
break;
case 1:
if (c)
goto error;
++(*pos);
break;
default:
goto error;
}
--p;
} while (p > state.data && *pos);
if (c)
goto error;
++p; /* There's always an extra back-step */
*pos = (p - state.data);
}
}
/* find end */
start = *len;
while (start && p < end && c >= 0) {
FIO_STR_UTF8_CODE_POINT(p, end, c);
--start;
}
if (c == -1 || p > end)
goto error;
*len = p - (state.data + (*pos));
return 0;
at_end:
*pos = state.len;
*len = 0;
return 0;
error:
*pos = -1;
*len = 0;
return -1;
}
/* *****************************************************************************
Implementation - Content Manipulation and Review
***************************************************************************** */
/**
* Writes data at the end of the String (similar to `fio_str_insert` with the
* argument `pos == -1`).
*/
inline FIO_FUNC fio_str_state_s fio_str_write(fio_str_s *s, const void *src,
size_t src_len) {
if (!s || !src_len || !src || s->frozen)
return fio_str_state(s);
fio_str_state_s state = fio_str_resize(s, src_len + fio_str_len(s));
memcpy(state.data + (state.len - src_len), src, src_len);
return state;
}
/**
* Appens the `src` String to the end of the `dest` String.
*/
inline FIO_FUNC fio_str_state_s fio_str_concat(fio_str_s *dest,
fio_str_s const *src) {
if (!dest || !src || dest->frozen)
return fio_str_state(dest);
fio_str_state_s src_state = fio_str_state(src);
if (!src_state.len)
return fio_str_state(dest);
fio_str_state_s state =
fio_str_resize(dest, src_state.len + fio_str_len(dest));
memcpy(state.data + state.len - src_state.len, src_state.data, src_state.len);
return state;
}
/**
* Replaces the data in the String - replacing `old_len` bytes starting at
* `start_pos`, with the data at `src` (`src_len` bytes long).
*
* Negative `start_pos` values are calculated backwards, `-1` == end of String.
*
* When `old_len` is zero, the function will insert the data at `start_pos`.
*
* If `src_len == 0` than `src` will be ignored and the data marked for
* replacement will be erased.
*/
inline FIO_FUNC fio_str_state_s fio_str_replace(fio_str_s *s,
intptr_t start_pos,
size_t old_len, const void *src,
size_t src_len) {
fio_str_state_s state = fio_str_state(s);
if (!s || s->frozen || (!old_len && !src_len))
return state;
if (start_pos < 0) {
/* backwards position indexing */
start_pos += s->len + 1;
if (start_pos < 0)
start_pos = 0;
}
if (start_pos + old_len >= state.len) {
/* old_len overflows the end of the String */
if (s->small || !s->data) {
s->small = 1 | ((size_t)((start_pos << 1) & 0xFF));
} else {
s->len = start_pos;
}
return fio_str_write(s, src, src_len);
}
/* data replacement is now always in the middle (or start) of the String */
const size_t new_size = state.len + (src_len - old_len);
if (old_len != src_len) {
/* there's an offset requiring an adjustment */
if (old_len < src_len) {
/* make room for new data */
const size_t offset = src_len - old_len;
state = fio_str_resize(s, state.len + offset);
}
memmove(state.data + start_pos + src_len, state.data + start_pos + old_len,
(state.len - start_pos) - old_len);
}
if (src_len) {
memcpy(state.data + start_pos, src, src_len);
}
return fio_str_resize(s, new_size);
}
/** Writes to the String using a vprintf like interface. */
FIO_FUNC __attribute__((format(printf, 2, 0))) fio_str_state_s
fio_str_vprintf(fio_str_s *s, const char *format, va_list argv) {
va_list argv_cpy;
va_copy(argv_cpy, argv);
int len = vsnprintf(NULL, 0, format, argv_cpy);
va_end(argv_cpy);
if (len <= 0)
return fio_str_state(s);
fio_str_state_s state = fio_str_resize(s, len + fio_str_len(s));
vsnprintf(state.data + (state.len - len), len + 1, format, argv);
return state;
}
/** Writes to the String using a printf like interface. */
FIO_FUNC __attribute__((format(printf, 2, 3))) fio_str_state_s
fio_str_printf(fio_str_s *s, const char *format, ...) {
va_list argv;
va_start(argv, format);
fio_str_state_s state = fio_str_vprintf(s, format, argv);
va_end(argv);
return state;
}
/**
* Opens the file `filename` and pastes it's contents (or a slice ot it) at the
* end of the String. If `limit == 0`, than the data will be read until EOF.
*
* If the file can't be located, opened or read, or if `start_at` is beyond
* the EOF position, NULL is returned in the state's `data` field.
*/
inline FIO_FUNC fio_str_state_s fio_str_fread(fio_str_s *s,
const char *filename,
intptr_t start_at,
intptr_t limit) {
fio_str_state_s state = {.data = NULL};
#if defined(__unix__) || defined(__linux__) || defined(__APPLE__)
/* POSIX implementations. */
if (filename == NULL)
return state;
struct stat f_data;
int file = -1;
char *path = NULL;
size_t path_len = 0;
if (filename[0] == '~' && (filename[1] == '/' || filename[1] == '\\')) {
char *home = getenv("HOME");
if (home) {
size_t filename_len = strlen(filename);
size_t home_len = strlen(home);
if ((home_len + filename_len) >= (1 << 16)) {
/* too long */
return state;
}
if (home[home_len - 1] == '/' || home[home_len - 1] == '\\')
--home_len;
path_len = home_len + filename_len - 1;
path = malloc(path_len + 1);
FIO_ASSERT_ALLOC(path);
memcpy(path, home, home_len);
memcpy(path + home_len, filename + 1, filename_len);
path[path_len] = 0;
filename = path;
}
}
if (stat(filename, &f_data)) {
goto finish;
}
if (f_data.st_size <= 0 || start_at >= f_data.st_size) {
state = fio_str_state(s);
goto finish;
}
file = open(filename, O_RDONLY);
if (-1 == file)
goto finish;
if (start_at < 0) {
start_at = f_data.st_size + start_at;
if (start_at < 0)
start_at = 0;
}
if (limit <= 0 || f_data.st_size < (limit + start_at))
limit = f_data.st_size - start_at;
const size_t org_len = fio_str_len(s);
state = fio_str_resize(s, org_len + limit);
if (pread(file, state.data + org_len, limit, start_at) != (ssize_t)limit) {
close(file);
fio_str_resize(s, org_len);
state.data = NULL;
state.len = state.capa = 0;
goto finish;
}
close(file);
finish:
free(path);
return state;
#else
/* TODO: consider adding non POSIX implementations. */
return state;
#endif
}
/**
* Prevents further manipulations to the String's content.
*/
inline FIO_FUNC void fio_str_freeze(fio_str_s *s) {
if (!s)
return;
s->frozen = 1;
}
/**
* Binary comparison returns `1` if both strings are equal and `0` if not.
*/
inline FIO_FUNC int fio_str_iseq(const fio_str_s *str1, const fio_str_s *str2) {
if (str1 == str2)
return 1;
if (!str1 || !str2)
return 0;
fio_str_state_s s1 = fio_str_state(str1);
fio_str_state_s s2 = fio_str_state(str2);
return (s1.len == s2.len && !memcmp(s1.data, s2.data, s1.len));
}
/* *****************************************************************************
Testing
***************************************************************************** */
#if DEBUG
#include <stdio.h>
#define TEST_ASSERT(cond, ...) \
if (!(cond)) { \
fprintf(stderr, "* " __VA_ARGS__); \
fprintf(stderr, "\n !!! Testing failed !!!\n"); \
exit(-1); \
}
/**
* Removes any FIO_ARY_TYPE_INVALID *pointers* from an Array, keeping all other
* data in the array.
*
* This action is O(n) where n in the length of the array.
* It could get expensive.
*/
FIO_FUNC inline void fio_str_test(void) {
fprintf(stderr, "=== Testing Core String features (fio_str.h)\n");
fprintf(stderr, "* String container size: %zu\n", sizeof(fio_str_s));
fprintf(stderr,
"* Self-Contained String Capacity (FIO_STR_SMALL_CAPA): %zu\n",
FIO_STR_SMALL_CAPA);
fio_str_s str = {.small = 0}; /* test zeroed out memory */
TEST_ASSERT(fio_str_capa(&str) == FIO_STR_SMALL_CAPA - 1,
"Small String capacity reporting error!");
TEST_ASSERT(fio_str_len(&str) == 0, "Small String length reporting error!");
TEST_ASSERT(fio_str_data(&str) ==
(char *)((uintptr_t)(&str + 1) - FIO_STR_SMALL_CAPA),
"Small String pointer reporting error!");
fio_str_write(&str, "World", 4);
TEST_ASSERT(str.small,
"Small String writing error - not small on small write!");
TEST_ASSERT(fio_str_capa(&str) == FIO_STR_SMALL_CAPA - 1,
"Small String capacity reporting error after write!");
TEST_ASSERT(fio_str_len(&str) == 4,
"Small String length reporting error after write!");
TEST_ASSERT(fio_str_data(&str) ==
(char *)((uintptr_t)(&str + 1) - FIO_STR_SMALL_CAPA),
"Small String pointer reporting error after write!");
TEST_ASSERT(strlen(fio_str_data(&str)) == 4,
"Small String NUL missing after write (%zu)!",
strlen(fio_str_data(&str)));
TEST_ASSERT(!strcmp(fio_str_data(&str), "Worl"),
"Small String write error (%s)!", fio_str_data(&str));
fio_str_capa_assert(&str, sizeof(fio_str_s) - 1);
TEST_ASSERT(!str.small,
"Long String reporting as small after capacity update!");
TEST_ASSERT(fio_str_capa(&str) == sizeof(fio_str_s) - 1,
"Long String capacity update error (%zu != %zu)!",
fio_str_capa(&str), sizeof(fio_str_s));
TEST_ASSERT(
fio_str_len(&str) == 4,
"Long String length changed during conversion from small string (%zu)!",
fio_str_len(&str));
TEST_ASSERT(fio_str_data(&str) == str.data,
"Long String pointer reporting error after capacity update!");
TEST_ASSERT(strlen(fio_str_data(&str)) == 4,
"Long String NUL missing after capacity update (%zu)!",
strlen(fio_str_data(&str)));
TEST_ASSERT(!strcmp(fio_str_data(&str), "Worl"),
"Long String value changed after capacity update (%s)!",
fio_str_data(&str));
fio_str_write(&str, "d!", 2);
TEST_ASSERT(!strcmp(fio_str_data(&str), "World!"),
"Long String `write` error (%s)!", fio_str_data(&str));
fio_str_replace(&str, 0, 0, "Hello ", 6);
TEST_ASSERT(!strcmp(fio_str_data(&str), "Hello World!"),
"Long String `insert` error (%s)!", fio_str_data(&str));
fio_str_resize(&str, 6);
TEST_ASSERT(!strcmp(fio_str_data(&str), "Hello "),
"Long String `resize` clipping error (%s)!", fio_str_data(&str));
fio_str_replace(&str, 6, 0, "My World!", 9);
TEST_ASSERT(!strcmp(fio_str_data(&str), "Hello My World!"),
"Long String `replace` error when testing overflow (%s)!",
fio_str_data(&str));
str.capa = str.len;
fio_str_replace(&str, -10, 2, "Big", 3);
TEST_ASSERT(!strcmp(fio_str_data(&str), "Hello Big World!"),
"Long String `replace` error when testing splicing (%s)!",
fio_str_data(&str));
TEST_ASSERT(
fio_str_capa(&str) == ROUND_UP_CAPA_2WORDS(strlen("Hello Big World!")),
"Long String `fio_str_replace` capacity update error (%zu != %zu)!",
fio_str_capa(&str), ROUND_UP_CAPA_2WORDS(strlen("Hello Big World!")));
if (str.len < FIO_STR_SMALL_CAPA) {
fio_str_compact(&str);
TEST_ASSERT(str.small, "Compacting didn't change String to small!");
TEST_ASSERT(fio_str_len(&str) == strlen("Hello Big World!"),
"Compacting altered String length! (%zu != %zu)!",
fio_str_len(&str), strlen("Hello Big World!"));
TEST_ASSERT(!strcmp(fio_str_data(&str), "Hello Big World!"),
"Compact data error (%s)!", fio_str_data(&str));
TEST_ASSERT(fio_str_capa(&str) == FIO_STR_SMALL_CAPA - 1,
"Compacted String capacity reporting error!");
} else {
fprintf(stderr, "* skipped `compact` test!\n");
}
{
fio_str_freeze(&str);
fio_str_state_s old_state = fio_str_state(&str);
fio_str_write(&str, "more data to be written here", 28);
fio_str_replace(&str, 2, 1, "more data to be written here", 28);
fio_str_state_s new_state = fio_str_state(&str);
TEST_ASSERT(old_state.len == new_state.len,
"Frozen String length changed!");
TEST_ASSERT(old_state.data == new_state.data,
"Frozen String pointer changed!");
TEST_ASSERT(
old_state.capa == new_state.capa,
"Frozen String capacity changed (allowed, but shouldn't happen)!");
str.frozen = 0;
}
fio_str_printf(&str, " %u", 42);
TEST_ASSERT(!strcmp(fio_str_data(&str), "Hello Big World! 42"),
"`fio_str_printf` data error (%s)!", fio_str_data(&str));
{
fio_str_s str2 = FIO_STR_INIT;
fio_str_concat(&str2, &str);
TEST_ASSERT(fio_str_iseq(&str, &str2),
"`fio_str_concat` error, strings not equal (%s != %s)!",
fio_str_data(&str), fio_str_data(&str2));
fio_str_write(&str2, ":extra data", 11);
TEST_ASSERT(
!fio_str_iseq(&str, &str2),
"`fio_str_write` error after copy, strings equal ((%zu)%s == (%zu)%s)!",
fio_str_len(&str), fio_str_data(&str), fio_str_len(&str2),
fio_str_data(&str2));
fio_str_free(&str2);
}
fio_str_free(&str);
{
fio_str_state_s state = fio_str_fread(&str, __FILE__, 0, 0);
TEST_ASSERT(state.data,
"`fio_str_fread` error, no data was read for file %s!",
__FILE__);
TEST_ASSERT(!memcmp(state.data, "#ifndef H_FIO_STRING_H", 22),
"`fio_str_fread` content error, header mismatch!\n %s",
state.data);
TEST_ASSERT(
fio_str_utf8_valid(&str),
"`fio_str_utf8_valid` error, code in this file should be valid!");
TEST_ASSERT(fio_str_utf8_len(&str) &&
(fio_str_utf8_len(&str) <= fio_str_len(&str)) &&
(fio_str_utf8_len(&str) >= (fio_str_len(&str)) >> 1),
"`fio_str_utf8_len` error, invalid value (%zu / %zu!",
fio_str_utf8_len(&str), fio_str_len(&str));
{
/* String content == whole file (this file) */
intptr_t pos = -11;
size_t len = 20;
TEST_ASSERT(
fio_str_utf8_select(&str, &pos, &len) == 0,
"`fio_str_utf8_select` returned error for negative pos! (%zd, %zu)",
(ssize_t)pos, len);
TEST_ASSERT(
pos == (intptr_t)state.len - 10, /* no UTF-8 bytes in this file */
"`fio_str_utf8_select` error, negative position invalid! (%zd)",
(ssize_t)pos);
TEST_ASSERT(
len == 10,
"`fio_str_utf8_select` error, trancated length invalid! (%zd)",
(ssize_t)len);
pos = 10;
len = 20;
TEST_ASSERT(fio_str_utf8_select(&str, &pos, &len) == 0,
"`fio_str_utf8_select` returned error! (%zd, %zu)",
(ssize_t)pos, len);
TEST_ASSERT(pos == 10,
"`fio_str_utf8_select` error, position invalid! (%zd)",
(ssize_t)pos);
TEST_ASSERT(len == 20,
"`fio_str_utf8_select` error, length invalid! (%zd)",
(ssize_t)len);
}
}
fio_str_free(&str);
{
const char *utf8_sample = /* three hearts, small-big-small*/
"\xf0\x9f\x92\x95\xe2\x9d\xa4\xef\xb8\x8f\xf0\x9f\x92\x95";
fio_str_write(&str, utf8_sample, strlen(utf8_sample));
intptr_t pos = -2;
size_t len = 2;
TEST_ASSERT(fio_str_utf8_select(&str, &pos, &len) == 0,
"`fio_str_utf8_select` returned error for negative pos on "
"UTF-8 data! (%zd, %zu)",
(ssize_t)pos, len);
TEST_ASSERT(pos == (intptr_t)fio_str_len(&str) - 4, /* 4 byte emoji */
"`fio_str_utf8_select` error, negative position invalid on "
"UTF-8 data! (%zd)",
(ssize_t)pos);
TEST_ASSERT(len == 4, /* last utf-8 char is 4 byte long */
"`fio_str_utf8_select` error, trancated length invalid on "
"UTF-8 data! (%zd)",
(ssize_t)len);
pos = 1;
len = 20;
TEST_ASSERT(
fio_str_utf8_select(&str, &pos, &len) == 0,
"`fio_str_utf8_select` returned error on UTF-8 data! (%zd, %zu)",
(ssize_t)pos, len);
TEST_ASSERT(
pos == 4,
"`fio_str_utf8_select` error, position invalid on UTF-8 data! (%zd)",
(ssize_t)pos);
TEST_ASSERT(
len == 10,
"`fio_str_utf8_select` error, length invalid on UTF-8 data! (%zd)",
(ssize_t)len);
pos = 1;
len = 3;
TEST_ASSERT(
fio_str_utf8_select(&str, &pos, &len) == 0,
"`fio_str_utf8_select` returned error on UTF-8 data (2)! (%zd, %zu)",
(ssize_t)pos, len);
TEST_ASSERT(
len == 10, /* 3 UTF-8 chars: 4 byte + 4 byte + 2 byte codes == 10 */
"`fio_str_utf8_select` error, length invalid on UTF-8 data! (%zd)",
(ssize_t)len);
}
fio_str_free(&str);
fprintf(stderr, "* passed.\n");
}
#undef TEST_ASSERT
#else
#define fio_str_test()
#endif
/* *****************************************************************************
Done
***************************************************************************** */
#undef FIO_FUNC
#undef FIO_ASSERT_ALLOC
#undef ROUND_UP_CAPA_2WORDS
#endif