| /*- |
| * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> |
| * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| #include <sys/cdefs.h> |
| __FBSDID("$FreeBSD: releng/11.1/usr.bin/sort/coll.c 318153 2017-05-10 20:29:33Z marius $"); |
| |
| #include <sys/types.h> |
| #include <errno.h> |
| #include <err.h> |
| #ifdef HAVE_LANGINFO |
| #include <langinfo.h> |
| #endif |
| #include <limits.h> |
| #include <math.h> |
| #include "md5.h" |
| #include <stdlib.h> |
| #include <string.h> |
| #include <wchar.h> |
| #include <wctype.h> |
| |
| #include "coll.h" |
| #include "vsort.h" |
| |
| #if defined __ANDROID__ && !defined iswblank |
| #define iswblank(c) ((c) == L' ' || (c) == L' ') |
| #endif |
| |
| struct key_specs *keys; |
| size_t keys_num = 0; |
| |
| wint_t symbol_decimal_point = L'.'; |
| /* there is no default thousands separator in collate rules: */ |
| wint_t symbol_thousands_sep = 0; |
| wint_t symbol_negative_sign = L'-'; |
| wint_t symbol_positive_sign = L'+'; |
| |
| static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); |
| static int gnumcoll(struct key_value*, struct key_value *, size_t offset); |
| static int monthcoll(struct key_value*, struct key_value *, size_t offset); |
| static int numcoll(struct key_value*, struct key_value *, size_t offset); |
| static int hnumcoll(struct key_value*, struct key_value *, size_t offset); |
| static int randomcoll(struct key_value*, struct key_value *, size_t offset); |
| static int versioncoll(struct key_value*, struct key_value *, size_t offset); |
| |
| #ifdef __ANDROID__ |
| static unsigned long long int wcstoull(const wchar_t *ws, wchar_t **end_p, int base) { |
| //assert(!end_p); |
| size_t buffer_size = wcslen(ws) * 4 + 1; |
| char buffer[buffer_size]; |
| size_t len = wcstombs(buffer, ws, buffer_size); |
| if(len == (size_t)-1) return 0; |
| if(len == buffer_size) buffer[len - 1] = 0; |
| return strtoull(buffer, NULL, base); |
| } |
| #endif |
| |
| /* |
| * Allocate keys array |
| */ |
| struct keys_array * |
| keys_array_alloc(void) |
| { |
| struct keys_array *ka; |
| size_t sz; |
| |
| sz = keys_array_size(); |
| ka = sort_malloc(sz); |
| memset(ka, 0, sz); |
| |
| return (ka); |
| } |
| |
| /* |
| * Calculate whether we need key hint space |
| */ |
| static size_t |
| key_hint_size(void) |
| { |
| |
| return (need_hint ? sizeof(struct key_hint) : 0); |
| } |
| |
| /* |
| * Calculate keys array size |
| */ |
| size_t |
| keys_array_size(void) |
| { |
| |
| return (keys_num * (sizeof(struct key_value) + key_hint_size())); |
| } |
| |
| /* |
| * Clean data of keys array |
| */ |
| void |
| clean_keys_array(const struct bwstring *s, struct keys_array *ka) |
| { |
| |
| if (ka) { |
| for (size_t i = 0; i < keys_num; ++i) { |
| const struct key_value *kv; |
| |
| kv = get_key_from_keys_array(ka, i); |
| if (kv->k && kv->k != s) |
| bwsfree(kv->k); |
| } |
| memset(ka, 0, keys_array_size()); |
| } |
| } |
| |
| /* |
| * Get pointer to a key value in the keys set |
| */ |
| struct key_value * |
| get_key_from_keys_array(struct keys_array *ka, size_t ind) |
| { |
| |
| return ((struct key_value *)((caddr_t)ka->key + |
| ind * (sizeof(struct key_value) + key_hint_size()))); |
| } |
| |
| /* |
| * Set value of a key in the keys set |
| */ |
| void |
| set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) |
| { |
| |
| if (ka && keys_num > ind) { |
| struct key_value *kv; |
| |
| kv = get_key_from_keys_array(ka, ind); |
| |
| if (kv->k && kv->k != s) |
| bwsfree(kv->k); |
| kv->k = s; |
| } |
| } |
| |
| /* |
| * Initialize a sort list item |
| */ |
| struct sort_list_item * |
| sort_list_item_alloc(void) |
| { |
| struct sort_list_item *si; |
| size_t sz; |
| |
| sz = sizeof(struct sort_list_item) + keys_array_size(); |
| si = sort_malloc(sz); |
| memset(si, 0, sz); |
| |
| return (si); |
| } |
| |
| size_t |
| sort_list_item_size(struct sort_list_item *si) |
| { |
| size_t ret = 0; |
| |
| if (si) { |
| ret = sizeof(struct sort_list_item) + keys_array_size(); |
| if (si->str) |
| ret += bws_memsize(si->str); |
| for (size_t i = 0; i < keys_num; ++i) { |
| const struct key_value *kv; |
| |
| kv = get_key_from_keys_array(&si->ka, i); |
| |
| if (kv->k != si->str) |
| ret += bws_memsize(kv->k); |
| } |
| } |
| return (ret); |
| } |
| |
| /* |
| * Calculate key for a sort list item |
| */ |
| static void |
| sort_list_item_make_key(struct sort_list_item *si) |
| { |
| |
| preproc(si->str, &(si->ka)); |
| } |
| |
| /* |
| * Set value of a sort list item. |
| * Return combined string and keys memory size. |
| */ |
| void |
| sort_list_item_set(struct sort_list_item *si, struct bwstring *str) |
| { |
| |
| if (si) { |
| clean_keys_array(si->str, &(si->ka)); |
| if (si->str) { |
| if (si->str == str) { |
| /* we are trying to reset the same string */ |
| return; |
| } else { |
| bwsfree(si->str); |
| si->str = NULL; |
| } |
| } |
| si->str = str; |
| sort_list_item_make_key(si); |
| } |
| } |
| |
| /* |
| * De-allocate a sort list item object memory |
| */ |
| void |
| sort_list_item_clean(struct sort_list_item *si) |
| { |
| |
| if (si) { |
| clean_keys_array(si->str, &(si->ka)); |
| if (si->str) { |
| bwsfree(si->str); |
| si->str = NULL; |
| } |
| } |
| } |
| |
| /* |
| * Skip columns according to specs |
| */ |
| static size_t |
| skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, |
| bool skip_blanks, bool *empty_key) |
| { |
| if (cols < 1) |
| return (BWSLEN(s) + 1); |
| |
| if (skip_blanks) |
| while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) |
| ++start; |
| |
| while (start < BWSLEN(s) && cols > 1) { |
| --cols; |
| ++start; |
| } |
| |
| if (start >= BWSLEN(s)) |
| *empty_key = true; |
| |
| return (start); |
| } |
| |
| /* |
| * Skip fields according to specs |
| */ |
| static size_t |
| skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) |
| { |
| |
| if (fields < 2) { |
| if (BWSLEN(s) == 0) |
| *empty_field = true; |
| return (0); |
| } else if (!(sort_opts_vals.tflag)) { |
| size_t cpos = 0; |
| bool pb = true; |
| |
| while (cpos < BWSLEN(s)) { |
| bool isblank; |
| |
| isblank = iswblank(BWS_GET(s, cpos)); |
| |
| if (isblank && !pb) { |
| --fields; |
| if (fields <= 1) |
| return (cpos); |
| } |
| pb = isblank; |
| ++cpos; |
| } |
| if (fields > 1) |
| *empty_field = true; |
| return (cpos); |
| } else { |
| size_t cpos = 0; |
| |
| while (cpos < BWSLEN(s)) { |
| if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { |
| --fields; |
| if (fields <= 1) |
| return (cpos + 1); |
| } |
| ++cpos; |
| } |
| if (fields > 1) |
| *empty_field = true; |
| return (cpos); |
| } |
| } |
| |
| /* |
| * Find fields start |
| */ |
| static void |
| find_field_start(const struct bwstring *s, struct key_specs *ks, |
| size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) |
| { |
| |
| *field_start = skip_fields_to_start(s, ks->f1, empty_field); |
| if (!*empty_field) |
| *key_start = skip_cols_to_start(s, ks->c1, *field_start, |
| ks->pos1b, empty_key); |
| else |
| *empty_key = true; |
| } |
| |
| /* |
| * Find end key position |
| */ |
| static size_t |
| find_field_end(const struct bwstring *s, struct key_specs *ks) |
| { |
| size_t f2, next_field_start, pos_end; |
| bool empty_field, empty_key; |
| |
| pos_end = 0; |
| next_field_start = 0; |
| empty_field = false; |
| empty_key = false; |
| f2 = ks->f2; |
| |
| if (f2 == 0) |
| return (BWSLEN(s) + 1); |
| else { |
| if (ks->c2 == 0) { |
| next_field_start = skip_fields_to_start(s, f2 + 1, |
| &empty_field); |
| if ((next_field_start > 0) && sort_opts_vals.tflag && |
| ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, |
| next_field_start - 1))) |
| --next_field_start; |
| } else |
| next_field_start = skip_fields_to_start(s, f2, |
| &empty_field); |
| } |
| |
| if (empty_field || (next_field_start >= BWSLEN(s))) |
| return (BWSLEN(s) + 1); |
| |
| if (ks->c2) { |
| pos_end = skip_cols_to_start(s, ks->c2, next_field_start, |
| ks->pos2b, &empty_key); |
| if (pos_end < BWSLEN(s)) |
| ++pos_end; |
| } else |
| pos_end = next_field_start; |
| |
| return (pos_end); |
| } |
| |
| /* |
| * Cut a field according to the key specs |
| */ |
| static struct bwstring * |
| cut_field(const struct bwstring *s, struct key_specs *ks) |
| { |
| struct bwstring *ret = NULL; |
| |
| if (s && ks) { |
| size_t field_start, key_end, key_start, sz; |
| bool empty_field, empty_key; |
| |
| field_start = 0; |
| key_start = 0; |
| empty_field = false; |
| empty_key = false; |
| |
| find_field_start(s, ks, &field_start, &key_start, |
| &empty_field, &empty_key); |
| |
| if (empty_key) |
| sz = 0; |
| else { |
| key_end = find_field_end(s, ks); |
| sz = (key_end < key_start) ? 0 : (key_end - key_start); |
| } |
| |
| ret = bwsalloc(sz); |
| if (sz) |
| bwsnocpy(ret, s, key_start, sz); |
| } else |
| ret = bwsalloc(0); |
| |
| return (ret); |
| } |
| |
| /* |
| * Preprocesses a line applying the necessary transformations |
| * specified by command line options and returns the preprocessed |
| * string, which can be used to compare. |
| */ |
| int |
| preproc(struct bwstring *s, struct keys_array *ka) |
| { |
| |
| if (sort_opts_vals.kflag) |
| for (size_t i = 0; i < keys_num; i++) { |
| struct bwstring *key; |
| struct key_specs *kspecs; |
| struct sort_mods *sm; |
| |
| kspecs = &(keys[i]); |
| key = cut_field(s, kspecs); |
| |
| sm = &(kspecs->sm); |
| if (sm->dflag) |
| key = dictionary_order(key); |
| else if (sm->iflag) |
| key = ignore_nonprinting(key); |
| if (sm->fflag || sm->Mflag) |
| key = ignore_case(key); |
| |
| set_key_on_keys_array(ka, key, i); |
| } |
| else { |
| struct bwstring *ret = NULL; |
| struct sort_mods *sm = default_sort_mods; |
| |
| if (sm->bflag) { |
| if (ret == NULL) |
| ret = bwsdup(s); |
| ret = ignore_leading_blanks(ret); |
| } |
| if (sm->dflag) { |
| if (ret == NULL) |
| ret = bwsdup(s); |
| ret = dictionary_order(ret); |
| } else if (sm->iflag) { |
| if (ret == NULL) |
| ret = bwsdup(s); |
| ret = ignore_nonprinting(ret); |
| } |
| if (sm->fflag || sm->Mflag) { |
| if (ret == NULL) |
| ret = bwsdup(s); |
| ret = ignore_case(ret); |
| } |
| if (ret == NULL) |
| set_key_on_keys_array(ka, s, 0); |
| else |
| set_key_on_keys_array(ka, ret, 0); |
| } |
| |
| return 0; |
| } |
| |
| cmpcoll_t |
| get_sort_func(struct sort_mods *sm) |
| { |
| |
| if (sm->nflag) |
| return (numcoll); |
| else if (sm->hflag) |
| return (hnumcoll); |
| else if (sm->gflag) |
| return (gnumcoll); |
| else if (sm->Mflag) |
| return (monthcoll); |
| else if (sm->Rflag) |
| return (randomcoll); |
| else if (sm->Vflag) |
| return (versioncoll); |
| else |
| return (wstrcoll); |
| } |
| |
| /* |
| * Compares the given strings. Returns a positive number if |
| * the first precedes the second, a negative number if the second is |
| * the preceding one, and zero if they are equal. This function calls |
| * the underlying collate functions, which done the actual comparison. |
| */ |
| int |
| key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) |
| { |
| struct key_value *kv1, *kv2; |
| struct sort_mods *sm; |
| int res = 0; |
| |
| for (size_t i = 0; i < keys_num; ++i) { |
| kv1 = get_key_from_keys_array(ps1, i); |
| kv2 = get_key_from_keys_array(ps2, i); |
| sm = &(keys[i].sm); |
| |
| if (sm->rflag) |
| res = sm->func(kv2, kv1, offset); |
| else |
| res = sm->func(kv1, kv2, offset); |
| |
| if (res) |
| break; |
| |
| /* offset applies to only the first key */ |
| offset = 0; |
| } |
| return (res); |
| } |
| |
| /* |
| * Compare two strings. |
| * Plain symbol-by-symbol comparison. |
| */ |
| int |
| top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) |
| { |
| |
| if (default_sort_mods->rflag) { |
| const struct bwstring *tmp; |
| |
| tmp = s1; |
| s1 = s2; |
| s2 = tmp; |
| } |
| |
| return (bwscoll(s1, s2, 0)); |
| } |
| |
| /* |
| * Compare a string and a sort list item, according to the sort specs. |
| */ |
| int |
| str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) |
| { |
| struct keys_array *ka1; |
| int ret = 0; |
| |
| ka1 = keys_array_alloc(); |
| |
| preproc(str1, ka1); |
| |
| sort_list_item_make_key(*ss2); |
| |
| if (debug_sort) { |
| bwsprintf(stdout, str1, "; s1=<", ">"); |
| bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); |
| } |
| |
| ret = key_coll(ka1, &((*ss2)->ka), 0); |
| |
| if (debug_sort) |
| printf("; cmp1=%d", ret); |
| |
| clean_keys_array(str1, ka1); |
| sort_free(ka1); |
| |
| if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { |
| ret = top_level_str_coll(str1, ((*ss2)->str)); |
| if (debug_sort) |
| printf("; cmp2=%d", ret); |
| } |
| |
| if (debug_sort) |
| printf("\n"); |
| |
| return (ret); |
| } |
| |
| /* |
| * Compare two sort list items, according to the sort specs. |
| */ |
| int |
| list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, |
| size_t offset) |
| { |
| int ret; |
| |
| ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); |
| |
| if (debug_sort) { |
| if (offset) |
| printf("; offset=%d", (int) offset); |
| bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); |
| bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); |
| printf("; cmp1=%d\n", ret); |
| } |
| |
| if (ret) |
| return (ret); |
| |
| if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { |
| ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); |
| if (debug_sort) |
| printf("; cmp2=%d\n", ret); |
| } |
| |
| return (ret); |
| } |
| |
| /* |
| * Compare two sort list items, according to the sort specs. |
| */ |
| int |
| list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) |
| { |
| |
| return (list_coll_offset(ss1, ss2, 0)); |
| } |
| |
| #define LSCDEF(N) \ |
| static int \ |
| list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ |
| { \ |
| \ |
| return (list_coll_offset(ss1, ss2, N)); \ |
| } |
| |
| LSCDEF(1) |
| LSCDEF(2) |
| LSCDEF(3) |
| LSCDEF(4) |
| LSCDEF(5) |
| LSCDEF(6) |
| LSCDEF(7) |
| LSCDEF(8) |
| LSCDEF(9) |
| LSCDEF(10) |
| LSCDEF(11) |
| LSCDEF(12) |
| LSCDEF(13) |
| LSCDEF(14) |
| LSCDEF(15) |
| LSCDEF(16) |
| LSCDEF(17) |
| LSCDEF(18) |
| LSCDEF(19) |
| LSCDEF(20) |
| |
| listcoll_t |
| get_list_call_func(size_t offset) |
| { |
| static const listcoll_t lsarray[] = { list_coll, list_coll_1, |
| list_coll_2, list_coll_3, list_coll_4, list_coll_5, |
| list_coll_6, list_coll_7, list_coll_8, list_coll_9, |
| list_coll_10, list_coll_11, list_coll_12, list_coll_13, |
| list_coll_14, list_coll_15, list_coll_16, list_coll_17, |
| list_coll_18, list_coll_19, list_coll_20 }; |
| |
| if (offset <= 20) |
| return (lsarray[offset]); |
| |
| return (list_coll); |
| } |
| |
| /* |
| * Compare two sort list items, only by their original string. |
| */ |
| int |
| list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) |
| { |
| |
| return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); |
| } |
| |
| /* |
| * Maximum size of a number in the string (before or after decimal point) |
| */ |
| #define MAX_NUM_SIZE (128) |
| |
| /* |
| * Set suffix value |
| */ |
| static void setsuffix(wchar_t c, unsigned char *si) |
| { |
| switch (c){ |
| case L'k': |
| case L'K': |
| *si = 1; |
| break; |
| case L'M': |
| *si = 2; |
| break; |
| case L'G': |
| *si = 3; |
| break; |
| case L'T': |
| *si = 4; |
| break; |
| case L'P': |
| *si = 5; |
| break; |
| case L'E': |
| *si = 6; |
| break; |
| case L'Z': |
| *si = 7; |
| break; |
| case L'Y': |
| *si = 8; |
| break; |
| default: |
| *si = 0; |
| } |
| } |
| |
| /* |
| * Read string s and parse the string into a fixed-decimal-point number. |
| * sign equals -1 if the number is negative (explicit plus is not allowed, |
| * according to GNU sort's "info sort". |
| * The number part before decimal point is in the smain, after the decimal |
| * point is in sfrac, tail is the pointer to the remainder of the string. |
| */ |
| static int |
| read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) |
| { |
| bwstring_iterator s; |
| |
| s = bws_begin(s0); |
| |
| /* always end the fraction with zero, even if we have no fraction */ |
| sfrac[0] = 0; |
| |
| while (iswblank(bws_get_iter_value(s))) |
| s = bws_iterator_inc(s, 1); |
| |
| if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { |
| *sign = -1; |
| s = bws_iterator_inc(s, 1); |
| } |
| |
| // This is '0', not '\0', do not change this |
| while (iswdigit(bws_get_iter_value(s)) && |
| (bws_get_iter_value(s) == L'0')) |
| s = bws_iterator_inc(s, 1); |
| |
| while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { |
| if (iswdigit(bws_get_iter_value(s))) { |
| smain[*main_len] = bws_get_iter_value(s); |
| s = bws_iterator_inc(s, 1); |
| *main_len += 1; |
| } else if (symbol_thousands_sep && |
| (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) |
| s = bws_iterator_inc(s, 1); |
| else |
| break; |
| } |
| |
| smain[*main_len] = 0; |
| |
| if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { |
| s = bws_iterator_inc(s, 1); |
| while (iswdigit(bws_get_iter_value(s)) && |
| *frac_len < MAX_NUM_SIZE) { |
| sfrac[*frac_len] = bws_get_iter_value(s); |
| s = bws_iterator_inc(s, 1); |
| *frac_len += 1; |
| } |
| sfrac[*frac_len] = 0; |
| |
| while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { |
| --(*frac_len); |
| sfrac[*frac_len] = L'\0'; |
| } |
| } |
| |
| setsuffix(bws_get_iter_value(s),si); |
| |
| if ((*main_len + *frac_len) == 0) |
| *sign = 0; |
| |
| return (0); |
| } |
| |
| /* |
| * Implements string sort. |
| */ |
| static int |
| wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) |
| { |
| |
| if (debug_sort) { |
| if (offset) |
| printf("; offset=%d\n", (int) offset); |
| bwsprintf(stdout, kv1->k, "; k1=<", ">"); |
| printf("(%zu)", BWSLEN(kv1->k)); |
| bwsprintf(stdout, kv2->k, ", k2=<", ">"); |
| printf("(%zu)", BWSLEN(kv2->k)); |
| } |
| |
| return (bwscoll(kv1->k, kv2->k, offset)); |
| } |
| |
| /* |
| * Compare two suffixes |
| */ |
| static inline int |
| cmpsuffix(unsigned char si1, unsigned char si2) |
| { |
| |
| return ((char)si1 - (char)si2); |
| } |
| |
| /* |
| * Implements numeric sort for -n and -h. |
| */ |
| static int |
| numcoll_impl(struct key_value *kv1, struct key_value *kv2, |
| size_t offset __attribute__((unused)), bool use_suffix) |
| { |
| struct bwstring *s1, *s2; |
| wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; |
| wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; |
| int cmp_res, sign1, sign2; |
| size_t frac1, frac2, main1, main2; |
| unsigned char SI1, SI2; |
| bool e1, e2, key1_read, key2_read; |
| |
| s1 = kv1->k; |
| s2 = kv2->k; |
| sign1 = sign2 = 0; |
| main1 = main2 = 0; |
| frac1 = frac2 = 0; |
| |
| cmp_res = 0; |
| key1_read = key2_read = false; |
| |
| if (debug_sort) { |
| bwsprintf(stdout, s1, "; k1=<", ">"); |
| bwsprintf(stdout, s2, ", k2=<", ">"); |
| } |
| |
| if (s1 == s2) |
| return (0); |
| |
| if (kv1->hint->status == HS_UNINITIALIZED) { |
| /* read the number from the string */ |
| read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); |
| key1_read = true; |
| kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); |
| if(main1 < 1 && frac1 < 1) |
| kv1->hint->v.nh.empty=true; |
| kv1->hint->v.nh.si = SI1; |
| kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? |
| HS_INITIALIZED : HS_ERROR; |
| kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; |
| } |
| |
| if (kv2->hint->status == HS_UNINITIALIZED) { |
| /* read the number from the string */ |
| read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); |
| key2_read = true; |
| kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); |
| if(main2 < 1 && frac2 < 1) |
| kv2->hint->v.nh.empty=true; |
| kv2->hint->v.nh.si = SI2; |
| kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? |
| HS_INITIALIZED : HS_ERROR; |
| kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; |
| } |
| |
| if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == |
| HS_INITIALIZED) { |
| unsigned long long n1, n2; |
| bool neg1, neg2; |
| |
| e1 = kv1->hint->v.nh.empty; |
| e2 = kv2->hint->v.nh.empty; |
| |
| if (e1 && e2) |
| return (0); |
| |
| neg1 = kv1->hint->v.nh.neg; |
| neg2 = kv2->hint->v.nh.neg; |
| |
| if (neg1 && !neg2) |
| return (-1); |
| if (neg2 && !neg1) |
| return (+1); |
| |
| if (e1) |
| return (neg2 ? +1 : -1); |
| else if (e2) |
| return (neg1 ? -1 : +1); |
| |
| |
| if (use_suffix) { |
| cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); |
| if (cmp_res) |
| return (neg1 ? -cmp_res : cmp_res); |
| } |
| |
| n1 = kv1->hint->v.nh.n1; |
| n2 = kv2->hint->v.nh.n1; |
| if (n1 < n2) |
| return (neg1 ? +1 : -1); |
| else if (n1 > n2) |
| return (neg1 ? -1 : +1); |
| } |
| |
| /* read the numbers from the strings */ |
| if (!key1_read) |
| read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); |
| if (!key2_read) |
| read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); |
| |
| e1 = ((main1 + frac1) == 0); |
| e2 = ((main2 + frac2) == 0); |
| |
| if (e1 && e2) |
| return (0); |
| |
| /* we know the result if the signs are different */ |
| if (sign1 < 0 && sign2 >= 0) |
| return (-1); |
| if (sign1 >= 0 && sign2 < 0) |
| return (+1); |
| |
| if (e1) |
| return ((sign2 < 0) ? +1 : -1); |
| else if (e2) |
| return ((sign1 < 0) ? -1 : +1); |
| |
| if (use_suffix) { |
| cmp_res = cmpsuffix(SI1, SI2); |
| if (cmp_res) |
| return ((sign1 < 0) ? -cmp_res : cmp_res); |
| } |
| |
| /* if both numbers are empty assume that the strings are equal */ |
| if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) |
| return (0); |
| |
| /* |
| * if the main part is of different size, we know the result |
| * (because the leading zeros are removed) |
| */ |
| if (main1 < main2) |
| cmp_res = -1; |
| else if (main1 > main2) |
| cmp_res = +1; |
| /* if the sizes are equal then simple non-collate string compare gives the correct result */ |
| else |
| cmp_res = wcscmp(smain1, smain2); |
| |
| /* check fraction */ |
| if (!cmp_res) |
| cmp_res = wcscmp(sfrac1, sfrac2); |
| |
| if (!cmp_res) |
| return (0); |
| |
| /* reverse result if the signs are negative */ |
| if (sign1 < 0 && sign2 < 0) |
| cmp_res = -cmp_res; |
| |
| return (cmp_res); |
| } |
| |
| /* |
| * Implements numeric sort (-n). |
| */ |
| static int |
| numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) |
| { |
| |
| return (numcoll_impl(kv1, kv2, offset, false)); |
| } |
| |
| /* |
| * Implements 'human' numeric sort (-h). |
| */ |
| static int |
| hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) |
| { |
| |
| return (numcoll_impl(kv1, kv2, offset, true)); |
| } |
| |
| /* |
| * Implements random sort (-R). |
| */ |
| static int |
| randomcoll(struct key_value *kv1, struct key_value *kv2, |
| size_t offset __attribute__((unused))) |
| { |
| struct bwstring *s1, *s2; |
| MD5_CTX ctx1, ctx2; |
| unsigned char b1[16], b2[16]; |
| |
| s1 = kv1->k; |
| s2 = kv2->k; |
| |
| if (debug_sort) { |
| bwsprintf(stdout, s1, "; k1=<", ">"); |
| bwsprintf(stdout, s2, ", k2=<", ">"); |
| } |
| |
| if (s1 == s2) |
| return (0); |
| |
| memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); |
| memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); |
| |
| MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); |
| MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); |
| MD5Final(b1, &ctx1); |
| MD5Final(b2, &ctx2); |
| |
| int cmp_res = memcmp(b1, b2, 16); |
| if(!cmp_res) cmp_res = bwscoll(s1, s2, 0); |
| return cmp_res; |
| } |
| |
| /* |
| * Implements version sort (-V). |
| */ |
| static int |
| versioncoll(struct key_value *kv1, struct key_value *kv2, |
| size_t offset __attribute__((unused))) |
| { |
| struct bwstring *s1, *s2; |
| |
| s1 = kv1->k; |
| s2 = kv2->k; |
| |
| if (debug_sort) { |
| bwsprintf(stdout, s1, "; k1=<", ">"); |
| bwsprintf(stdout, s2, ", k2=<", ">"); |
| } |
| |
| if (s1 == s2) |
| return (0); |
| |
| return (vcmp(s1, s2)); |
| } |
| |
| /* |
| * Check for minus infinity |
| */ |
| static inline bool |
| huge_minus(double d, int err1) |
| { |
| |
| if (err1 == ERANGE) |
| if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) |
| return (+1); |
| |
| return (0); |
| } |
| |
| /* |
| * Check for plus infinity |
| */ |
| static inline bool |
| huge_plus(double d, int err1) |
| { |
| |
| if (err1 == ERANGE) |
| if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) |
| return (+1); |
| |
| return (0); |
| } |
| |
| /* |
| * Check whether a function is a NAN |
| */ |
| static bool |
| is_nan(double d) |
| { |
| |
| return ((d == NAN) || (isnan(d))); |
| } |
| |
| /* |
| * Compare two NANs |
| */ |
| static int |
| cmp_nans(double d1, double d2) |
| { |
| |
| if (d1 < d2) |
| return (-1); |
| if (d1 > d2) |
| return (+1); |
| return (0); |
| } |
| |
| /* |
| * Implements general numeric sort (-g). |
| */ |
| static int |
| gnumcoll(struct key_value *kv1, struct key_value *kv2, |
| size_t offset __attribute__((unused))) |
| { |
| double d1, d2; |
| int err1, err2; |
| bool empty1, empty2, key1_read, key2_read; |
| |
| d1 = d2 = 0; |
| err1 = err2 = 0; |
| key1_read = key2_read = false; |
| |
| if (debug_sort) { |
| bwsprintf(stdout, kv1->k, "; k1=<", ">"); |
| bwsprintf(stdout, kv2->k, "; k2=<", ">"); |
| } |
| |
| if (kv1->hint->status == HS_UNINITIALIZED) { |
| errno = 0; |
| d1 = bwstod(kv1->k, &empty1); |
| err1 = errno; |
| |
| if (empty1) |
| kv1->hint->v.gh.notnum = true; |
| else if (err1 == 0) { |
| kv1->hint->v.gh.d = d1; |
| kv1->hint->v.gh.nan = is_nan(d1); |
| kv1->hint->status = HS_INITIALIZED; |
| } else |
| kv1->hint->status = HS_ERROR; |
| |
| key1_read = true; |
| } |
| |
| if (kv2->hint->status == HS_UNINITIALIZED) { |
| errno = 0; |
| d2 = bwstod(kv2->k, &empty2); |
| err2 = errno; |
| |
| if (empty2) |
| kv2->hint->v.gh.notnum = true; |
| else if (err2 == 0) { |
| kv2->hint->v.gh.d = d2; |
| kv2->hint->v.gh.nan = is_nan(d2); |
| kv2->hint->status = HS_INITIALIZED; |
| } else |
| kv2->hint->status = HS_ERROR; |
| |
| key2_read = true; |
| } |
| |
| if (kv1->hint->status == HS_INITIALIZED && |
| kv2->hint->status == HS_INITIALIZED) { |
| if (kv1->hint->v.gh.notnum) |
| return ((kv2->hint->v.gh.notnum) ? 0 : -1); |
| else if (kv2->hint->v.gh.notnum) |
| return (+1); |
| |
| if (kv1->hint->v.gh.nan) |
| return ((kv2->hint->v.gh.nan) ? |
| cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : |
| -1); |
| else if (kv2->hint->v.gh.nan) |
| return (+1); |
| |
| d1 = kv1->hint->v.gh.d; |
| d2 = kv2->hint->v.gh.d; |
| |
| if (d1 < d2) |
| return (-1); |
| else if (d1 > d2) |
| return (+1); |
| else |
| return (0); |
| } |
| |
| if (!key1_read) { |
| errno = 0; |
| d1 = bwstod(kv1->k, &empty1); |
| err1 = errno; |
| } |
| |
| if (!key2_read) { |
| errno = 0; |
| d2 = bwstod(kv2->k, &empty2); |
| err2 = errno; |
| } |
| |
| /* Non-value case: */ |
| if (empty1) |
| return (empty2 ? 0 : -1); |
| else if (empty2) |
| return (+1); |
| |
| /* NAN case */ |
| if (is_nan(d1)) |
| return (is_nan(d2) ? cmp_nans(d1, d2) : -1); |
| else if (is_nan(d2)) |
| return (+1); |
| |
| /* Infinities */ |
| if (err1 == ERANGE || err2 == ERANGE) { |
| /* Minus infinity case */ |
| if (huge_minus(d1, err1)) { |
| if (huge_minus(d2, err2)) { |
| if (d1 < d2) |
| return (-1); |
| if (d1 > d2) |
| return (+1); |
| return (0); |
| } else |
| return (-1); |
| |
| } else if (huge_minus(d2, err2)) { |
| if (huge_minus(d1, err1)) { |
| if (d1 < d2) |
| return (-1); |
| if (d1 > d2) |
| return (+1); |
| return (0); |
| } else |
| return (+1); |
| } |
| |
| /* Plus infinity case */ |
| if (huge_plus(d1, err1)) { |
| if (huge_plus(d2, err2)) { |
| if (d1 < d2) |
| return (-1); |
| if (d1 > d2) |
| return (+1); |
| return (0); |
| } else |
| return (+1); |
| } else if (huge_plus(d2, err2)) { |
| if (huge_plus(d1, err1)) { |
| if (d1 < d2) |
| return (-1); |
| if (d1 > d2) |
| return (+1); |
| return (0); |
| } else |
| return (-1); |
| } |
| } |
| |
| if (d1 < d2) |
| return (-1); |
| if (d1 > d2) |
| return (+1); |
| |
| return (0); |
| } |
| |
| /* |
| * Implements month sort (-M). |
| */ |
| static int |
| monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __attribute__((unused))) |
| { |
| int val1, val2; |
| bool key1_read, key2_read; |
| |
| val1 = val2 = 0; |
| key1_read = key2_read = false; |
| |
| if (debug_sort) { |
| bwsprintf(stdout, kv1->k, "; k1=<", ">"); |
| bwsprintf(stdout, kv2->k, "; k2=<", ">"); |
| } |
| |
| if (kv1->hint->status == HS_UNINITIALIZED) { |
| kv1->hint->v.Mh.m = bws_month_score(kv1->k); |
| key1_read = true; |
| kv1->hint->status = HS_INITIALIZED; |
| } |
| |
| if (kv2->hint->status == HS_UNINITIALIZED) { |
| kv2->hint->v.Mh.m = bws_month_score(kv2->k); |
| key2_read = true; |
| kv2->hint->status = HS_INITIALIZED; |
| } |
| |
| if (kv1->hint->status == HS_INITIALIZED) { |
| val1 = kv1->hint->v.Mh.m; |
| key1_read = true; |
| } |
| |
| if (kv2->hint->status == HS_INITIALIZED) { |
| val2 = kv2->hint->v.Mh.m; |
| key2_read = true; |
| } |
| |
| if (!key1_read) |
| val1 = bws_month_score(kv1->k); |
| if (!key2_read) |
| val2 = bws_month_score(kv2->k); |
| |
| if (val1 == val2) { |
| return (0); |
| } |
| if (val1 < val2) |
| return (-1); |
| return (+1); |
| } |