/* blob: 08a48b56d29319c8f34a6a09f45a6676b56f547b [file] [log] [blame] [raw] */
/*
Copyright (C) 2014 insane coder (http://insanecoding.blogspot.com/, http://chacha20.insanecoding.org/)
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
This implementation is intended to be simple; many optimizations are possible.
*/
#include <string.h>
#include "chacha20_simple.h"
#include <stdio.h>
/*
static void debug_print_bitstr(const uint32_t *a, size_t len) {
unsigned int i;
for(i=0; i<len; i++) fprintf(stderr, "0x%x\n", (unsigned int)a[i]);
}
static void debug_print_bytes(const uint8_t *a, size_t len) {
unsigned int i;
for(i=0; i<len; i++) fprintf(stderr, "0x%hhx, ", a[i]);
fputc('\n', stderr);
}
*/
/*
Initialise a cipher context from a key and an 8 byte nonce.

ctx    - context to fill in; all 16 schedule words and `available` are written.
key    - key bytes. The first 16 bytes are always read; the offsets used for the
         second group of four words are reduced modulo `length`, so a 16 byte
         key is read twice and a 32 byte key is read once from start to end.
length - key length in bytes. The code is written for 16 or 32; a length of 0
         leads to a modulo by zero, and other values are not validated here.
nonce  - 8 bytes, loaded into schedule words 14 and 15.

LE() comes from chacha20_simple.h (not visible in this file); it is presumably a
little-endian 32 bit load of four bytes - confirm against the header.
*/
void chacha20_setup(chacha20_ctx *ctx, const uint8_t *key, size_t length, uint8_t nonce[8])
{
  const char *sigma = "expand 16-byte k";
  size_t w;

  if (length == 32)
  {
    sigma = "expand 32-byte k";
  }

  //Words 0..3: the constant string, four bytes per word
  for (w = 0; w < 4; ++w)
  {
    ctx->schedule[w] = LE(sigma + 4 * w);
  }

  //Words 4..11: the key. Offsets 0..12 are used as is, offsets 16..28 wrap modulo length
  for (w = 0; w < 8; ++w)
  {
    const size_t off = 4 * w;
    ctx->schedule[4 + w] = LE(key + (off < 16 ? off : off % length));
  }

  //Words 12..13: block counter, starts at zero
  ctx->schedule[12] = 0;
  ctx->schedule[13] = 0;

  //Words 14..15: nonce
  ctx->schedule[14] = LE(nonce + 0);
  ctx->schedule[15] = LE(nonce + 4);

  //No keystream has been generated yet
  ctx->available = 0;
}
/*
Set the 64 bit block counter held in schedule words 12 (low half) and 13 (high half).
Any buffered keystream is discarded by resetting `available` to 0, so the next
encrypt call generates a fresh block at the new counter value.
Schedule words 14 and 15 are left untouched.
*/
void chacha20_counter_set(chacha20_ctx *ctx, uint64_t counter)
{
  const uint32_t high_half = (uint32_t)(counter >> 32);
  const uint32_t low_half = (uint32_t)(counter & UINT32_C(0xFFFFFFFF));

  ctx->available = 0;
  ctx->schedule[13] = high_half;
  ctx->schedule[12] = low_half;
}
/*
Quarter round on four words s[a], s[b], s[c], s[d] of a word array:
four add / xor / rotate steps with rotation amounts 16, 12, 8 and 7.

The expansion is a plain sequence of statements (no braces, no do/while) that
already ends in a semicolon, so it must only be used where several statements
are allowed, and call sites do not add their own semicolon.
ROTL32 is defined outside this file; presumably a 32 bit rotate left.
*/
#define QUARTERROUND(s, a, b, c, d) \
  s[a] += s[b]; \
  s[d] = ROTL32(s[d] ^ s[a], 16); \
  s[c] += s[d]; \
  s[b] = ROTL32(s[b] ^ s[c], 12); \
  s[a] += s[b]; \
  s[d] = ROTL32(s[d] ^ s[a], 8); \
  s[c] += s[d]; \
  s[b] = ROTL32(s[b] ^ s[c], 7);
/*
Produce one 16 word keystream block from the current schedule and then advance
the counter.

ctx    - context whose schedule is the block input; words 12..15 are modified.
output - receives the block. Each word is written with FROMLE() (defined in
         chacha20_simple.h, presumably storing the value in little-endian byte
         order - confirm against the header).

Steps: copy the schedule into output, run 10 iterations of eight quarter rounds
each, add the original schedule word by word, then increment the counter.
*/
void chacha20_block(chacha20_ctx *ctx, uint32_t output[16])
{
  uint32_t *const counter_words = ctx->schedule + 12; //Words 12..15 are stepped as one counter below
  int round_pair;
  int idx;

  memcpy(output, ctx->schedule, sizeof(ctx->schedule));

  for (round_pair = 0; round_pair < 10; ++round_pair)
  {
    //Four quarter rounds over the index sets (i, i+4, i+8, i+12)
    QUARTERROUND(output, 0, 4, 8, 12)
    QUARTERROUND(output, 1, 5, 9, 13)
    QUARTERROUND(output, 2, 6, 10, 14)
    QUARTERROUND(output, 3, 7, 11, 15)
    //Four quarter rounds over the shifted index sets
    QUARTERROUND(output, 0, 5, 10, 15)
    QUARTERROUND(output, 1, 6, 11, 12)
    QUARTERROUND(output, 2, 7, 8, 13)
    QUARTERROUND(output, 3, 4, 9, 14)
  }

  //Add the untouched schedule back in and store each word through FROMLE
  for (idx = 0; idx < 16; ++idx)
  {
    uint32_t sum = output[idx] + ctx->schedule[idx];
    FROMLE((uint8_t *)(output + idx), sum);
  }

  /*
  The official specification calls for a 64 bit increment here and limits usage to 2^64 blocks.
  Various papers on CTR mode recommend including the nonce words as well, giving a 128 bit increment.
  This implementation stays compatible with the official one for up to 2^64 blocks; beyond that
  point the official one is not intended to be used.
  The wider increment also makes the algorithm usable in a Fortuna-like construct.
  */
  for (idx = 0; idx < 4; ++idx)
  {
    //Carry into the next word only when this one wrapped around to zero
    if (++counter_words[idx])
    {
      break;
    }
  }
}
/*
XOR `length` keystream bytes with input bytes and write the result to the output.

keystream - first keystream byte to use; only read.
in        - address of the caller's input cursor; advanced by `length`.
out       - address of the caller's output cursor; advanced by `length`.
length    - number of bytes to process. A length of 0 is a no-op: nothing is
            read or written and neither cursor moves.

Bytes are handled one at a time in ascending order, each input byte being read
before the matching output byte is written, so *in and *out may point at the
same buffer.
*/
static inline void chacha20_xor(const uint8_t *keystream, const uint8_t **in, uint8_t **out, size_t length)
{
  const uint8_t *src = *in;
  uint8_t *dst = *out;
  size_t i;

  for (i = 0; i < length; ++i)
  {
    dst[i] = (uint8_t)(src[i] ^ keystream[i]);
  }

  *in = src + length;
  *out = dst + length;
}
/*
XOR `length` bytes from `in` with keystream and write them to `out`.

ctx    - cipher context. `available` holds the number of unused bytes at the
         tail of ctx->keystream left over from an earlier call; it is updated
         so that consecutive calls continue the same keystream.
in     - input bytes.
out    - output buffer of at least `length` bytes.
length - number of bytes to process; 0 returns immediately without touching ctx.
*/
void chacha20_encrypt(chacha20_ctx *ctx, const uint8_t *in, uint8_t *out, size_t length)
{
  if (!length)
  {
    return;
  }

  uint8_t *const stream_bytes = (uint8_t *)ctx->keystream;
  const size_t block_bytes = sizeof(ctx->keystream);

  //Drain whatever is left of the previously generated block
  if (ctx->available)
  {
    const size_t take = MIN(length, ctx->available);
    //Unused bytes sit at the end of the buffer, so start `available` bytes before its end
    chacha20_xor(stream_bytes + (block_bytes - ctx->available), &in, &out, take);
    ctx->available -= take;
    length -= take;
  }

  //Generate a new block for every further chunk of input
  while (length)
  {
    const size_t take = MIN(length, block_bytes);
    chacha20_block(ctx, ctx->keystream);
    chacha20_xor(stream_bytes, &in, &out, take);
    //Whatever this chunk did not consume stays buffered for the next call
    ctx->available = block_bytes - take;
    length -= take;
  }
}
/*
Decrypt `length` bytes from `in` into `out`.
This simply forwards all arguments unchanged to chacha20_encrypt(), which XORs
the data with keystream; the same operation is used in both directions.
*/
void chacha20_decrypt(chacha20_ctx *ctx, const uint8_t *in, uint8_t *out, size_t length)
{
chacha20_encrypt(ctx, in, out, length);
}