Fix many cases of unaligned accesses

This commit is contained in:
James Clarke 2017-06-25 18:10:55 +01:00
parent bc72179d89
commit 2b43be4d84
16 changed files with 269 additions and 163 deletions

View File

@ -32,6 +32,7 @@
#define BLOCK128_H #define BLOCK128_H
#include <cryptonite_bitfn.h> #include <cryptonite_bitfn.h>
#include <cryptonite_align.h>
typedef union { typedef union {
uint64_t q[2]; uint64_t q[2];
@ -40,38 +41,71 @@ typedef union {
uint8_t b[16]; uint8_t b[16];
} block128; } block128;
static inline void block128_copy_bytes(block128 *block, uint8_t *src, uint32_t len) static inline void block128_copy_bytes(block128 *block, const uint8_t *src, uint32_t len)
{ {
int i; int i;
for (i = 0; i < len; i++) block->b[i] = src[i]; for (i = 0; i < len; i++) block->b[i] = src[i];
} }
static inline void block128_copy(block128 *d, const block128 *s) static inline void block128_copy_aligned(block128 *d, const block128 *s)
{ {
d->q[0] = s->q[0]; d->q[1] = s->q[1]; d->q[0] = s->q[0]; d->q[1] = s->q[1];
} }
static inline void block128_copy(block128 *d, const block128 *s)
{
if (need_alignment(d, 8) || need_alignment(s, 8)) {
block128_copy_bytes(d, (const uint8_t *) s, 16);
} else {
block128_copy_aligned(d, s);
}
}
static inline void block128_zero(block128 *d) static inline void block128_zero(block128 *d)
{ {
d->q[0] = 0; d->q[1] = 0; d->q[0] = 0; d->q[1] = 0;
} }
static inline void block128_xor(block128 *d, const block128 *s) static inline void block128_xor_bytes(block128 *block, const uint8_t *src, uint32_t len)
{
int i;
for (i = 0; i < len; i++) block->b[i] ^= src[i];
}
static inline void block128_xor_aligned(block128 *d, const block128 *s)
{ {
d->q[0] ^= s->q[0]; d->q[0] ^= s->q[0];
d->q[1] ^= s->q[1]; d->q[1] ^= s->q[1];
} }
static inline void block128_vxor(block128 *d, const block128 *s1, const block128 *s2) static inline void block128_xor(block128 *d, const block128 *s)
{
if (need_alignment(d, 8) || need_alignment(s, 8)) {
block128_xor_bytes(d, (const uint8_t *) s, 16);
} else {
block128_xor_aligned(d, s);
}
}
static inline void block128_vxor_bytes(block128 *block, const uint8_t *src1, const uint8_t *src2, uint32_t len)
{
int i;
for (i = 0; i < len; i++) block->b[i] = src1[i] ^ src2[i];
}
static inline void block128_vxor_aligned(block128 *d, const block128 *s1, const block128 *s2)
{ {
d->q[0] = s1->q[0] ^ s2->q[0]; d->q[0] = s1->q[0] ^ s2->q[0];
d->q[1] = s1->q[1] ^ s2->q[1]; d->q[1] = s1->q[1] ^ s2->q[1];
} }
static inline void block128_xor_bytes(block128 *block, uint8_t *src, uint32_t len) static inline void block128_vxor(block128 *d, const block128 *s1, const block128 *s2)
{ {
int i; if (need_alignment(d, 8) || need_alignment(s1, 8) || need_alignment(s2, 8)) {
for (i = 0; i < len; i++) block->b[i] ^= src[i]; block128_vxor_bytes(d, (const uint8_t *) s1, (const uint8_t *) s2, 16);
} else {
block128_vxor_aligned(d, s1, s2);
}
} }
static inline void block128_inc_be(block128 *b) static inline void block128_inc_be(block128 *b)

View File

@ -324,21 +324,22 @@ static void create_round_key(uint8_t *expandedKey, uint8_t *rk)
static void aes_main(aes_key *key, uint8_t *state) static void aes_main(aes_key *key, uint8_t *state)
{ {
int i = 0; int i = 0;
uint8_t rk[16]; uint32_t rk[4];
uint8_t *rkptr = (uint8_t *) rk;
create_round_key(key->data, rk); create_round_key(key->data, rkptr);
add_round_key(state, rk); add_round_key(state, rkptr);
for (i = 1; i < key->nbr; i++) { for (i = 1; i < key->nbr; i++) {
create_round_key(key->data + 16 * i, rk); create_round_key(key->data + 16 * i, rkptr);
shift_rows(state); shift_rows(state);
mix_columns(state); mix_columns(state);
add_round_key(state, rk); add_round_key(state, rkptr);
} }
create_round_key(key->data + 16 * key->nbr, rk); create_round_key(key->data + 16 * key->nbr, rkptr);
shift_rows(state); shift_rows(state);
add_round_key(state, rk); add_round_key(state, rkptr);
} }
static void shift_rows_inv(uint8_t *state) static void shift_rows_inv(uint8_t *state)
@ -374,21 +375,22 @@ static void mix_columns_inv(uint8_t *state)
static void aes_main_inv(aes_key *key, uint8_t *state) static void aes_main_inv(aes_key *key, uint8_t *state)
{ {
int i = 0; int i = 0;
uint8_t rk[16]; uint32_t rk[4];
uint8_t *rkptr = (uint8_t *) rk;
create_round_key(key->data + 16 * key->nbr, rk); create_round_key(key->data + 16 * key->nbr, rkptr);
add_round_key(state, rk); add_round_key(state, rkptr);
for (i = key->nbr - 1; i > 0; i--) { for (i = key->nbr - 1; i > 0; i--) {
create_round_key(key->data + 16 * i, rk); create_round_key(key->data + 16 * i, rkptr);
shift_rows_inv(state); shift_rows_inv(state);
add_round_key(state, rk); add_round_key(state, rkptr);
mix_columns_inv(state); mix_columns_inv(state);
} }
create_round_key(key->data, rk); create_round_key(key->data, rkptr);
shift_rows_inv(state); shift_rows_inv(state);
add_round_key(state, rk); add_round_key(state, rkptr);
} }
/* Set the block values, for the block: /* Set the block values, for the block:
@ -405,26 +407,28 @@ static void aes_main_inv(aes_key *key, uint8_t *state)
void cryptonite_aes_generic_encrypt_block(aes_block *output, aes_key *key, aes_block *input) void cryptonite_aes_generic_encrypt_block(aes_block *output, aes_key *key, aes_block *input)
{ {
uint8_t block[16]; uint32_t block[4];
uint8_t *iptr, *optr; uint8_t *iptr, *optr, *bptr;
iptr = (uint8_t *) input; iptr = (uint8_t *) input;
optr = (uint8_t *) output; optr = (uint8_t *) output;
swap_block(block, iptr); bptr = (uint8_t *) block;
aes_main(key, block); swap_block(bptr, iptr);
swap_block(optr, block); aes_main(key, bptr);
swap_block(optr, bptr);
} }
void cryptonite_aes_generic_decrypt_block(aes_block *output, aes_key *key, aes_block *input) void cryptonite_aes_generic_decrypt_block(aes_block *output, aes_key *key, aes_block *input)
{ {
uint8_t block[16]; uint32_t block[4];
uint8_t *iptr, *optr; uint8_t *iptr, *optr, *bptr;
iptr = (uint8_t *) input; iptr = (uint8_t *) input;
optr = (uint8_t *) output; optr = (uint8_t *) output;
swap_block(block, iptr); bptr = (uint8_t *) block;
aes_main_inv(key, block); swap_block(bptr, iptr);
swap_block(optr, block); aes_main_inv(key, bptr);
swap_block(optr, bptr);
} }
void cryptonite_aes_generic_init(aes_key *key, uint8_t *origkey, uint8_t size) void cryptonite_aes_generic_init(aes_key *key, uint8_t *origkey, uint8_t size)

View File

@ -370,7 +370,7 @@ void cryptonite_aes_gcm_init(aes_gcm *gcm, aes_key *key, uint8_t *iv, uint32_t l
cryptonite_gf_mul(&gcm->iv, &gcm->h); cryptonite_gf_mul(&gcm->iv, &gcm->h);
} }
block128_copy(&gcm->civ, &gcm->iv); block128_copy_aligned(&gcm->civ, &gcm->iv);
} }
void cryptonite_aes_gcm_aad(aes_gcm *gcm, uint8_t *input, uint32_t length) void cryptonite_aes_gcm_aad(aes_gcm *gcm, uint8_t *input, uint32_t length)
@ -399,7 +399,7 @@ void cryptonite_aes_gcm_finish(uint8_t *tag, aes_gcm *gcm, aes_key *key)
gcm_ghash_add(gcm, &lblock); gcm_ghash_add(gcm, &lblock);
cryptonite_aes_encrypt_block(&lblock, key, &gcm->iv); cryptonite_aes_encrypt_block(&lblock, key, &gcm->iv);
block128_xor(&gcm->tag, &lblock); block128_xor_aligned(&gcm->tag, &lblock);
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
tag[i] = gcm->tag.b[i]; tag[i] = gcm->tag.b[i];
@ -464,7 +464,7 @@ void cryptonite_aes_ocb_init(aes_ocb *ocb, aes_key *key, uint8_t *iv, uint32_t l
memcpy(stretch, ktop.b, 16); memcpy(stretch, ktop.b, 16);
memcpy(tmp.b, ktop.b + 1, 8); memcpy(tmp.b, ktop.b + 1, 8);
block128_xor(&tmp, &ktop); block128_xor_aligned(&tmp, &ktop);
memcpy(stretch + 16, tmp.b, 8); memcpy(stretch + 16, tmp.b, 8);
/* initialize the encryption offset from stretch */ /* initialize the encryption offset from stretch */
@ -490,22 +490,22 @@ void cryptonite_aes_ocb_aad(aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t
for (i=1; i<= length/16; i++, input=input+16) { for (i=1; i<= length/16; i++, input=input+16) {
ocb_get_L_i(&tmp, ocb->li, i); ocb_get_L_i(&tmp, ocb->li, i);
block128_xor(&ocb->offset_aad, &tmp); block128_xor_aligned(&ocb->offset_aad, &tmp);
block128_vxor(&tmp, &ocb->offset_aad, (block128 *) input); block128_vxor(&tmp, &ocb->offset_aad, (block128 *) input);
cryptonite_aes_encrypt_block(&tmp, key, &tmp); cryptonite_aes_encrypt_block(&tmp, key, &tmp);
block128_xor(&ocb->sum_aad, &tmp); block128_xor_aligned(&ocb->sum_aad, &tmp);
} }
length = length % 16; /* Bytes in final block */ length = length % 16; /* Bytes in final block */
if (length > 0) { if (length > 0) {
block128_xor(&ocb->offset_aad, &ocb->lstar); block128_xor_aligned(&ocb->offset_aad, &ocb->lstar);
block128_zero(&tmp); block128_zero(&tmp);
block128_copy_bytes(&tmp, input, length); block128_copy_bytes(&tmp, input, length);
tmp.b[length] = 0x80; tmp.b[length] = 0x80;
block128_xor(&tmp, &ocb->offset_aad); block128_xor_aligned(&tmp, &ocb->offset_aad);
cryptonite_aes_encrypt_block(&tmp, key, &tmp); cryptonite_aes_encrypt_block(&tmp, key, &tmp);
block128_xor(&ocb->sum_aad, &tmp); block128_xor_aligned(&ocb->sum_aad, &tmp);
} }
} }
@ -513,8 +513,8 @@ void cryptonite_aes_ocb_finish(uint8_t *tag, aes_ocb *ocb, aes_key *key)
{ {
block128 tmp; block128 tmp;
block128_vxor(&tmp, &ocb->sum_enc, &ocb->offset_enc); block128_vxor_aligned(&tmp, &ocb->sum_enc, &ocb->offset_enc);
block128_xor(&tmp, &ocb->ldollar); block128_xor_aligned(&tmp, &ocb->ldollar);
cryptonite_aes_encrypt_block((block128 *) tag, key, &tmp); cryptonite_aes_encrypt_block((block128 *) tag, key, &tmp);
block128_xor((block128 *) tag, &ocb->sum_aad); block128_xor((block128 *) tag, &ocb->sum_aad);
} }
@ -699,7 +699,7 @@ static void ocb_generic_crypt(uint8_t *output, aes_ocb *ocb, aes_key *key,
for (i = 1; i <= length/16; i++, input += 16, output += 16) { for (i = 1; i <= length/16; i++, input += 16, output += 16) {
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
ocb_get_L_i(&tmp, ocb->li, i); ocb_get_L_i(&tmp, ocb->li, i);
block128_xor(&ocb->offset_enc, &tmp); block128_xor_aligned(&ocb->offset_enc, &tmp);
block128_vxor(&tmp, &ocb->offset_enc, (block128 *) input); block128_vxor(&tmp, &ocb->offset_enc, (block128 *) input);
if (encrypt) { if (encrypt) {
@ -716,24 +716,24 @@ static void ocb_generic_crypt(uint8_t *output, aes_ocb *ocb, aes_key *key,
/* process the last partial block if any */ /* process the last partial block if any */
length = length % 16; length = length % 16;
if (length > 0) { if (length > 0) {
block128_xor(&ocb->offset_enc, &ocb->lstar); block128_xor_aligned(&ocb->offset_enc, &ocb->lstar);
cryptonite_aes_encrypt_block(&pad, key, &ocb->offset_enc); cryptonite_aes_encrypt_block(&pad, key, &ocb->offset_enc);
if (encrypt) { if (encrypt) {
block128_zero(&tmp); block128_zero(&tmp);
block128_copy_bytes(&tmp, input, length); block128_copy_bytes(&tmp, input, length);
tmp.b[length] = 0x80; tmp.b[length] = 0x80;
block128_xor(&ocb->sum_enc, &tmp); block128_xor_aligned(&ocb->sum_enc, &tmp);
block128_xor(&pad, &tmp); block128_xor_aligned(&pad, &tmp);
memcpy(output, pad.b, length); memcpy(output, pad.b, length);
output += length; output += length;
} else { } else {
block128_copy(&tmp, &pad); block128_copy_aligned(&tmp, &pad);
block128_copy_bytes(&tmp, input, length); block128_copy_bytes(&tmp, input, length);
block128_xor(&tmp, &pad); block128_xor_aligned(&tmp, &pad);
tmp.b[length] = 0x80; tmp.b[length] = 0x80;
memcpy(output, tmp.b, length); memcpy(output, tmp.b, length);
block128_xor(&ocb->sum_enc, &tmp); block128_xor_aligned(&ocb->sum_enc, &tmp);
input += length; input += length;
} }
} }

View File

@ -25,6 +25,7 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
#include "cryptonite_md4.h" #include "cryptonite_md4.h"
void cryptonite_md4_init(struct md4_ctx *ctx) void cryptonite_md4_init(struct md4_ctx *ctx)
@ -130,9 +131,18 @@ void cryptonite_md4_update(struct md4_ctx *ctx, const uint8_t *data, uint32_t le
index = 0; index = 0;
} }
/* process as much 64-block as possible */ if (need_alignment(data, 4)) {
for (; len >= 64; len -= 64, data += 64) uint32_t tramp[16];
md4_do_chunk(ctx, (uint32_t *) data); ASSERT_ALIGNMENT(tramp, 4);
for (; len >= 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
md4_do_chunk(ctx, tramp);
}
} else {
/* process as much 64-block as possible */
for (; len >= 64; len -= 64, data += 64)
md4_do_chunk(ctx, (uint32_t *) data);
}
/* append data into buf */ /* append data into buf */
if (len) if (len)
@ -157,5 +167,8 @@ void cryptonite_md4_finalize(struct md4_ctx *ctx, uint8_t *out)
cryptonite_md4_update(ctx, (uint8_t *) &bits, sizeof(bits)); cryptonite_md4_update(ctx, (uint8_t *) &bits, sizeof(bits));
/* output hash */ /* output hash */
le32_to_cpu_array((uint32_t *) out, ctx->h, 4); store_le32(out , ctx->h[0]);
store_le32(out+ 4, ctx->h[1]);
store_le32(out+ 8, ctx->h[2]);
store_le32(out+12, ctx->h[3]);
} }

View File

@ -25,6 +25,7 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
#include "cryptonite_md5.h" #include "cryptonite_md5.h"
void cryptonite_md5_init(struct md5_ctx *ctx) void cryptonite_md5_init(struct md5_ctx *ctx)
@ -143,9 +144,18 @@ void cryptonite_md5_update(struct md5_ctx *ctx, const uint8_t *data, uint32_t le
index = 0; index = 0;
} }
/* process as much 64-block as possible */ if (need_alignment(data, 4)) {
for (; len >= 64; len -= 64, data += 64) uint32_t tramp[16];
md5_do_chunk(ctx, (uint32_t *) data); ASSERT_ALIGNMENT(tramp, 4);
for (; len >= 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
md5_do_chunk(ctx, tramp);
}
} else {
/* process as much 64-block as possible */
for (; len >= 64; len -= 64, data += 64)
md5_do_chunk(ctx, (uint32_t *) data);
}
/* append data into buf */ /* append data into buf */
if (len) if (len)
@ -157,7 +167,6 @@ void cryptonite_md5_finalize(struct md5_ctx *ctx, uint8_t *out)
static uint8_t padding[64] = { 0x80, }; static uint8_t padding[64] = { 0x80, };
uint64_t bits; uint64_t bits;
uint32_t index, padlen; uint32_t index, padlen;
uint32_t *p = (uint32_t *) out;
/* add padding and update data with it */ /* add padding and update data with it */
bits = cpu_to_le64(ctx->sz << 3); bits = cpu_to_le64(ctx->sz << 3);
@ -171,8 +180,8 @@ void cryptonite_md5_finalize(struct md5_ctx *ctx, uint8_t *out)
cryptonite_md5_update(ctx, (uint8_t *) &bits, sizeof(bits)); cryptonite_md5_update(ctx, (uint8_t *) &bits, sizeof(bits));
/* output hash */ /* output hash */
p[0] = cpu_to_le32(ctx->h[0]); store_le32(out , ctx->h[0]);
p[1] = cpu_to_le32(ctx->h[1]); store_le32(out+ 4, ctx->h[1]);
p[2] = cpu_to_le32(ctx->h[2]); store_le32(out+ 8, ctx->h[2]);
p[3] = cpu_to_le32(ctx->h[3]); store_le32(out+12, ctx->h[3]);
} }

View File

@ -37,11 +37,7 @@
#include <string.h> #include <string.h>
#include "cryptonite_poly1305.h" #include "cryptonite_poly1305.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
static inline uint32_t load32(uint8_t *p)
{
return (le32_to_cpu(*((uint32_t *) p)));
}
static void poly1305_do_chunk(poly1305_ctx *ctx, uint8_t *data, int blocks, int final) static void poly1305_do_chunk(poly1305_ctx *ctx, uint8_t *data, int blocks, int final)
{ {
@ -61,11 +57,11 @@ static void poly1305_do_chunk(poly1305_ctx *ctx, uint8_t *data, int blocks, int
s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5;
while (blocks--) { while (blocks--) {
h0 += (load32(data+ 0) ) & 0x3ffffff; h0 += (load_le32(data+ 0) ) & 0x3ffffff;
h1 += (load32(data+ 3) >> 2) & 0x3ffffff; h1 += (load_le32(data+ 3) >> 2) & 0x3ffffff;
h2 += (load32(data+ 6) >> 4) & 0x3ffffff; h2 += (load_le32(data+ 6) >> 4) & 0x3ffffff;
h3 += (load32(data+ 9) >> 6) & 0x3ffffff; h3 += (load_le32(data+ 9) >> 6) & 0x3ffffff;
h4 += (load32(data+12) >> 8) | hibit; h4 += (load_le32(data+12) >> 8) | hibit;
d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1); d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1);
d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2); d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2);
@ -94,16 +90,16 @@ void cryptonite_poly1305_init(poly1305_ctx *ctx, poly1305_key *key)
memset(ctx, 0, sizeof(poly1305_ctx)); memset(ctx, 0, sizeof(poly1305_ctx));
ctx->r[0] = (load32(&k[ 0]) ) & 0x3ffffff; ctx->r[0] = (load_le32(&k[ 0]) ) & 0x3ffffff;
ctx->r[1] = (load32(&k[ 3]) >> 2) & 0x3ffff03; ctx->r[1] = (load_le32(&k[ 3]) >> 2) & 0x3ffff03;
ctx->r[2] = (load32(&k[ 6]) >> 4) & 0x3ffc0ff; ctx->r[2] = (load_le32(&k[ 6]) >> 4) & 0x3ffc0ff;
ctx->r[3] = (load32(&k[ 9]) >> 6) & 0x3f03fff; ctx->r[3] = (load_le32(&k[ 9]) >> 6) & 0x3f03fff;
ctx->r[4] = (load32(&k[12]) >> 8) & 0x00fffff; ctx->r[4] = (load_le32(&k[12]) >> 8) & 0x00fffff;
ctx->pad[0] = load32(&k[16]); ctx->pad[0] = load_le32(&k[16]);
ctx->pad[1] = load32(&k[20]); ctx->pad[1] = load_le32(&k[20]);
ctx->pad[2] = load32(&k[24]); ctx->pad[2] = load_le32(&k[24]);
ctx->pad[3] = load32(&k[28]); ctx->pad[3] = load_le32(&k[28]);
ctx->index = 0; ctx->index = 0;
} }

View File

@ -24,6 +24,7 @@
#include "cryptonite_ripemd.h" #include "cryptonite_ripemd.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
#include <string.h> #include <string.h>
void cryptonite_ripemd160_init(struct ripemd160_ctx *ctx) void cryptonite_ripemd160_init(struct ripemd160_ctx *ctx)
@ -265,9 +266,20 @@ void cryptonite_ripemd160_update(struct ripemd160_ctx *ctx, const uint8_t *data,
index = 0; index = 0;
} }
for (; len >= 64; len -= 64, data += 64) if (need_alignment(data, 4)) {
ripemd160_do_chunk(ctx, (uint32_t *) data); uint32_t tramp[16];
ASSERT_ALIGNMENT(tramp, 4);
for (; len >= 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
ripemd160_do_chunk(ctx, tramp);
}
} else {
/* process as much 64-block as possible */
for (; len >= 64; len -= 64, data += 64)
ripemd160_do_chunk(ctx, (uint32_t *) data);
}
/* append data into buf */
if (len) if (len)
memcpy(ctx->buf + index, data, len); memcpy(ctx->buf + index, data, len);
} }
@ -277,7 +289,6 @@ void cryptonite_ripemd160_finalize(struct ripemd160_ctx *ctx, uint8_t *out)
static uint8_t padding[64] = { 0x80, }; static uint8_t padding[64] = { 0x80, };
uint64_t bits; uint64_t bits;
uint32_t index, padlen; uint32_t index, padlen;
uint32_t *p = (uint32_t *) out;
/* add padding and update data with it */ /* add padding and update data with it */
bits = cpu_to_le64(ctx->sz << 3); bits = cpu_to_le64(ctx->sz << 3);
@ -291,9 +302,9 @@ void cryptonite_ripemd160_finalize(struct ripemd160_ctx *ctx, uint8_t *out)
cryptonite_ripemd160_update(ctx, (uint8_t *) &bits, sizeof(bits)); cryptonite_ripemd160_update(ctx, (uint8_t *) &bits, sizeof(bits));
/* output digest */ /* output digest */
p[0] = cpu_to_le32(ctx->h[0]); store_le32(out , ctx->h[0]);
p[1] = cpu_to_le32(ctx->h[1]); store_le32(out+ 4, ctx->h[1]);
p[2] = cpu_to_le32(ctx->h[2]); store_le32(out+ 8, ctx->h[2]);
p[3] = cpu_to_le32(ctx->h[3]); store_le32(out+12, ctx->h[3]);
p[4] = cpu_to_le32(ctx->h[4]); store_le32(out+16, ctx->h[4]);
} }

View File

@ -33,6 +33,7 @@
#include <stdio.h> #include <stdio.h>
#include "cryptonite_salsa.h" #include "cryptonite_salsa.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
static const uint8_t sigma[16] = "expand 32-byte k"; static const uint8_t sigma[16] = "expand 32-byte k";
static const uint8_t tau[16] = "expand 16-byte k"; static const uint8_t tau[16] = "expand 16-byte k";
@ -58,11 +59,6 @@ static const uint8_t tau[16] = "expand 16-byte k";
QR (x15,x12,x13,x14); \ QR (x15,x12,x13,x14); \
} }
static inline uint32_t load32(const uint8_t *p)
{
return le32_to_cpu(*((uint32_t *) p));
}
static void salsa_core(int rounds, block *out, const cryptonite_salsa_state *in) static void salsa_core(int rounds, block *out, const cryptonite_salsa_state *in)
{ {
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
@ -126,34 +122,34 @@ void cryptonite_salsa_init_core(cryptonite_salsa_state *st,
const uint8_t *constants = (keylen == 32) ? sigma : tau; const uint8_t *constants = (keylen == 32) ? sigma : tau;
int i; int i;
st->d[0] = load32(constants + 0); st->d[0] = load_le32_aligned(constants + 0);
st->d[5] = load32(constants + 4); st->d[5] = load_le32_aligned(constants + 4);
st->d[10] = load32(constants + 8); st->d[10] = load_le32_aligned(constants + 8);
st->d[15] = load32(constants + 12); st->d[15] = load_le32_aligned(constants + 12);
st->d[1] = load32(key + 0); st->d[1] = load_le32(key + 0);
st->d[2] = load32(key + 4); st->d[2] = load_le32(key + 4);
st->d[3] = load32(key + 8); st->d[3] = load_le32(key + 8);
st->d[4] = load32(key + 12); st->d[4] = load_le32(key + 12);
/* we repeat the key on 128 bits */ /* we repeat the key on 128 bits */
if (keylen == 32) if (keylen == 32)
key += 16; key += 16;
st->d[11] = load32(key + 0); st->d[11] = load_le32(key + 0);
st->d[12] = load32(key + 4); st->d[12] = load_le32(key + 4);
st->d[13] = load32(key + 8); st->d[13] = load_le32(key + 8);
st->d[14] = load32(key + 12); st->d[14] = load_le32(key + 12);
st->d[9] = 0; st->d[9] = 0;
switch (ivlen) { switch (ivlen) {
case 8: case 8:
st->d[6] = load32(iv + 0); st->d[6] = load_le32(iv + 0);
st->d[7] = load32(iv + 4); st->d[7] = load_le32(iv + 4);
st->d[8] = 0; st->d[8] = 0;
break; break;
case 12: case 12:
st->d[6] = load32(iv + 0); st->d[6] = load_le32(iv + 0);
st->d[7] = load32(iv + 4); st->d[7] = load_le32(iv + 4);
st->d[8] = load32(iv + 8); st->d[8] = load_le32(iv + 8);
default: default:
return; return;
} }

View File

@ -27,6 +27,7 @@
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
#include "cryptonite_salsa.h" #include "cryptonite_salsa.h"
static void blockmix_salsa8(uint32_t *in, uint32_t *out, uint32_t *X, const uint32_t r) static void blockmix_salsa8(uint32_t *in, uint32_t *out, uint32_t *X, const uint32_t r)
@ -49,16 +50,6 @@ static inline uint64_t integerify(uint32_t *B, const uint32_t r)
return B[(2*r-1) * 16] | (uint64_t)B[(2*r-1) * 16 + 1] << 32; return B[(2*r-1) * 16] | (uint64_t)B[(2*r-1) * 16 + 1] << 32;
} }
static inline uint32_t load32(const uint8_t *p)
{
return le32_to_cpu(*((uint32_t *) p));
}
static inline void store32(const uint8_t *p, uint32_t val)
{
*((uint32_t *) p) = cpu_to_le32(val);
}
void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint32_t *V, uint32_t *XY) void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint32_t *V, uint32_t *XY)
{ {
uint32_t *X = XY; uint32_t *X = XY;
@ -69,7 +60,7 @@ void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint
const int r32 = 32*r; const int r32 = 32*r;
for (k = 0; k < r32; k++) for (k = 0; k < r32; k++)
X[k] = load32(&B[4 * k]); X[k] = load_le32_aligned(&B[4 * k]);
for (i = 0; i < N; i += 2) { for (i = 0; i < N; i += 2) {
array_copy32(&V[i * r32], X, r32); array_copy32(&V[i * r32], X, r32);
blockmix_salsa8(X, Y, Z, r); blockmix_salsa8(X, Y, Z, r);
@ -86,5 +77,5 @@ void cryptonite_scrypt_smix(uint8_t *B, const uint32_t r, const uint64_t N, uint
blockmix_salsa8(Y, X, Z, r); blockmix_salsa8(Y, X, Z, r);
} }
for (k = 0; k < r32; k++) for (k = 0; k < r32; k++)
store32(&B[4*k], X[k]); store_le32_aligned(&B[4*k], X[k]);
} }

View File

@ -25,6 +25,7 @@
#include <string.h> #include <string.h>
#include "cryptonite_sha1.h" #include "cryptonite_sha1.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
void cryptonite_sha1_init(struct sha1_ctx *ctx) void cryptonite_sha1_init(struct sha1_ctx *ctx)
{ {
@ -173,9 +174,18 @@ void cryptonite_sha1_update(struct sha1_ctx *ctx, const uint8_t *data, uint32_t
index = 0; index = 0;
} }
/* process as much 64-block as possible */ if (need_alignment(data, 4)) {
for (; len >= 64; len -= 64, data += 64) uint32_t tramp[16];
sha1_do_chunk(ctx, (uint32_t *) data); ASSERT_ALIGNMENT(tramp, 4);
for (; len >= 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
sha1_do_chunk(ctx, tramp);
}
} else {
/* process as much 64-block as possible */
for (; len >= 64; len -= 64, data += 64)
sha1_do_chunk(ctx, (uint32_t *) data);
}
/* append data into buf */ /* append data into buf */
if (len) if (len)
@ -187,7 +197,6 @@ void cryptonite_sha1_finalize(struct sha1_ctx *ctx, uint8_t *out)
static uint8_t padding[64] = { 0x80, }; static uint8_t padding[64] = { 0x80, };
uint64_t bits; uint64_t bits;
uint32_t index, padlen; uint32_t index, padlen;
uint32_t *p = (uint32_t *) out;
/* add padding and update data with it */ /* add padding and update data with it */
bits = cpu_to_be64(ctx->sz << 3); bits = cpu_to_be64(ctx->sz << 3);
@ -201,9 +210,9 @@ void cryptonite_sha1_finalize(struct sha1_ctx *ctx, uint8_t *out)
cryptonite_sha1_update(ctx, (uint8_t *) &bits, sizeof(bits)); cryptonite_sha1_update(ctx, (uint8_t *) &bits, sizeof(bits));
/* output hash */ /* output hash */
p[0] = cpu_to_be32(ctx->h[0]); store_be32(out , ctx->h[0]);
p[1] = cpu_to_be32(ctx->h[1]); store_be32(out+ 4, ctx->h[1]);
p[2] = cpu_to_be32(ctx->h[2]); store_be32(out+ 8, ctx->h[2]);
p[3] = cpu_to_be32(ctx->h[3]); store_be32(out+12, ctx->h[3]);
p[4] = cpu_to_be32(ctx->h[4]); store_be32(out+16, ctx->h[4]);
} }

View File

@ -25,6 +25,7 @@
#include <string.h> #include <string.h>
#include "cryptonite_sha256.h" #include "cryptonite_sha256.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
void cryptonite_sha224_init(struct sha224_ctx *ctx) void cryptonite_sha224_init(struct sha224_ctx *ctx)
{ {
@ -134,9 +135,18 @@ void cryptonite_sha256_update(struct sha256_ctx *ctx, const uint8_t *data, uint3
index = 0; index = 0;
} }
/* process as much 64-block as possible */ if (need_alignment(data, 4)) {
for (; len >= 64; len -= 64, data += 64) uint32_t tramp[16];
sha256_do_chunk(ctx, (uint32_t *) data); ASSERT_ALIGNMENT(tramp, 4);
for (; len >= 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
sha256_do_chunk(ctx, tramp);
}
} else {
/* process as much 64-block as possible */
for (; len >= 64; len -= 64, data += 64)
sha256_do_chunk(ctx, (uint32_t *) data);
}
/* append data into buf */ /* append data into buf */
if (len) if (len)
@ -156,7 +166,6 @@ void cryptonite_sha256_finalize(struct sha256_ctx *ctx, uint8_t *out)
static uint8_t padding[64] = { 0x80, }; static uint8_t padding[64] = { 0x80, };
uint64_t bits; uint64_t bits;
uint32_t i, index, padlen; uint32_t i, index, padlen;
uint32_t *p = (uint32_t *) out;
/* cpu -> big endian */ /* cpu -> big endian */
bits = cpu_to_be64(ctx->sz << 3); bits = cpu_to_be64(ctx->sz << 3);
@ -171,5 +180,5 @@ void cryptonite_sha256_finalize(struct sha256_ctx *ctx, uint8_t *out)
/* store to digest */ /* store to digest */
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
p[i] = cpu_to_be32(ctx->h[i]); store_be32(out+4*i, ctx->h[i]);
} }

View File

@ -24,6 +24,7 @@
#include <string.h> #include <string.h>
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
#include "cryptonite_sha512.h" #include "cryptonite_sha512.h"
void cryptonite_sha384_init(struct sha512_ctx *ctx) void cryptonite_sha384_init(struct sha512_ctx *ctx)
@ -153,9 +154,18 @@ void cryptonite_sha512_update(struct sha512_ctx *ctx, const uint8_t *data, uint3
index = 0; index = 0;
} }
/* process as much 128-block as possible */ if (need_alignment(data, 8)) {
for (; len >= 128; len -= 128, data += 128) uint64_t tramp[16];
sha512_do_chunk(ctx, (uint64_t *) data); ASSERT_ALIGNMENT(tramp, 8);
for (; len >= 128; len -= 128, data += 128) {
memcpy(tramp, data, 128);
sha512_do_chunk(ctx, tramp);
}
} else {
/* process as much 128-block as possible */
for (; len >= 128; len -= 128, data += 128)
sha512_do_chunk(ctx, (uint64_t *) data);
}
/* append data into buf */ /* append data into buf */
if (len) if (len)
@ -175,7 +185,6 @@ void cryptonite_sha512_finalize(struct sha512_ctx *ctx, uint8_t *out)
static uint8_t padding[128] = { 0x80, }; static uint8_t padding[128] = { 0x80, };
uint32_t i, index, padlen; uint32_t i, index, padlen;
uint64_t bits[2]; uint64_t bits[2];
uint64_t *p = (uint64_t *) out;
/* cpu -> big endian */ /* cpu -> big endian */
bits[0] = cpu_to_be64((ctx->sz[1] << 3 | ctx->sz[0] >> 61)); bits[0] = cpu_to_be64((ctx->sz[1] << 3 | ctx->sz[0] >> 61));
@ -191,7 +200,7 @@ void cryptonite_sha512_finalize(struct sha512_ctx *ctx, uint8_t *out)
/* store to digest */ /* store to digest */
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
p[i] = cpu_to_be64(ctx->h[i]); store_be64(out+8*i, ctx->h[i]);
} }
#include <stdio.h> #include <stdio.h>

View File

@ -26,6 +26,7 @@
#include "cryptonite_skein.h" #include "cryptonite_skein.h"
#include "cryptonite_skein256.h" #include "cryptonite_skein256.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
static const uint8_t K256_0[2] = { 14, 16, }; static const uint8_t K256_0[2] = { 14, 16, };
static const uint8_t K256_1[2] = { 52, 57, }; static const uint8_t K256_1[2] = { 52, 57, };
@ -143,9 +144,18 @@ void cryptonite_skein256_update(struct skein256_ctx *ctx, const uint8_t *data, u
ctx->bufindex = 0; ctx->bufindex = 0;
} }
/* process as much 32-block as possible except the last one in case we finalize */ if (need_alignment(data, 8)) {
for (; len > 32; len -= 32, data += 32) uint64_t tramp[4];
skein256_do_chunk(ctx, (uint64_t *) data, 32); ASSERT_ALIGNMENT(tramp, 8);
for (; len > 32; len -= 32, data += 32) {
memcpy(tramp, data, 32);
skein256_do_chunk(ctx, tramp, 32);
}
} else {
/* process as much 32-block as possible except the last one in case we finalize */
for (; len > 32; len -= 32, data += 32)
skein256_do_chunk(ctx, (uint64_t *) data, 32);
}
/* append data into buf */ /* append data into buf */
if (len) { if (len) {

View File

@ -26,6 +26,7 @@
#include "cryptonite_skein.h" #include "cryptonite_skein.h"
#include "cryptonite_skein512.h" #include "cryptonite_skein512.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
static const uint8_t K512_0[4] = { 46, 36, 19, 37, }; static const uint8_t K512_0[4] = { 46, 36, 19, 37, };
static const uint8_t K512_1[4] = { 33, 27, 14, 42, }; static const uint8_t K512_1[4] = { 33, 27, 14, 42, };
@ -161,9 +162,18 @@ void cryptonite_skein512_update(struct skein512_ctx *ctx, const uint8_t *data, u
ctx->bufindex = 0; ctx->bufindex = 0;
} }
/* process as much 64-block as possible except the last one in case we finalize */ if (need_alignment(data, 8)) {
for (; len > 64; len -= 64, data += 64) uint64_t tramp[8];
skein512_do_chunk(ctx, (uint64_t *) data, 64); ASSERT_ALIGNMENT(tramp, 8);
for (; len > 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
skein512_do_chunk(ctx, tramp, 64);
}
} else {
/* process as much 64-block as possible except the last one in case we finalize */
for (; len > 64; len -= 64, data += 64)
skein512_do_chunk(ctx, (uint64_t *) data, 64);
}
/* append data into buf */ /* append data into buf */
if (len) { if (len) {

View File

@ -25,6 +25,7 @@
#include <string.h> #include <string.h>
#include "cryptonite_tiger.h" #include "cryptonite_tiger.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
#include "cryptonite_align.h"
static const uint64_t t1[256] = { static const uint64_t t1[256] = {
0x02aab17cf7e90c5eULL,0xac424b03e243a8ecULL,0x72cd5be30dd5fcd3ULL,0x6d019b93f6f97f3aULL, 0x02aab17cf7e90c5eULL,0xac424b03e243a8ecULL,0x72cd5be30dd5fcd3ULL,0x6d019b93f6f97f3aULL,
@ -381,9 +382,18 @@ void cryptonite_tiger_update(struct tiger_ctx *ctx, const uint8_t *data, uint32_
index = 0; index = 0;
} }
/* process as much 64-block as possible */ if (need_alignment(data, 8)) {
for (; len >= 64; len -= 64, data += 64) uint64_t tramp[8];
tiger_do_chunk(ctx, (uint64_t *) data); ASSERT_ALIGNMENT(tramp, 8);
for (; len >= 64; len -= 64, data += 64) {
memcpy(tramp, data, 64);
tiger_do_chunk(ctx, tramp);
}
} else {
/* process as much 64-block as possible */
for (; len >= 64; len -= 64, data += 64)
tiger_do_chunk(ctx, (uint64_t *) data);
}
/* append data into buf */ /* append data into buf */
if (len) if (len)
@ -395,7 +405,6 @@ void cryptonite_tiger_finalize(struct tiger_ctx *ctx, uint8_t *out)
static uint8_t padding[64] = { 0x01, }; static uint8_t padding[64] = { 0x01, };
uint64_t bits; uint64_t bits;
uint32_t index, padlen; uint32_t index, padlen;
uint64_t *p = (uint64_t *) out;
/* add padding and update data with it */ /* add padding and update data with it */
bits = cpu_to_le64(ctx->sz << 3); bits = cpu_to_le64(ctx->sz << 3);
@ -409,7 +418,7 @@ void cryptonite_tiger_finalize(struct tiger_ctx *ctx, uint8_t *out)
cryptonite_tiger_update(ctx, (uint8_t *) &bits, sizeof(bits)); cryptonite_tiger_update(ctx, (uint8_t *) &bits, sizeof(bits));
/* output hash */ /* output hash */
p[0] = cpu_to_le64(ctx->h[0]); store_le64(out , ctx->h[0]);
p[1] = cpu_to_le64(ctx->h[1]); store_le64(out+ 8, ctx->h[1]);
p[2] = cpu_to_le64(ctx->h[2]); store_le64(out+16, ctx->h[2]);
} }

View File

@ -30,13 +30,9 @@
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include "cryptonite_xsalsa.h" #include "cryptonite_xsalsa.h"
#include "cryptonite_align.h"
#include "cryptonite_bitfn.h" #include "cryptonite_bitfn.h"
static inline uint32_t load32(const uint8_t *p)
{
return le32_to_cpu(*((uint32_t *) p));
}
/* XSalsa20 algorithm as described in https://cr.yp.to/snuffle/xsalsa-20081128.pdf */ /* XSalsa20 algorithm as described in https://cr.yp.to/snuffle/xsalsa-20081128.pdf */
void cryptonite_xsalsa_init(cryptonite_salsa_context *ctx, uint8_t nb_rounds, void cryptonite_xsalsa_init(cryptonite_salsa_context *ctx, uint8_t nb_rounds,
uint32_t keylen, const uint8_t *key, uint32_t keylen, const uint8_t *key,
@ -51,8 +47,8 @@ void cryptonite_xsalsa_init(cryptonite_salsa_context *ctx, uint8_t nb_rounds,
(x6, x7, x8, x9) is the first 128 bits of a 192-bit nonce (x6, x7, x8, x9) is the first 128 bits of a 192-bit nonce
*/ */
cryptonite_salsa_init_core(&ctx->st, keylen, key, 8, iv); cryptonite_salsa_init_core(&ctx->st, keylen, key, 8, iv);
ctx->st.d[ 8] = load32(iv + 8); ctx->st.d[ 8] = load_le32(iv + 8);
ctx->st.d[ 9] = load32(iv + 12); ctx->st.d[ 9] = load_le32(iv + 12);
/* Compute (z0, z1, . . . , z15) = doubleround ^(r/2) (x0, x1, . . . , x15) */ /* Compute (z0, z1, . . . , z15) = doubleround ^(r/2) (x0, x1, . . . , x15) */
block hSalsa; block hSalsa;
@ -73,8 +69,8 @@ void cryptonite_xsalsa_init(cryptonite_salsa_context *ctx, uint8_t nb_rounds,
ctx->st.d[12] = hSalsa.d[ 7] - ctx->st.d[ 7]; ctx->st.d[12] = hSalsa.d[ 7] - ctx->st.d[ 7];
ctx->st.d[13] = hSalsa.d[ 8] - ctx->st.d[ 8]; ctx->st.d[13] = hSalsa.d[ 8] - ctx->st.d[ 8];
ctx->st.d[14] = hSalsa.d[ 9] - ctx->st.d[ 9]; ctx->st.d[14] = hSalsa.d[ 9] - ctx->st.d[ 9];
ctx->st.d[ 6] = load32(iv + 16); ctx->st.d[ 6] = load_le32(iv + 16);
ctx->st.d[ 7] = load32(iv + 20); ctx->st.d[ 7] = load_le32(iv + 20);
ctx->st.d[ 8] = 0; ctx->st.d[ 8] = 0;
ctx->st.d[ 9] = 0; ctx->st.d[ 9] = 0;
} }