diff --git a/cbits/argon2/argon2.c b/cbits/argon2/argon2.c new file mode 100644 index 0000000..2bb3619 --- /dev/null +++ b/cbits/argon2/argon2.c @@ -0,0 +1,436 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include + +#include "argon2.h" +#include "encoding.c" +#include "core.c" + +const char *cryptonite_argon2_type2string(argon2_type type, int uppercase) { + switch (type) { + case Argon2_d: + return uppercase ? "Argon2d" : "argon2d"; + case Argon2_i: + return uppercase ? "Argon2i" : "argon2i"; + case Argon2_id: + return uppercase ? "Argon2id" : "argon2id"; + } + + return NULL; +} + +int cryptonite_argon2_ctx(argon2_context *context, argon2_type type) { + /* 1. Validate all inputs */ + int result = validate_inputs(context); + uint32_t memory_blocks, segment_length; + argon2_instance_t instance; + + if (ARGON2_OK != result) { + return result; + } + + if (Argon2_d != type && Argon2_i != type && Argon2_id != type) { + return ARGON2_INCORRECT_TYPE; + } + + /* 2. Align memory size */ + /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ + memory_blocks = context->m_cost; + + if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context->lanes) { + memory_blocks = 2 * ARGON2_SYNC_POINTS * context->lanes; + } + + segment_length = memory_blocks / (context->lanes * ARGON2_SYNC_POINTS); + /* Ensure that all segments have equal length */ + memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS); + + instance.version = context->version; + instance.memory = NULL; + instance.passes = context->t_cost; + instance.memory_blocks = memory_blocks; + instance.segment_length = segment_length; + instance.lane_length = segment_length * ARGON2_SYNC_POINTS; + instance.lanes = context->lanes; + instance.threads = context->threads; + instance.type = type; + + /* 3. Initialization: Hashing inputs, allocating memory, filling first + * blocks + */ + result = initialize(&instance, context); + + if (ARGON2_OK != result) { + return result; + } + + /* 4. Filling memory */ + result = fill_memory_blocks(&instance); + + if (ARGON2_OK != result) { + return result; + } + /* 5. Finalization */ + finalize(context, &instance); + + return ARGON2_OK; +} + +int cryptonite_argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, const size_t saltlen, + void *hash, const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type, + const uint32_t version){ + + argon2_context context; + int result; + uint8_t *out; + + if (hashlen > ARGON2_MAX_OUTLEN) { + return ARGON2_OUTPUT_TOO_LONG; + } + + if (hashlen < ARGON2_MIN_OUTLEN) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + out = malloc(hashlen); + if (!out) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + context.out = (uint8_t *)out; + context.outlen = (uint32_t)hashlen; + context.pwd = CONST_CAST(uint8_t *)pwd; + context.pwdlen = (uint32_t)pwdlen; + context.salt = CONST_CAST(uint8_t *)salt; + context.saltlen = (uint32_t)saltlen; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.t_cost = t_cost; + context.m_cost = m_cost; + context.lanes = parallelism; + context.threads = parallelism; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = ARGON2_DEFAULT_FLAGS; + context.version = version; + + result = cryptonite_argon2_ctx(&context, type); + + if (result != ARGON2_OK) { + clear_internal_memory(out, hashlen); + free(out); + return result; + } + + /* if raw hash requested, write it */ + if (hash) { + memcpy(hash, out, hashlen); + } + + /* if encoding requested, write it */ + if (encoded && encodedlen) { + if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) { + clear_internal_memory(out, hashlen); /* wipe buffers if error */ + clear_internal_memory(encoded, encodedlen); + free(out); + return ARGON2_ENCODING_FAIL; + } + } + clear_internal_memory(out, hashlen); + free(out); + + return ARGON2_OK; +} + +int cryptonite_argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return cryptonite_argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_i, + ARGON2_VERSION_NUMBER); +} + +int cryptonite_argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return cryptonite_argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER); +} + +int cryptonite_argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return cryptonite_argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_d, + ARGON2_VERSION_NUMBER); +} + +int cryptonite_argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + + return cryptonite_argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER); +} + +int cryptonite_argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, const size_t hashlen, + char *encoded, const size_t encodedlen) { + + return cryptonite_argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + NULL, hashlen, encoded, encodedlen, Argon2_id, + ARGON2_VERSION_NUMBER); +} + +int cryptonite_argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, const size_t hashlen) { + return cryptonite_argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, + hash, hashlen, NULL, 0, Argon2_id, + ARGON2_VERSION_NUMBER); +} + +static int cryptonite_argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) { + size_t i; + uint8_t d = 0U; + + for (i = 0U; i < len; i++) { + d |= b1[i] ^ b2[i]; + } + return (int)((1 & ((d - 1) >> 8)) - 1); +} + +int cryptonite_argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen, + argon2_type type) { + + argon2_context ctx; + uint8_t *desired_result = NULL; + + int ret = ARGON2_OK; + + size_t encoded_len; + uint32_t max_field_len; + + if (encoded == NULL) { + return ARGON2_DECODING_FAIL; + } + + encoded_len = strlen(encoded); + if (encoded_len > UINT32_MAX) { + return ARGON2_DECODING_FAIL; + } + + /* No field can be longer than the encoded length */ + max_field_len = (uint32_t)encoded_len; + + ctx.saltlen = max_field_len; + ctx.outlen = max_field_len; + + ctx.salt = malloc(ctx.saltlen); + ctx.out = malloc(ctx.outlen); + if (!ctx.salt || !ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ctx.pwd = (uint8_t *)pwd; + ctx.pwdlen = pwdlen; + + ret = decode_string(&ctx, encoded, type); + if (ret != ARGON2_OK) { + goto fail; + } + + /* Set aside the desired result, and get a new buffer. */ + desired_result = ctx.out; + ctx.out = malloc(ctx.outlen); + if (!ctx.out) { + ret = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + ret = cryptonite_argon2_verify_ctx(&ctx, (char *)desired_result, type); + if (ret != ARGON2_OK) { + goto fail; + } + +fail: + free(ctx.salt); + free(ctx.out); + free(desired_result); + + return ret; +} + +int cryptonite_argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return cryptonite_argon2_verify(encoded, pwd, pwdlen, Argon2_i); +} + +int cryptonite_argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return cryptonite_argon2_verify(encoded, pwd, pwdlen, Argon2_d); +} + +int cryptonite_argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) { + + return cryptonite_argon2_verify(encoded, pwd, pwdlen, Argon2_id); +} + +int cryptonite_argon2d_ctx(argon2_context *context) { + return cryptonite_argon2_ctx(context, Argon2_d); +} + +int cryptonite_argon2i_ctx(argon2_context *context) { + return cryptonite_argon2_ctx(context, Argon2_i); +} + +int cryptonite_argon2id_ctx(argon2_context *context) { + return cryptonite_argon2_ctx(context, Argon2_id); +} + +int cryptonite_argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type) { + int ret = cryptonite_argon2_ctx(context, type); + if (ret != ARGON2_OK) { + return ret; + } + + if (cryptonite_argon2_compare((uint8_t *)hash, context->out, context->outlen)) { + return ARGON2_VERIFY_MISMATCH; + } + + return ARGON2_OK; +} + +int cryptonite_argon2d_verify_ctx(argon2_context *context, const char *hash) { + return cryptonite_argon2_verify_ctx(context, hash, Argon2_d); +} + +int cryptonite_argon2i_verify_ctx(argon2_context *context, const char *hash) { + return cryptonite_argon2_verify_ctx(context, hash, Argon2_i); +} + +int cryptonite_argon2id_verify_ctx(argon2_context *context, const char *hash) { + return cryptonite_argon2_verify_ctx(context, hash, Argon2_id); +} + +const char *cryptonite_argon2_error_message(int error_code) { + switch (error_code) { + case ARGON2_OK: + return "OK"; + case ARGON2_OUTPUT_PTR_NULL: + return "Output pointer is NULL"; + case ARGON2_OUTPUT_TOO_SHORT: + return "Output is too short"; + case ARGON2_OUTPUT_TOO_LONG: + return "Output is too long"; + case ARGON2_PWD_TOO_SHORT: + return "Password is too short"; + case ARGON2_PWD_TOO_LONG: + return "Password is too long"; + case ARGON2_SALT_TOO_SHORT: + return "Salt is too short"; + case ARGON2_SALT_TOO_LONG: + return "Salt is too long"; + case ARGON2_AD_TOO_SHORT: + return "Associated data is too short"; + case ARGON2_AD_TOO_LONG: + return "Associated data is too long"; + case ARGON2_SECRET_TOO_SHORT: + return "Secret is too short"; + case ARGON2_SECRET_TOO_LONG: + return "Secret is too long"; + case ARGON2_TIME_TOO_SMALL: + return "Time cost is too small"; + case ARGON2_TIME_TOO_LARGE: + return "Time cost is too large"; + case ARGON2_MEMORY_TOO_LITTLE: + return "Memory cost is too small"; + case ARGON2_MEMORY_TOO_MUCH: + return "Memory cost is too large"; + case ARGON2_LANES_TOO_FEW: + return "Too few lanes"; + case ARGON2_LANES_TOO_MANY: + return "Too many lanes"; + case ARGON2_PWD_PTR_MISMATCH: + return "Password pointer is NULL, but password length is not 0"; + case ARGON2_SALT_PTR_MISMATCH: + return "Salt pointer is NULL, but salt length is not 0"; + case ARGON2_SECRET_PTR_MISMATCH: + return "Secret pointer is NULL, but secret length is not 0"; + case ARGON2_AD_PTR_MISMATCH: + return "Associated data pointer is NULL, but ad length is not 0"; + case ARGON2_MEMORY_ALLOCATION_ERROR: + return "Memory allocation error"; + case ARGON2_FREE_MEMORY_CBK_NULL: + return "The free memory callback is NULL"; + case ARGON2_ALLOCATE_MEMORY_CBK_NULL: + return "The allocate memory callback is NULL"; + case ARGON2_INCORRECT_PARAMETER: + return "Argon2_Context context is NULL"; + case ARGON2_INCORRECT_TYPE: + return "There is no such version of Argon2"; + case ARGON2_OUT_PTR_MISMATCH: + return "Output pointer mismatch"; + case ARGON2_THREADS_TOO_FEW: + return "Not enough threads"; + case ARGON2_THREADS_TOO_MANY: + return "Too many threads"; + case ARGON2_MISSING_ARGS: + return "Missing arguments"; + case ARGON2_ENCODING_FAIL: + return "Encoding failed"; + case ARGON2_DECODING_FAIL: + return "Decoding failed"; + case ARGON2_THREAD_FAIL: + return "Threading failure"; + case ARGON2_DECODING_LENGTH_FAIL: + return "Some of encoded parameters are too long or too short"; + case ARGON2_VERIFY_MISMATCH: + return "The password does not match the supplied hash"; + default: + return "Unknown error code"; + } +} + +size_t cryptonite_argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism, + uint32_t saltlen, uint32_t hashlen, argon2_type type) { + return strlen("$$v=$m=,t=,p=$$") + strlen(cryptonite_argon2_type2string(type, 0)) + + numlen(t_cost) + numlen(m_cost) + numlen(parallelism) + + b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) + 1; +} diff --git a/cbits/argon2/argon2.h b/cbits/argon2/argon2.h new file mode 100644 index 0000000..595bfde --- /dev/null +++ b/cbits/argon2/argon2.h @@ -0,0 +1,435 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ARGON2_H +#define ARGON2_H + +#include +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Symbols visibility control */ +#ifdef A2_VISCTL +#define ARGON2_PUBLIC __attribute__((visibility("default"))) +#elif _MSC_VER +#define ARGON2_PUBLIC __declspec(dllexport) +#else +#define ARGON2_PUBLIC +#endif + +/* + * Argon2 input parameter restrictions + */ + +/* Minimum and maximum number of lanes (degree of parallelism) */ +#define ARGON2_MIN_LANES UINT32_C(1) +#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF) + +/* Minimum and maximum number of threads */ +#define ARGON2_MIN_THREADS UINT32_C(1) +#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF) + +/* Number of synchronization points between lanes per pass */ +#define ARGON2_SYNC_POINTS UINT32_C(4) + +/* Minimum and maximum digest size in bytes */ +#define ARGON2_MIN_OUTLEN UINT32_C(4) +#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */ +#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */ + +#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b)) +/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */ +#define ARGON2_MAX_MEMORY_BITS \ + ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1)) +#define ARGON2_MAX_MEMORY \ + ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS) + +/* Minimum and maximum number of passes */ +#define ARGON2_MIN_TIME UINT32_C(1) +#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum password length in bytes */ +#define ARGON2_MIN_PWD_LENGTH UINT32_C(0) +#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum associated data length in bytes */ +#define ARGON2_MIN_AD_LENGTH UINT32_C(0) +#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum salt length in bytes */ +#define ARGON2_MIN_SALT_LENGTH UINT32_C(8) +#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF) + +/* Minimum and maximum key length in bytes */ +#define ARGON2_MIN_SECRET UINT32_C(0) +#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF) + +/* Flags to determine which fields are securely wiped (default = no wipe). */ +#define ARGON2_DEFAULT_FLAGS UINT32_C(0) +#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0) +#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1) + +/* Global flag to determine if we are wiping internal memory buffers. This flag + * is defined in core.c and deafults to 1 (wipe internal memory). */ +//extern int FLAG_clear_internal_memory; + +/* Error codes */ +typedef enum Argon2_ErrorCodes { + ARGON2_OK = 0, + + ARGON2_OUTPUT_PTR_NULL = -1, + + ARGON2_OUTPUT_TOO_SHORT = -2, + ARGON2_OUTPUT_TOO_LONG = -3, + + ARGON2_PWD_TOO_SHORT = -4, + ARGON2_PWD_TOO_LONG = -5, + + ARGON2_SALT_TOO_SHORT = -6, + ARGON2_SALT_TOO_LONG = -7, + + ARGON2_AD_TOO_SHORT = -8, + ARGON2_AD_TOO_LONG = -9, + + ARGON2_SECRET_TOO_SHORT = -10, + ARGON2_SECRET_TOO_LONG = -11, + + ARGON2_TIME_TOO_SMALL = -12, + ARGON2_TIME_TOO_LARGE = -13, + + ARGON2_MEMORY_TOO_LITTLE = -14, + ARGON2_MEMORY_TOO_MUCH = -15, + + ARGON2_LANES_TOO_FEW = -16, + ARGON2_LANES_TOO_MANY = -17, + + ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */ + ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */ + ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */ + ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */ + + ARGON2_MEMORY_ALLOCATION_ERROR = -22, + + ARGON2_FREE_MEMORY_CBK_NULL = -23, + ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24, + + ARGON2_INCORRECT_PARAMETER = -25, + ARGON2_INCORRECT_TYPE = -26, + + ARGON2_OUT_PTR_MISMATCH = -27, + + ARGON2_THREADS_TOO_FEW = -28, + ARGON2_THREADS_TOO_MANY = -29, + + ARGON2_MISSING_ARGS = -30, + + ARGON2_ENCODING_FAIL = -31, + + ARGON2_DECODING_FAIL = -32, + + ARGON2_THREAD_FAIL = -33, + + ARGON2_DECODING_LENGTH_FAIL = -34, + + ARGON2_VERIFY_MISMATCH = -35 +} argon2_error_codes; + +/* Memory allocator types --- for external allocation */ +typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate); +typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate); + +/* Argon2 external data structures */ + +/* + ***** + * Context: structure to hold Argon2 inputs: + * output array and its length, + * password and its length, + * salt and its length, + * secret and its length, + * associated data and its length, + * number of passes, amount of used memory (in KBytes, can be rounded up a bit) + * number of parallel threads that will be run. + * All the parameters above affect the output hash value. + * Additionally, two function pointers can be provided to allocate and + * deallocate the memory (if NULL, memory will be allocated internally). + * Also, three flags indicate whether to erase password, secret as soon as they + * are pre-hashed (and thus not needed anymore), and the entire memory + ***** + * Simplest situation: you have output array out[8], password is stored in + * pwd[32], salt is stored in salt[16], you do not have keys nor associated + * data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with + * 4 parallel lanes. + * You want to erase the password, but you're OK with last pass not being + * erased. You want to use the default memory allocator. + * Then you initialize: + Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) + */ +typedef struct Argon2_Context { + uint8_t *out; /* output array */ + uint32_t outlen; /* digest length */ + + uint8_t *pwd; /* password array */ + uint32_t pwdlen; /* password length */ + + uint8_t *salt; /* salt array */ + uint32_t saltlen; /* salt length */ + + uint8_t *secret; /* key array */ + uint32_t secretlen; /* key length */ + + uint8_t *ad; /* associated data array */ + uint32_t adlen; /* associated data length */ + + uint32_t t_cost; /* number of passes */ + uint32_t m_cost; /* amount of memory requested (KB) */ + uint32_t lanes; /* number of lanes */ + uint32_t threads; /* maximum number of threads */ + + uint32_t version; /* version number */ + + allocate_fptr allocate_cbk; /* pointer to memory allocator */ + deallocate_fptr free_cbk; /* pointer to memory deallocator */ + + uint32_t flags; /* array of bool options */ +} argon2_context; + +/* Argon2 primitive type */ +typedef enum Argon2_type { + Argon2_d = 0, + Argon2_i = 1, + Argon2_id = 2 +} argon2_type; + +/* Version of the algorithm */ +typedef enum Argon2_version { + ARGON2_VERSION_10 = 0x10, + ARGON2_VERSION_13 = 0x13, + ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 +} argon2_version; + +/* + * Function that gives the string representation of an argon2_type. + * @param type The argon2_type that we want the string for + * @param uppercase Whether the string should have the first letter uppercase + * @return NULL if invalid type, otherwise the string representation. + */ +ARGON2_PUBLIC const char *cryptonite_argon2_type2string(argon2_type type, int uppercase); + +/* + * Function that performs memory-hard hashing with certain degree of parallelism + * @param context Pointer to the Argon2 internal structure + * @return Error code if smth is wrong, ARGON2_OK otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2_ctx(argon2_context *context, argon2_type type); + +/** + * Hashes a password with Argon2i, producing an encoded hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hashlen Desired length of the hash in bytes + * @param encoded Buffer where to write the encoded hash + * @param encodedlen Size of the buffer (thus max size of the encoded hash) + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int cryptonite_argon2i_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +/** + * Hashes a password with Argon2i, producing a raw hash by allocating memory at + * @hash + * @param t_cost Number of iterations + * @param m_cost Sets memory usage to m_cost kibibytes + * @param parallelism Number of threads and compute lanes + * @param pwd Pointer to password + * @param pwdlen Password size in bytes + * @param salt Pointer to salt + * @param saltlen Salt size in bytes + * @param hash Buffer where to write the raw hash - updated by the function + * @param hashlen Desired length of the hash in bytes + * @pre Different parallelism levels will give different results + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int cryptonite_argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int cryptonite_argon2d_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +ARGON2_PUBLIC int cryptonite_argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +ARGON2_PUBLIC int cryptonite_argon2id_hash_encoded(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, + const void *pwd, const size_t pwdlen, + const void *salt, const size_t saltlen, + const size_t hashlen, char *encoded, + const size_t encodedlen); + +ARGON2_PUBLIC int cryptonite_argon2id_hash_raw(const uint32_t t_cost, + const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int cryptonite_argon2_hash(const uint32_t t_cost, const uint32_t m_cost, + const uint32_t parallelism, const void *pwd, + const size_t pwdlen, const void *salt, + const size_t saltlen, void *hash, + const size_t hashlen, char *encoded, + const size_t encodedlen, argon2_type type, + const uint32_t version); + +/** + * Verifies a password against an encoded string + * Encoded string is restricted as in validate_inputs() + * @param encoded String encoding parameters, salt, hash + * @param pwd Pointer to password + * @pre Returns ARGON2_OK if successful + */ +ARGON2_PUBLIC int cryptonite_argon2i_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +ARGON2_PUBLIC int cryptonite_argon2d_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +ARGON2_PUBLIC int cryptonite_argon2id_verify(const char *encoded, const void *pwd, + const size_t pwdlen); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int cryptonite_argon2_verify(const char *encoded, const void *pwd, + const size_t pwdlen, argon2_type type); + +/** + * Argon2d: Version of Argon2 that picks memory blocks depending + * on the password and salt. Only for side-channel-free + * environment!! + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2d_ctx(argon2_context *context); + +/** + * Argon2i: Version of Argon2 that picks memory blocks + * independent on the password and salt. Good for side-channels, + * but worse w.r.t. tradeoff attacks if only one pass is used. + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2i_ctx(argon2_context *context); + +/** + * Argon2id: Version of Argon2 where the first half-pass over memory is + * password-independent, the rest are password-dependent (on the password and + * salt). OK against side channels (they reduce to 1/2-pass Argon2i), and + * better with w.r.t. tradeoff attacks (similar to Argon2d). + ***** + * @param context Pointer to current Argon2 context + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2id_ctx(argon2_context *context); + +/** + * Verify if a given password is correct for Argon2d hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2d_verify_ctx(argon2_context *context, const char *hash); + +/** + * Verify if a given password is correct for Argon2i hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2i_verify_ctx(argon2_context *context, const char *hash); + +/** + * Verify if a given password is correct for Argon2id hashing + * @param context Pointer to current Argon2 context + * @param hash The password hash to verify. The length of the hash is + * specified by the context outlen member + * @return Zero if successful, a non zero error code otherwise + */ +ARGON2_PUBLIC int cryptonite_argon2id_verify_ctx(argon2_context *context, + const char *hash); + +/* generic function underlying the above ones */ +ARGON2_PUBLIC int cryptonite_argon2_verify_ctx(argon2_context *context, const char *hash, + argon2_type type); + +/** + * Get the associated error message for given error code + * @return The error message associated with the given error code + */ +ARGON2_PUBLIC const char *cryptonite_argon2_error_message(int error_code); + +/** + * Returns the encoded hash length for the given input parameters + * @param t_cost Number of iterations + * @param m_cost Memory usage in kibibytes + * @param parallelism Number of threads; used to compute lanes + * @param saltlen Salt size in bytes + * @param hashlen Hash size in bytes + * @param type The argon2_type that we want the encoded length for + * @return The encoded hash length in bytes + */ +ARGON2_PUBLIC size_t cryptonite_argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, + uint32_t parallelism, uint32_t saltlen, + uint32_t hashlen, argon2_type type); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/cbits/argon2/bench.c b/cbits/argon2/bench.c new file mode 100644 index 0000000..9ec6a4a --- /dev/null +++ b/cbits/argon2/bench.c @@ -0,0 +1,111 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include +#include +#include +#ifdef _MSC_VER +#include +#endif + +#include "argon2.h" + +static uint64_t rdtsc(void) { +#ifdef _MSC_VER + return __rdtsc(); +#else +#if defined(__amd64__) || defined(__x86_64__) + uint64_t rax, rdx; + __asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx) : :); + return (rdx << 32) | rax; +#elif defined(__i386__) || defined(__i386) || defined(__X86__) + uint64_t rax; + __asm__ __volatile__("rdtsc" : "=A"(rax) : :); + return rax; +#else +#error "Not implemented!" +#endif +#endif +} + +/* + * Benchmarks Argon2 with salt length 16, password length 16, t_cost 1, + and different m_cost and threads + */ +static void benchmark() { +#define BENCH_OUTLEN 16 +#define BENCH_INLEN 16 + const uint32_t inlen = BENCH_INLEN; + const unsigned outlen = BENCH_OUTLEN; + unsigned char out[BENCH_OUTLEN]; + unsigned char pwd_array[BENCH_INLEN]; + unsigned char salt_array[BENCH_INLEN]; +#undef BENCH_INLEN +#undef BENCH_OUTLEN + + uint32_t t_cost = 3; + uint32_t m_cost; + uint32_t thread_test[4] = {1, 2, 4, 8}; + argon2_type types[3] = {Argon2_i, Argon2_d, Argon2_id}; + + memset(pwd_array, 0, inlen); + memset(salt_array, 1, inlen); + + for (m_cost = (uint32_t)1 << 10; m_cost <= (uint32_t)1 << 22; m_cost *= 2) { + unsigned i; + for (i = 0; i < 4; ++i) { + double run_time = 0; + uint32_t thread_n = thread_test[i]; + + unsigned j; + for (j = 0; j < 3; ++j) { + clock_t start_time, stop_time; + uint64_t start_cycles, stop_cycles; + uint64_t delta; + double mcycles; + + argon2_type type = types[j]; + start_time = clock(); + start_cycles = rdtsc(); + + argon2_hash(t_cost, m_cost, thread_n, pwd_array, inlen, + salt_array, inlen, out, outlen, NULL, 0, type, + ARGON2_VERSION_NUMBER); + + stop_cycles = rdtsc(); + stop_time = clock(); + + delta = (stop_cycles - start_cycles) / (m_cost); + mcycles = (double)(stop_cycles - start_cycles) / (1UL << 20); + run_time += ((double)stop_time - start_time) / (CLOCKS_PER_SEC); + + printf("%s %d iterations %d MiB %d threads: %2.2f cpb %2.2f " + "Mcycles \n", argon2_type2string(type, 1), t_cost, + m_cost >> 10, thread_n, (float)delta / 1024, mcycles); + } + + printf("%2.4f seconds\n\n", run_time); + } + } +} + +int main() { + benchmark(); + return ARGON2_OK; +} diff --git a/cbits/argon2/blamka-round-opt.h b/cbits/argon2/blamka-round-opt.h new file mode 100644 index 0000000..577e1d0 --- /dev/null +++ b/cbits/argon2/blamka-round-opt.h @@ -0,0 +1,180 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_OPT_H +#define BLAKE_ROUND_MKA_OPT_H + +#include "blake2-impl.h" + +#include +#if defined(__SSSE3__) +#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ +#endif + +#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__)) +#include +#endif + +#if !defined(__XOP__) +#if defined(__SSSE3__) +#define r16 \ + (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)) +#define r24 \ + (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) \ + ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \ + : (-(c) == 24) \ + ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) \ + ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) \ + ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \ + _mm_slli_epi64((x), 64 - (-(c)))) +#else /* defined(__SSE2__) */ +#define _mm_roti_epi64(r, c) \ + _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) +#endif +#else +#endif + +static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { + const __m128i z = _mm_mul_epu32(x, y); + return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z)); +} + +#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ + \ + D0 = _mm_roti_epi64(D0, -32); \ + D1 = _mm_roti_epi64(D1, -32); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ + \ + B0 = _mm_roti_epi64(B0, -24); \ + B1 = _mm_roti_epi64(B1, -24); \ + } while ((void)0, 0) + +#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + A0 = fBlaMka(A0, B0); \ + A1 = fBlaMka(A1, B1); \ + \ + D0 = _mm_xor_si128(D0, A0); \ + D1 = _mm_xor_si128(D1, A1); \ + \ + D0 = _mm_roti_epi64(D0, -16); \ + D1 = _mm_roti_epi64(D1, -16); \ + \ + C0 = fBlaMka(C0, D0); \ + C1 = fBlaMka(C1, D1); \ + \ + B0 = _mm_xor_si128(B0, C0); \ + B1 = _mm_xor_si128(B1, C1); \ + \ + B0 = _mm_roti_epi64(B0, -63); \ + B1 = _mm_roti_epi64(B1, -63); \ + } while ((void)0, 0) + +#if defined(__SSSE3__) +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \ + __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \ + B0 = t0; \ + B1 = t1; \ + \ + t0 = C0; \ + C0 = C1; \ + C1 = t0; \ + \ + t0 = _mm_alignr_epi8(D1, D0, 8); \ + t1 = _mm_alignr_epi8(D0, D1, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \ + __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \ + B0 = t0; \ + B1 = t1; \ + \ + t0 = C0; \ + C0 = C1; \ + C1 = t0; \ + \ + t0 = _mm_alignr_epi8(D0, D1, 8); \ + t1 = _mm_alignr_epi8(D1, D0, 8); \ + D0 = t1; \ + D1 = t0; \ + } while ((void)0, 0) +#else /* SSE2 */ +#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0 = D0; \ + __m128i t1 = B0; \ + D0 = C0; \ + C0 = C1; \ + C1 = D0; \ + D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \ + D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \ + B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \ + B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) + +#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \ + do { \ + __m128i t0, t1; \ + t0 = C0; \ + C0 = C1; \ + C1 = t0; \ + t0 = B0; \ + t1 = D0; \ + B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \ + B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \ + D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \ + D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \ + } while ((void)0, 0) +#endif + +#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \ + do { \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + G1(A0, B0, C0, D0, A1, B1, C1, D1); \ + G2(A0, B0, C0, D0, A1, B1, C1, D1); \ + \ + UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \ + } while ((void)0, 0) + +#endif diff --git a/cbits/argon2/blamka-round-ref.h b/cbits/argon2/blamka-round-ref.h new file mode 100644 index 0000000..db9864d --- /dev/null +++ b/cbits/argon2/blamka-round-ref.h @@ -0,0 +1,56 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef BLAKE_ROUND_MKA_H +#define BLAKE_ROUND_MKA_H + +#include "blake2.h" +#include "blake2-impl.h" + +/*designed by the Lyra PHC team */ +static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { + const uint64_t m = UINT64_C(0xFFFFFFFF); + const uint64_t xy = (x & m) * (y & m); + return x + y + 2 * xy; +} + +#define G(a, b, c, d) \ + do { \ + a = fBlaMka(a, b); \ + d = rotr64(d ^ a, 32); \ + c = fBlaMka(c, d); \ + b = rotr64(b ^ c, 24); \ + a = fBlaMka(a, b); \ + d = rotr64(d ^ a, 16); \ + c = fBlaMka(c, d); \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \ + v12, v13, v14, v15) \ + do { \ + G(v0, v4, v8, v12); \ + G(v1, v5, v9, v13); \ + G(v2, v6, v10, v14); \ + G(v3, v7, v11, v15); \ + G(v0, v5, v10, v15); \ + G(v1, v6, v11, v12); \ + G(v2, v7, v8, v13); \ + G(v3, v4, v9, v14); \ + } while ((void)0, 0) + +#endif diff --git a/cbits/argon2/core.c b/cbits/argon2/core.c new file mode 100644 index 0000000..1c6cad1 --- /dev/null +++ b/cbits/argon2/core.c @@ -0,0 +1,641 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +/*For memory wiping*/ +#ifdef _MSC_VER +#include +#include /* For SecureZeroMemory */ +#endif +#if defined __STDC_LIB_EXT1__ +#define __STDC_WANT_LIB_EXT1__ 1 +#endif +#define VC_GE_2005(version) (version >= 1400) + +#include +#include +#include +#include + +#include "core.h" +#include "thread.c" +#include "blake2.h" +#include "blake2-impl.h" + +#ifdef GENKAT +#include "genkat.h" +#endif + +#ifdef SUPPORT_SSE +#include "opt.c" +#else +#include "ref.c" +#endif + +#if defined(__clang__) +#if __has_attribute(optnone) +#define NOT_OPTIMIZED __attribute__((optnone)) +#endif +#elif defined(__GNUC__) +#define GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if GCC_VERSION >= 40400 +#define NOT_OPTIMIZED __attribute__((optimize("O0"))) +#endif +#endif +#ifndef NOT_OPTIMIZED +#define NOT_OPTIMIZED +#endif + +/***************Instance and Position constructors**********/ +static void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } + +static void copy_block(block *dst, const block *src) { + memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); +} + +static void xor_block(block *dst, const block *src) { + int i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] ^= src->v[i]; + } +} + +static void load_block(block *dst, const void *input) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); + } +} + +static void store_block(void *output, const block *src) { + unsigned i; + for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); + } +} + +/***************Memory functions*****************/ +static +int allocate_memory(const argon2_context *context, uint8_t **memory, + size_t num, size_t size) { + size_t memory_size = num*size; + if (memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 1. Check for multiplication overflow */ + if (size != 0 && memory_size / size != num) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + /* 2. Try to allocate with appropriate allocator */ + if (context->allocate_cbk) { + (context->allocate_cbk)(memory, memory_size); + } else { + *memory = malloc(memory_size); + } + + if (*memory == NULL) { + return ARGON2_MEMORY_ALLOCATION_ERROR; + } + + return ARGON2_OK; +} +static +void free_memory(const argon2_context *context, uint8_t *memory, + size_t num, size_t size) { + size_t memory_size = num*size; + clear_internal_memory(memory, memory_size); + if (context->free_cbk) { + (context->free_cbk)(memory, memory_size); + } else { + free(memory); + } +} + +void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { +#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) + SecureZeroMemory(v, n); +#elif defined memset_s + memset_s(v, n, 0, n); +#elif defined(__OpenBSD__) + explicit_bzero(v, n); +#else + static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; + memset_sec(v, 0, n); +#endif +} + +/* Memory clear flag defaults to true. */ +static int FLAG_clear_internal_memory = 1; +static void clear_internal_memory(void *v, size_t n) { + if (FLAG_clear_internal_memory && v) { + secure_wipe_memory(v, n); + } +} + +static void finalize(const argon2_context *context, argon2_instance_t *instance) { + if (context != NULL && instance != NULL) { + block blockhash; + uint32_t l; + + copy_block(&blockhash, instance->memory + instance->lane_length - 1); + + /* XOR the last blocks */ + for (l = 1; l < instance->lanes; ++l) { + uint32_t last_block_in_lane = + l * instance->lane_length + (instance->lane_length - 1); + xor_block(&blockhash, instance->memory + last_block_in_lane); + } + + /* Hash the result */ + { + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + store_block(blockhash_bytes, &blockhash); + blake2b_long(context->out, context->outlen, blockhash_bytes, + ARGON2_BLOCK_SIZE); + /* clear blockhash and blockhash_bytes */ + clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); + } + +#ifdef GENKAT + print_tag(context->out, context->outlen); +#endif + + free_memory(context, (uint8_t *)instance->memory, + instance->memory_blocks, sizeof(block)); + } +} + +static uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane) { + /* + * Pass 0: + * This lane : all already finished segments plus already constructed + * blocks in this segment + * Other lanes : all already finished segments + * Pass 1+: + * This lane : (SYNC_POINTS - 1) last segments plus already constructed + * blocks in this segment + * Other lanes : (SYNC_POINTS - 1) last segments + */ + uint32_t reference_area_size; + uint64_t relative_position; + uint32_t start_position, absolute_position; + + if (0 == position->pass) { + /* First pass */ + if (0 == position->slice) { + /* First slice */ + reference_area_size = + position->index - 1; /* all but the previous */ + } else { + if (same_lane) { + /* The same lane => add current segment */ + reference_area_size = + position->slice * instance->segment_length + + position->index - 1; + } else { + reference_area_size = + position->slice * instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + } else { + /* Second pass */ + if (same_lane) { + reference_area_size = instance->lane_length - + instance->segment_length + position->index - + 1; + } else { + reference_area_size = instance->lane_length - + instance->segment_length + + ((position->index == 0) ? (-1) : 0); + } + } + + /* 1.2.4. Mapping pseudo_rand to 0.. and produce + * relative position */ + relative_position = pseudo_rand; + relative_position = relative_position * relative_position >> 32; + relative_position = reference_area_size - 1 - + (reference_area_size * relative_position >> 32); + + /* 1.2.5 Computing starting position */ + start_position = 0; + + if (0 != position->pass) { + start_position = (position->slice == ARGON2_SYNC_POINTS - 1) + ? 0 + : (position->slice + 1) * instance->segment_length; + } + + /* 1.2.6. Computing absolute position */ + absolute_position = (start_position + relative_position) % + instance->lane_length; /* absolute position */ + return absolute_position; +} + +/* Single-threaded version for p=1 case */ +static int fill_memory_blocks_st(argon2_instance_t *instance) { + uint32_t r, s, l; + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position = {r, l, (uint8_t)s, 0}; + fill_segment(instance, position); + } + } +#ifdef GENKAT + internal_kat(instance, r); /* Print all memory blocks */ +#endif + } + return ARGON2_OK; +} + +#if !defined(ARGON2_NO_THREADS) + +#ifdef _WIN32 +static unsigned __stdcall fill_segment_thr(void *thread_data) +#else +static void *fill_segment_thr(void *thread_data) +#endif +{ + argon2_thread_data *my_data = thread_data; + fill_segment(my_data->instance_ptr, my_data->pos); + argon2_thread_exit(); + return 0; +} + +/* Multi-threaded version for p > 1 case */ +static int fill_memory_blocks_mt(argon2_instance_t *instance) { + uint32_t r, s; + argon2_thread_handle_t *thread = NULL; + argon2_thread_data *thr_data = NULL; + int rc = ARGON2_OK; + + /* 1. Allocating space for threads */ + thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t)); + if (thread == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + thr_data = calloc(instance->lanes, sizeof(argon2_thread_data)); + if (thr_data == NULL) { + rc = ARGON2_MEMORY_ALLOCATION_ERROR; + goto fail; + } + + for (r = 0; r < instance->passes; ++r) { + for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { + uint32_t l; + + /* 2. Calling threads */ + for (l = 0; l < instance->lanes; ++l) { + argon2_position_t position; + + /* 2.1 Join a thread if limit is exceeded */ + if (l >= instance->threads) { + if (argon2_thread_join(thread[l - instance->threads])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + + /* 2.2 Create thread */ + position.pass = r; + position.lane = l; + position.slice = (uint8_t)s; + position.index = 0; + thr_data[l].instance_ptr = + instance; /* preparing the thread input */ + memcpy(&(thr_data[l].pos), &position, + sizeof(argon2_position_t)); + if (argon2_thread_create(&thread[l], &fill_segment_thr, + (void *)&thr_data[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + + /* fill_segment(instance, position); */ + /*Non-thread equivalent of the lines above */ + } + + /* 3. Joining remaining threads */ + for (l = instance->lanes - instance->threads; l < instance->lanes; + ++l) { + if (argon2_thread_join(thread[l])) { + rc = ARGON2_THREAD_FAIL; + goto fail; + } + } + } + +#ifdef GENKAT + internal_kat(instance, r); /* Print all memory blocks */ +#endif + } + +fail: + if (thread != NULL) { + free(thread); + } + if (thr_data != NULL) { + free(thr_data); + } + return rc; +} + +#endif /* ARGON2_NO_THREADS */ +static +int fill_memory_blocks(argon2_instance_t *instance) { + if (instance == NULL || instance->lanes == 0) { + return ARGON2_INCORRECT_PARAMETER; + } +#if defined(ARGON2_NO_THREADS) + return fill_memory_blocks_st(instance); +#else + return instance->threads == 1 ? + fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance); +#endif +} +static +int validate_inputs(const argon2_context *context) { + if (NULL == context) { + return ARGON2_INCORRECT_PARAMETER; + } + + if (NULL == context->out) { + return ARGON2_OUTPUT_PTR_NULL; + } + + /* Validate output length */ + if (ARGON2_MIN_OUTLEN > context->outlen) { + return ARGON2_OUTPUT_TOO_SHORT; + } + + if (ARGON2_MAX_OUTLEN < context->outlen) { + return ARGON2_OUTPUT_TOO_LONG; + } + + /* Validate password (required param) */ + if (NULL == context->pwd) { + if (0 != context->pwdlen) { + return ARGON2_PWD_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { + return ARGON2_PWD_TOO_SHORT; + } + + if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { + return ARGON2_PWD_TOO_LONG; + } + + /* Validate salt (required param) */ + if (NULL == context->salt) { + if (0 != context->saltlen) { + return ARGON2_SALT_PTR_MISMATCH; + } + } + + if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { + return ARGON2_SALT_TOO_SHORT; + } + + if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { + return ARGON2_SALT_TOO_LONG; + } + + /* Validate secret (optional param) */ + if (NULL == context->secret) { + if (0 != context->secretlen) { + return ARGON2_SECRET_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_SECRET > context->secretlen) { + return ARGON2_SECRET_TOO_SHORT; + } + if (ARGON2_MAX_SECRET < context->secretlen) { + return ARGON2_SECRET_TOO_LONG; + } + } + + /* Validate associated data (optional param) */ + if (NULL == context->ad) { + if (0 != context->adlen) { + return ARGON2_AD_PTR_MISMATCH; + } + } else { + if (ARGON2_MIN_AD_LENGTH > context->adlen) { + return ARGON2_AD_TOO_SHORT; + } + if (ARGON2_MAX_AD_LENGTH < context->adlen) { + return ARGON2_AD_TOO_LONG; + } + } + + /* Validate memory cost */ + if (ARGON2_MIN_MEMORY > context->m_cost) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + if (ARGON2_MAX_MEMORY < context->m_cost) { + return ARGON2_MEMORY_TOO_MUCH; + } + + if (context->m_cost < 8 * context->lanes) { + return ARGON2_MEMORY_TOO_LITTLE; + } + + /* Validate time cost */ + if (ARGON2_MIN_TIME > context->t_cost) { + return ARGON2_TIME_TOO_SMALL; + } + + if (ARGON2_MAX_TIME < context->t_cost) { + return ARGON2_TIME_TOO_LARGE; + } + + /* Validate lanes */ + if (ARGON2_MIN_LANES > context->lanes) { + return ARGON2_LANES_TOO_FEW; + } + + if (ARGON2_MAX_LANES < context->lanes) { + return ARGON2_LANES_TOO_MANY; + } + + /* Validate threads */ + if (ARGON2_MIN_THREADS > context->threads) { + return ARGON2_THREADS_TOO_FEW; + } + + if (ARGON2_MAX_THREADS < context->threads) { + return ARGON2_THREADS_TOO_MANY; + } + + if (NULL != context->allocate_cbk && NULL == context->free_cbk) { + return ARGON2_FREE_MEMORY_CBK_NULL; + } + + if (NULL == context->allocate_cbk && NULL != context->free_cbk) { + return ARGON2_ALLOCATE_MEMORY_CBK_NULL; + } + + return ARGON2_OK; +} +static +void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { + uint32_t l; + /* Make the first and second block in each lane as G(H0||i||0) or + G(H0||i||1) */ + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + for (l = 0; l < instance->lanes; ++l) { + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 0], + blockhash_bytes); + + store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); + blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, + ARGON2_PREHASH_SEED_LENGTH); + load_block(&instance->memory[l * instance->lane_length + 1], + blockhash_bytes); + } + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); +} +static +void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type) { + blake2b_state BlakeHash; + uint8_t value[sizeof(uint32_t)]; + + if (NULL == context || NULL == blockhash) { + return; + } + + blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); + + store32(&value, context->lanes); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->outlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->m_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->t_cost); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->version); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, (uint32_t)type); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + store32(&value, context->pwdlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->pwd != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, + context->pwdlen); + + if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { + secure_wipe_memory(context->pwd, context->pwdlen); + context->pwdlen = 0; + } + } + + store32(&value, context->saltlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->salt != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->salt, + context->saltlen); + } + + store32(&value, context->secretlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->secret != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->secret, + context->secretlen); + + if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { + secure_wipe_memory(context->secret, context->secretlen); + context->secretlen = 0; + } + } + + store32(&value, context->adlen); + blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); + + if (context->ad != NULL) { + blake2b_update(&BlakeHash, (const uint8_t *)context->ad, + context->adlen); + } + + blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); +} +static +int initialize(argon2_instance_t *instance, argon2_context *context) { + uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; + int result = ARGON2_OK; + + if (instance == NULL || context == NULL) + return ARGON2_INCORRECT_PARAMETER; + instance->context_ptr = context; + + /* 1. Memory allocation */ + result = allocate_memory(context, (uint8_t **)&(instance->memory), + instance->memory_blocks, sizeof(block)); + if (result != ARGON2_OK) { + return result; + } + + /* 2. Initial hashing */ + /* H_0 + 8 extra bytes to produce the first blocks */ + /* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ + /* Hashing all inputs */ + initial_hash(blockhash, context, instance->type); + /* Zeroing 8 extra bytes */ + clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, + ARGON2_PREHASH_SEED_LENGTH - + ARGON2_PREHASH_DIGEST_LENGTH); + +#ifdef GENKAT + initial_kat(blockhash, context, instance->type); +#endif + + /* 3. Creating first blocks, we always have at least two blocks in a slice + */ + fill_first_blocks(blockhash, instance); + /* Clearing the hash */ + clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); + + return ARGON2_OK; +} diff --git a/cbits/argon2/core.h b/cbits/argon2/core.h new file mode 100644 index 0000000..888cc3c --- /dev/null +++ b/cbits/argon2/core.h @@ -0,0 +1,234 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ARGON2_CORE_H +#define ARGON2_CORE_H + +#include "argon2.h" + +#if defined(_MSC_VER) +#define ALIGN(n) __declspec(align(16)) +#elif defined(__GNUC__) || defined(__clang) +#define ALIGN(x) __attribute__((__aligned__(x))) +#else +#define ALIGN(x) +#endif + +#define CONST_CAST(x) (x)(uintptr_t) + +/**********************Argon2 internal constants*******************************/ + +enum argon2_core_constants { + /* Memory block size in bytes */ + ARGON2_BLOCK_SIZE = 1024, + ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, + ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, + + /* Number of pseudo-random values generated by one call to Blake in Argon2i + to + generate reference block positions */ + ARGON2_ADDRESSES_IN_BLOCK = 128, + + /* Pre-hashing digest length and its extension*/ + ARGON2_PREHASH_DIGEST_LENGTH = 64, + ARGON2_PREHASH_SEED_LENGTH = 72 +}; + +/*************************Argon2 internal data types***********************/ + +/* + * Structure for the (1KB) memory block implemented as 128 64-bit words. + * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no + * bounds checking). + */ +typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; + +/*****************Functions that work with the block******************/ + +/* Initialize each byte of the block with @in */ +static void init_block_value(block *b, uint8_t in); + +/* Copy block @src to block @dst */ +static void copy_block(block *dst, const block *src); + +/* XOR @src onto @dst bytewise */ +static void xor_block(block *dst, const block *src); + +/* + * Argon2 instance: memory pointer, number of passes, amount of memory, type, + * and derived values. + * Used to evaluate the number and location of blocks to construct in each + * thread + */ +typedef struct Argon2_instance_t { + block *memory; /* Memory pointer */ + uint32_t version; + uint32_t passes; /* Number of passes */ + uint32_t memory_blocks; /* Number of blocks in memory */ + uint32_t segment_length; + uint32_t lane_length; + uint32_t lanes; + uint32_t threads; + argon2_type type; + int print_internals; /* whether to print the memory blocks */ + argon2_context *context_ptr; /* points back to original context */ +} argon2_instance_t; + +/* + * Argon2 position: where we construct the block right now. Used to distribute + * work between threads. + */ +typedef struct Argon2_position_t { + uint32_t pass; + uint32_t lane; + uint8_t slice; + uint32_t index; +} argon2_position_t; + +/*Struct that holds the inputs for thread handling FillSegment*/ +typedef struct Argon2_thread_data { + argon2_instance_t *instance_ptr; + argon2_position_t pos; +} argon2_thread_data; + +/*************************Argon2 core functions********************************/ + +/* Allocates memory to the given pointer, uses the appropriate allocator as + * specified in the context. Total allocated memory is num*size. + * @param context argon2_context which specifies the allocator + * @param memory pointer to the pointer to the memory + * @param size the size in bytes for each element to be allocated + * @param num the number of elements to be allocated + * @return ARGON2_OK if @memory is a valid pointer and memory is allocated + */ +static int allocate_memory(const argon2_context *context, uint8_t **memory, + size_t num, size_t size); + +/* + * Frees memory at the given pointer, uses the appropriate deallocator as + * specified in the context. Also cleans the memory using clear_internal_memory. + * @param context argon2_context which specifies the deallocator + * @param memory pointer to buffer to be freed + * @param size the size in bytes for each element to be deallocated + * @param num the number of elements to be deallocated + */ +static void free_memory(const argon2_context *context, uint8_t *memory, + size_t num, size_t size); + +/* Function that securely cleans the memory. This ignores any flags set + * regarding clearing memory. Usually one just calls clear_internal_memory. + * @param mem Pointer to the memory + * @param s Memory size in bytes + */ +static void secure_wipe_memory(void *v, size_t n); + +/* Function that securely clears the memory if FLAG_clear_internal_memory is + * set. If the flag isn't set, this function does nothing. + * @param mem Pointer to the memory + * @param s Memory size in bytes + */ +static void clear_internal_memory(void *v, size_t n); + +/* + * Computes absolute position of reference block in the lane following a skewed + * distribution and using a pseudo-random value as input + * @param instance Pointer to the current instance + * @param position Pointer to the current position + * @param pseudo_rand 32-bit pseudo-random value used to determine the position + * @param same_lane Indicates if the block will be taken from the current lane. + * If so we can reference the current segment + * @pre All pointers must be valid + */ +static uint32_t index_alpha(const argon2_instance_t *instance, + const argon2_position_t *position, uint32_t pseudo_rand, + int same_lane); + +/* + * Function that validates all inputs against predefined restrictions and return + * an error code + * @param context Pointer to current Argon2 context + * @return ARGON2_OK if everything is all right, otherwise one of error codes + * (all defined in + */ +static int validate_inputs(const argon2_context *context); + +/* + * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears + * password and secret if needed + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param blockhash Buffer for pre-hashing digest + * @param type Argon2 type + * @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes + * allocated + */ +static void initial_hash(uint8_t *blockhash, argon2_context *context, + argon2_type type); + +/* + * Function creates first 2 blocks per lane + * @param instance Pointer to the current instance + * @param blockhash Pointer to the pre-hashing digest + * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values + */ +static void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); + +/* + * Function allocates memory, hashes the inputs with Blake, and creates first + * two blocks. Returns the pointer to the main memory with 2 blocks per lane + * initialized + * @param context Pointer to the Argon2 internal structure containing memory + * pointer, and parameters for time and space requirements. + * @param instance Current Argon2 instance + * @return Zero if successful, -1 if memory failed to allocate. @context->state + * will be modified if successful. + */ +static int initialize(argon2_instance_t *instance, argon2_context *context); + +/* + * XORing the last block of each lane, hashing it, making the tag. Deallocates + * the memory. + * @param context Pointer to current Argon2 context (use only the out parameters + * from it) + * @param instance Pointer to current instance of Argon2 + * @pre instance->state must point to necessary amount of memory + * @pre context->out must point to outlen bytes of memory + * @pre if context->free_cbk is not NULL, it should point to a function that + * deallocates memory + */ +static void finalize(const argon2_context *context, argon2_instance_t *instance); + +/* + * Function that fills the segment using previous segments also from other + * threads + * @param context current context + * @param instance Pointer to the current instance + * @param position Current position + * @pre all block pointers must be valid + */ +static void fill_segment(const argon2_instance_t *instance, + argon2_position_t position); + +/* + * Function that fills the entire memory t_cost times based on the first two + * blocks in each lane + * @param instance Pointer to the current instance + * @return ARGON2_OK if successful, @context->state + */ +static int fill_memory_blocks(argon2_instance_t *instance); + +#endif diff --git a/cbits/argon2/encoding.c b/cbits/argon2/encoding.c new file mode 100644 index 0000000..44952a7 --- /dev/null +++ b/cbits/argon2/encoding.c @@ -0,0 +1,450 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include +#include +#include "encoding.h" +#include "core.h" + +/* + * Example code for a decoder and encoder of "hash strings", with Argon2 + * parameters. + * + * This code comprises three sections: + * + * -- The first section contains generic Base64 encoding and decoding + * functions. It is conceptually applicable to any hash function + * implementation that uses Base64 to encode and decode parameters, + * salts and outputs. It could be made into a library, provided that + * the relevant functions are made public (non-static) and be given + * reasonable names to avoid collisions with other functions. + * + * -- The second section is specific to Argon2. It encodes and decodes + * the parameters, salts and outputs. It does not compute the hash + * itself. + * + * The code was originally written by Thomas Pornin , + * to whom comments and remarks may be sent. It is released under what + * should amount to Public Domain or its closest equivalent; the + * following mantra is supposed to incarnate that fact with all the + * proper legal rituals: + * + * --------------------------------------------------------------------- + * This file is provided under the terms of Creative Commons CC0 1.0 + * Public Domain Dedication. To the extent possible under law, the + * author (Thomas Pornin) has waived all copyright and related or + * neighboring rights to this file. This work is published from: Canada. + * --------------------------------------------------------------------- + * + * Copyright (c) 2015 Thomas Pornin + */ + +/* ==================================================================== */ +/* + * Common code; could be shared between different hash functions. + * + * Note: the Base64 functions below assume that uppercase letters (resp. + * lowercase letters) have consecutive numerical codes, that fit on 8 + * bits. All modern systems use ASCII-compatible charsets, where these + * properties are true. If you are stuck with a dinosaur of a system + * that still defaults to EBCDIC then you already have much bigger + * interoperability issues to deal with. + */ + +/* + * Some macros for constant-time comparisons. These work over values in + * the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true". + */ +#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF) +#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF) +#define GE(x, y) (GT(y, x) ^ 0xFF) +#define LT(x, y) GT(y, x) +#define LE(x, y) GE(y, x) + +/* + * Convert value x (0..63) to corresponding Base64 character. + */ +static int b64_byte_to_char(unsigned x) { + return (LT(x, 26) & (x + 'A')) | + (GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) | + (GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') | + (EQ(x, 63) & '/'); +} + +/* + * Convert character c to the corresponding 6-bit value. If character c + * is not a Base64 character, then 0xFF (255) is returned. + */ +static unsigned b64_char_to_byte(int c) { + unsigned x; + + x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) | + (GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) | + (GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) | + (EQ(c, '/') & 63); + return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF)); +} + +/* + * Convert some bytes to Base64. 'dst_len' is the length (in characters) + * of the output buffer 'dst'; if that buffer is not large enough to + * receive the result (including the terminating 0), then (size_t)-1 + * is returned. Otherwise, the zero-terminated Base64 string is written + * in the buffer, and the output length (counted WITHOUT the terminating + * zero) is returned. + */ +static size_t to_base64(char *dst, size_t dst_len, const void *src, + size_t src_len) { + size_t olen; + const unsigned char *buf; + unsigned acc, acc_len; + + olen = (src_len / 3) << 2; + switch (src_len % 3) { + case 2: + olen++; + /* fall through */ + case 1: + olen += 2; + break; + } + if (dst_len <= olen) { + return (size_t)-1; + } + acc = 0; + acc_len = 0; + buf = (const unsigned char *)src; + while (src_len-- > 0) { + acc = (acc << 8) + (*buf++); + acc_len += 8; + while (acc_len >= 6) { + acc_len -= 6; + *dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F); + } + } + if (acc_len > 0) { + *dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F); + } + *dst++ = 0; + return olen; +} + +/* + * Decode Base64 chars into bytes. The '*dst_len' value must initially + * contain the length of the output buffer '*dst'; when the decoding + * ends, the actual number of decoded bytes is written back in + * '*dst_len'. + * + * Decoding stops when a non-Base64 character is encountered, or when + * the output buffer capacity is exceeded. If an error occurred (output + * buffer is too small, invalid last characters leading to unprocessed + * buffered bits), then NULL is returned; otherwise, the returned value + * points to the first non-Base64 character in the source stream, which + * may be the terminating zero. + */ +static const char *from_base64(void *dst, size_t *dst_len, const char *src) { + size_t len; + unsigned char *buf; + unsigned acc, acc_len; + + buf = (unsigned char *)dst; + len = 0; + acc = 0; + acc_len = 0; + for (;;) { + unsigned d; + + d = b64_char_to_byte(*src); + if (d == 0xFF) { + break; + } + src++; + acc = (acc << 6) + d; + acc_len += 6; + if (acc_len >= 8) { + acc_len -= 8; + if ((len++) >= *dst_len) { + return NULL; + } + *buf++ = (acc >> acc_len) & 0xFF; + } + } + + /* + * If the input length is equal to 1 modulo 4 (which is + * invalid), then there will remain 6 unprocessed bits; + * otherwise, only 0, 2 or 4 bits are buffered. The buffered + * bits must also all be zero. + */ + if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) { + return NULL; + } + *dst_len = len; + return src; +} + +/* + * Decode decimal integer from 'str'; the value is written in '*v'. + * Returned value is a pointer to the next non-decimal character in the + * string. If there is no digit at all, or the value encoding is not + * minimal (extra leading zeros), or the value does not fit in an + * 'unsigned long', then NULL is returned. + */ +static const char *decode_decimal(const char *str, unsigned long *v) { + const char *orig; + unsigned long acc; + + acc = 0; + for (orig = str;; str++) { + int c; + + c = *str; + if (c < '0' || c > '9') { + break; + } + c -= '0'; + if (acc > (ULONG_MAX / 10)) { + return NULL; + } + acc *= 10; + if ((unsigned long)c > (ULONG_MAX - acc)) { + return NULL; + } + acc += (unsigned long)c; + } + if (str == orig || (*orig == '0' && str != (orig + 1))) { + return NULL; + } + *v = acc; + return str; +} + +/* ==================================================================== */ +/* + * Code specific to Argon2. + * + * The code below applies the following format: + * + * $argon2[$v=]$m=,t=,p=$$ + * + * where is either 'd', 'id', or 'i', is a decimal integer (positive, + * fits in an 'unsigned long'), and is Base64-encoded data (no '=' padding + * characters, no newline or whitespace). + * + * The last two binary chunks (encoded in Base64) are, in that order, + * the salt and the output. Both are required. The binary salt length and the + * output length must be in the allowed ranges defined in argon2.h. + * + * The ctx struct must contain buffers large enough to hold the salt and pwd + * when it is fed into decode_string. + */ + +static int decode_string(argon2_context *ctx, const char *str, argon2_type type) { + +/* check for prefix */ +#define CC(prefix) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) != 0) { \ + return ARGON2_DECODING_FAIL; \ + } \ + str += cc_len; \ + } while ((void)0, 0) + +/* optional prefix checking with supplied code */ +#define CC_opt(prefix, code) \ + do { \ + size_t cc_len = strlen(prefix); \ + if (strncmp(str, prefix, cc_len) == 0) { \ + str += cc_len; \ + { code; } \ + } \ + } while ((void)0, 0) + +/* Decoding prefix into decimal */ +#define DECIMAL(x) \ + do { \ + unsigned long dec_x; \ + str = decode_decimal(str, &dec_x); \ + if (str == NULL) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (x) = dec_x; \ + } while ((void)0, 0) + +/* Decoding base64 into a binary buffer */ +#define BIN(buf, max_len, len) \ + do { \ + size_t bin_len = (max_len); \ + str = from_base64(buf, &bin_len, str); \ + if (str == NULL || bin_len > UINT32_MAX) { \ + return ARGON2_DECODING_FAIL; \ + } \ + (len) = (uint32_t)bin_len; \ + } while ((void)0, 0) + + size_t maxsaltlen = ctx->saltlen; + size_t maxoutlen = ctx->outlen; + int validation_result; + const char* type_string; + + /* We should start with the argon2_type we are using */ + type_string = cryptonite_argon2_type2string(type, 0); + if (!type_string) { + return ARGON2_INCORRECT_TYPE; + } + + CC("$"); + CC(type_string); + + /* Reading the version number if the default is suppressed */ + ctx->version = ARGON2_VERSION_10; + CC_opt("$v=", DECIMAL(ctx->version)); + + CC("$m="); + DECIMAL(ctx->m_cost); + CC(",t="); + DECIMAL(ctx->t_cost); + CC(",p="); + DECIMAL(ctx->lanes); + ctx->threads = ctx->lanes; + + CC("$"); + BIN(ctx->salt, maxsaltlen, ctx->saltlen); + CC("$"); + BIN(ctx->out, maxoutlen, ctx->outlen); + + /* The rest of the fields get the default values */ + ctx->secret = NULL; + ctx->secretlen = 0; + ctx->ad = NULL; + ctx->adlen = 0; + ctx->allocate_cbk = NULL; + ctx->free_cbk = NULL; + ctx->flags = ARGON2_DEFAULT_FLAGS; + + /* On return, must have valid context */ + validation_result = validate_inputs(ctx); + if (validation_result != ARGON2_OK) { + return validation_result; + } + + /* Can't have any additional characters */ + if (*str == 0) { + return ARGON2_OK; + } else { + return ARGON2_DECODING_FAIL; + } +#undef CC +#undef CC_opt +#undef DECIMAL +#undef BIN +} + +static int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type) { +#define SS(str) \ + do { \ + size_t pp_len = strlen(str); \ + if (pp_len >= dst_len) { \ + return ARGON2_ENCODING_FAIL; \ + } \ + memcpy(dst, str, pp_len + 1); \ + dst += pp_len; \ + dst_len -= pp_len; \ + } while ((void)0, 0) + +#define SX(x) \ + do { \ + char tmp[30]; \ + sprintf(tmp, "%lu", (unsigned long)(x)); \ + SS(tmp); \ + } while ((void)0, 0) + +#define SB(buf, len) \ + do { \ + size_t sb_len = to_base64(dst, dst_len, buf, len); \ + if (sb_len == (size_t)-1) { \ + return ARGON2_ENCODING_FAIL; \ + } \ + dst += sb_len; \ + dst_len -= sb_len; \ + } while ((void)0, 0) + + const char* type_string = cryptonite_argon2_type2string(type, 0); + int validation_result = validate_inputs(ctx); + + if (!type_string) { + return ARGON2_ENCODING_FAIL; + } + + if (validation_result != ARGON2_OK) { + return validation_result; + } + + + SS("$"); + SS(type_string); + + SS("$v="); + SX(ctx->version); + + SS("$m="); + SX(ctx->m_cost); + SS(",t="); + SX(ctx->t_cost); + SS(",p="); + SX(ctx->lanes); + + SS("$"); + SB(ctx->salt, ctx->saltlen); + + SS("$"); + SB(ctx->out, ctx->outlen); + return ARGON2_OK; + +#undef SS +#undef SX +#undef SB +} + +static size_t b64len(uint32_t len) { + size_t olen = ((size_t)len / 3) << 2; + + switch (len % 3) { + case 2: + olen++; + /* fall through */ + case 1: + olen += 2; + break; + } + + return olen; +} + +static size_t numlen(uint32_t num) { + size_t len = 1; + while (num >= 10) { + ++len; + num = num / 10; + } + return len; +} + diff --git a/cbits/argon2/encoding.h b/cbits/argon2/encoding.h new file mode 100644 index 0000000..325a6e3 --- /dev/null +++ b/cbits/argon2/encoding.h @@ -0,0 +1,57 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ENCODING_H +#define ENCODING_H +#include "argon2.h" + +#define ARGON2_MAX_DECODED_LANES UINT32_C(255) +#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8) +#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12) + +/* +* encode an Argon2 hash string into the provided buffer. 'dst_len' +* contains the size, in characters, of the 'dst' buffer; if 'dst_len' +* is less than the number of required characters (including the +* terminating 0), then this function returns ARGON2_ENCODING_ERROR. +* +* on success, ARGON2_OK is returned. +*/ +static int encode_string(char *dst, size_t dst_len, argon2_context *ctx, + argon2_type type); + +/* +* Decodes an Argon2 hash string into the provided structure 'ctx'. +* The only fields that must be set prior to this call are ctx.saltlen and +* ctx.outlen (which must be the maximal salt and out length values that are +* allowed), ctx.salt and ctx.out (which must be buffers of the specified +* length), and ctx.pwd and ctx.pwdlen which must hold a valid password. +* +* Invalid input string causes an error. On success, the ctx is valid and all +* fields have been initialized. +* +* Returned value is ARGON2_OK on success, other ARGON2_ codes on error. +*/ +static int decode_string(argon2_context *ctx, const char *str, argon2_type type); + +/* Returns the length of the encoded byte stream with length len */ +static size_t b64len(uint32_t len); + +/* Returns the length of the encoded number num */ +static size_t numlen(uint32_t num); + +#endif diff --git a/cbits/argon2/opt.c b/cbits/argon2/opt.c new file mode 100644 index 0000000..0102bdc --- /dev/null +++ b/cbits/argon2/opt.c @@ -0,0 +1,186 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include + +#include "argon2.h" +#include "opt.h" + +#include "blake2/blake2.h" +#include "blake2/blamka-round-opt.h" + +static void fill_block(__m128i *state, const block *ref_block, block *next_block, + int with_xor) { + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + unsigned int i; + + if (with_xor) { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + block_XY[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); + } + } else { + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); + } + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], + state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], + state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128(state[i], block_XY[i]); + _mm_storeu_si128((__m128i *)next_block->v + i, state[i]); + } +} + +static void next_addresses(block *address_block, block *input_block) { + /*Temporary zero-initialized blocks*/ + __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; + __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK]; + + memset(zero_block, 0, sizeof(zero_block)); + memset(zero2_block, 0, sizeof(zero2_block)); + + /*Increasing index counter*/ + input_block->v[6]++; + + /*First iteration of G*/ + fill_block(zero_block, input_block, address_block, 0); + + /*Second iteration of G*/ + fill_block(zero2_block, address_block, address_block, 0); +} + +static void fill_segment(const argon2_instance_t *instance, + argon2_position_t position) { + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index, i; + __m128i state[64]; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = + (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + if (ARGON2_VERSION_10 == instance->version) { + /* version 1.2.1 and earlier: overwrite, not XOR */ + fill_block(state, ref_block, curr_block, 0); + } else { + if(0 == position.pass) { + fill_block(state, ref_block, curr_block, 0); + } else { + fill_block(state, ref_block, curr_block, 1); + } + } + } +} diff --git a/cbits/argon2/opt.h b/cbits/argon2/opt.h new file mode 100644 index 0000000..825bbfc --- /dev/null +++ b/cbits/argon2/opt.h @@ -0,0 +1,35 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ARGON2_OPT_H +#define ARGON2_OPT_H + +#include "core.h" +#include + +/* + * Function fills a new memory block and optionally XORs the old block over the new one. + * Memory must be initialized. + * @param state Pointer to the just produced block. Content will be updated(!) + * @param ref_block Pointer to the reference block + * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block + * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) + * @pre all block pointers must be valid + */ +static void fill_block(__m128i *s, const block *ref_block, block *next_block, int with_xor); + +#endif /* ARGON2_OPT_H */ diff --git a/cbits/argon2/ref.c b/cbits/argon2/ref.c new file mode 100644 index 0000000..f30e294 --- /dev/null +++ b/cbits/argon2/ref.c @@ -0,0 +1,185 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#include +#include +#include + +#include "argon2.h" +#include "ref.h" + +#include "blamka-round-ref.h" +#include "blake2-impl.h" +#include "blake2.h" + + +static void fill_block(const block *prev_block, const block *ref_block, + block *next_block, int with_xor) { + block blockR, block_tmp; + unsigned i; + + copy_block(&blockR, ref_block); + xor_block(&blockR, prev_block); + copy_block(&block_tmp, &blockR); + /* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */ + if (with_xor) { + /* Saving the next block contents for XOR over: */ + xor_block(&block_tmp, next_block); + /* Now blockR = ref_block + prev_block and + block_tmp = ref_block + prev_block + next_block */ + } + + /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then + (16,17,..31)... finally (112,113,...127) */ + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND_NOMSG( + blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], + blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], + blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], + blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], + blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], + blockR.v[16 * i + 15]); + } + + /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then + (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */ + for (i = 0; i < 8; i++) { + BLAKE2_ROUND_NOMSG( + blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], + blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], + blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], + blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], + blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], + blockR.v[2 * i + 113]); + } + + copy_block(next_block, &block_tmp); + xor_block(next_block, &blockR); +} + +static void next_addresses(block *address_block, block *input_block, + const block *zero_block) { + input_block->v[6]++; + fill_block(zero_block, input_block, address_block, 0); + fill_block(zero_block, address_block, address_block, 0); +} + +static void fill_segment(const argon2_instance_t *instance, + argon2_position_t position) { + block *ref_block = NULL, *curr_block = NULL; + block address_block, input_block, zero_block; + uint64_t pseudo_rand, ref_index, ref_lane; + uint32_t prev_offset, curr_offset; + uint32_t starting_index; + uint32_t i; + int data_independent_addressing; + + if (instance == NULL) { + return; + } + + data_independent_addressing = + (instance->type == Argon2_i) || + (instance->type == Argon2_id && (position.pass == 0) && + (position.slice < ARGON2_SYNC_POINTS / 2)); + + if (data_independent_addressing) { + init_block_value(&zero_block, 0); + init_block_value(&input_block, 0); + + input_block.v[0] = position.pass; + input_block.v[1] = position.lane; + input_block.v[2] = position.slice; + input_block.v[3] = instance->memory_blocks; + input_block.v[4] = instance->passes; + input_block.v[5] = instance->type; + } + + starting_index = 0; + + if ((0 == position.pass) && (0 == position.slice)) { + starting_index = 2; /* we have already generated the first two blocks */ + + /* Don't forget to generate the first block of addresses: */ + if (data_independent_addressing) { + next_addresses(&address_block, &input_block, &zero_block); + } + } + + /* Offset of the current block */ + curr_offset = position.lane * instance->lane_length + + position.slice * instance->segment_length + starting_index; + + if (0 == curr_offset % instance->lane_length) { + /* Last block in this lane */ + prev_offset = curr_offset + instance->lane_length - 1; + } else { + /* Previous block */ + prev_offset = curr_offset - 1; + } + + for (i = starting_index; i < instance->segment_length; + ++i, ++curr_offset, ++prev_offset) { + /*1.1 Rotating prev_offset if needed */ + if (curr_offset % instance->lane_length == 1) { + prev_offset = curr_offset - 1; + } + + /* 1.2 Computing the index of the reference block */ + /* 1.2.1 Taking pseudo-random value from the previous block */ + if (data_independent_addressing) { + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + next_addresses(&address_block, &input_block, &zero_block); + } + pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; + } else { + pseudo_rand = instance->memory[prev_offset].v[0]; + } + + /* 1.2.2 Computing the lane of the reference block */ + ref_lane = ((pseudo_rand >> 32)) % instance->lanes; + + if ((position.pass == 0) && (position.slice == 0)) { + /* Can not reference other lanes yet */ + ref_lane = position.lane; + } + + /* 1.2.3 Computing the number of possible reference block within the + * lane. + */ + position.index = i; + ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, + ref_lane == position.lane); + + /* 2 Creating a new block */ + ref_block = + instance->memory + instance->lane_length * ref_lane + ref_index; + curr_block = instance->memory + curr_offset; + if (ARGON2_VERSION_10 == instance->version) { + /* version 1.2.1 and earlier: overwrite, not XOR */ + fill_block(instance->memory + prev_offset, ref_block, curr_block, 0); + } else { + if(0 == position.pass) { + fill_block(instance->memory + prev_offset, ref_block, + curr_block, 0); + } else { + fill_block(instance->memory + prev_offset, ref_block, + curr_block, 1); + } + } + } +} diff --git a/cbits/argon2/ref.h b/cbits/argon2/ref.h new file mode 100644 index 0000000..676b454 --- /dev/null +++ b/cbits/argon2/ref.h @@ -0,0 +1,35 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ARGON2_REF_H +#define ARGON2_REF_H + +#include "core.h" + +/* + * Function fills a new memory block and optionally XORs the old block over the new one. + * @next_block must be initialized. + * @param prev_block Pointer to the previous block + * @param ref_block Pointer to the reference block + * @param next_block Pointer to the block to be constructed + * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) + * @pre all block pointers must be valid + */ +static void fill_block(const block *prev_block, const block *ref_block, + block *next_block, int with_xor); + +#endif /* ARGON2_REF_H */ diff --git a/cbits/argon2/thread.c b/cbits/argon2/thread.c new file mode 100644 index 0000000..1686481 --- /dev/null +++ b/cbits/argon2/thread.c @@ -0,0 +1,57 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#if !defined(ARGON2_NO_THREADS) + +#include "thread.h" +#if defined(_WIN32) +#include +#endif + +static int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args) { + if (NULL == handle || func == NULL) { + return -1; + } +#if defined(_WIN32) + *handle = _beginthreadex(NULL, 0, func, args, 0, NULL); + return *handle != 0 ? 0 : -1; +#else + return pthread_create(handle, NULL, func, args); +#endif +} + +static int argon2_thread_join(argon2_thread_handle_t handle) { +#if defined(_WIN32) + if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) { + return CloseHandle((HANDLE)handle) != 0 ? 0 : -1; + } + return -1; +#else + return pthread_join(handle, NULL); +#endif +} + +static void argon2_thread_exit(void) { +#if defined(_WIN32) + _endthreadex(0); +#else + pthread_exit(NULL); +#endif +} + +#endif /* ARGON2_NO_THREADS */ diff --git a/cbits/argon2/thread.h b/cbits/argon2/thread.h new file mode 100644 index 0000000..d91882c --- /dev/null +++ b/cbits/argon2/thread.h @@ -0,0 +1,67 @@ +/* + * Argon2 reference source code package - reference C implementations + * + * Copyright 2015 + * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves + * + * You may use this work under the terms of a Creative Commons CC0 1.0 + * License/Waiver or the Apache Public License 2.0, at your option. The terms of + * these licenses can be found at: + * + * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + * + * You should have received a copy of both of these licenses along with this + * software. If not, they may be obtained at the above URLs. + */ + +#ifndef ARGON2_THREAD_H +#define ARGON2_THREAD_H + +#if !defined(ARGON2_NO_THREADS) + +/* + Here we implement an abstraction layer for the simpĺe requirements + of the Argon2 code. We only require 3 primitives---thread creation, + joining, and termination---so full emulation of the pthreads API + is unwarranted. Currently we wrap pthreads and Win32 threads. + + The API defines 2 types: the function pointer type, + argon2_thread_func_t, + and the type of the thread handle---argon2_thread_handle_t. +*/ +#if defined(_WIN32) +#include +typedef unsigned(__stdcall *argon2_thread_func_t)(void *); +typedef uintptr_t argon2_thread_handle_t; +#else +#include +typedef void *(*argon2_thread_func_t)(void *); +typedef pthread_t argon2_thread_handle_t; +#endif + +/* Creates a thread + * @param handle pointer to a thread handle, which is the output of this + * function. Must not be NULL. + * @param func A function pointer for the thread's entry point. Must not be + * NULL. + * @param args Pointer that is passed as an argument to @func. May be NULL. + * @return 0 if @handle and @func are valid pointers and a thread is successfuly + * created. + */ +static int argon2_thread_create(argon2_thread_handle_t *handle, + argon2_thread_func_t func, void *args); + +/* Waits for a thread to terminate + * @param handle Handle to a thread created with argon2_thread_create. + * @return 0 if @handle is a valid handle, and joining completed successfully. +*/ +static int argon2_thread_join(argon2_thread_handle_t handle); + +/* Terminate the current thread. Must be run inside a thread created by + * argon2_thread_create. +*/ +static void argon2_thread_exit(void); + +#endif /* ARGON2_NO_THREADS */ +#endif diff --git a/cryptonite.cabal b/cryptonite.cabal index de95e37..90d9587 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -44,6 +44,8 @@ extra-source-files: cbits/*.h cbits/p256/*.h cbits/blake2/ref/*.h cbits/blake2/sse/*.h + cbits/argon2/*.h + cbits/argon2/*.c cbits/aes/x86ni_impl.c tests/*.hs @@ -66,6 +68,11 @@ Flag support_pclmuldq Default: False Manual: True +Flag support_sse + Description: Use SSE optimized version when existing (BLAKE2, ARGON2) + Default: False + Manual: True + Flag support_blake2_sse Description: Use SSE optimized version of BLAKE2. Default: False @@ -294,6 +301,12 @@ Library , cbits/blake2/ref/blake2bp-ref.c include-dirs: cbits/blake2/ref + if arch(x86_64) || flag(support_sse) + CPP-options: -DSUPPORT_SSE + + C-sources: cbits/argon2/argon2.c + include-dirs: cbits/argon2 + if os(windows) cpp-options: -DWINDOWS Build-Depends: Win32