From c4c0802f8066d156e83b5b32ed0a3923a15cdbd0 Mon Sep 17 00:00:00 2001 From: Vincent Hanquez Date: Sun, 6 Jul 2014 15:44:54 +0100 Subject: [PATCH] Add working implementation of poly1305 with 1 test vector. --- Crypto/MAC/Poly1305.hs | 8 +- README.md | 3 + cbits/cryptonite_poly1305.c | 164 +++++++++++++++++++++++++++++++++++- cbits/cryptonite_poly1305.h | 7 +- cryptonite.cabal | 3 +- tests/Tests.hs | 13 +++ 6 files changed, 192 insertions(+), 6 deletions(-) diff --git a/Crypto/MAC/Poly1305.hs b/Crypto/MAC/Poly1305.hs index 93410fb..5f031da 100644 --- a/Crypto/MAC/Poly1305.hs +++ b/Crypto/MAC/Poly1305.hs @@ -11,7 +11,7 @@ -- module Crypto.MAC.Poly1305 ( Ctx - , Auth + , Auth(..) -- * Incremental MAC Functions , initialize -- :: Ctx @@ -39,6 +39,8 @@ newtype Ctx = Ctx SecureMem -- | Poly1305 Auth newtype Auth = Auth ByteString +instance Eq Auth where + (Auth a1) == (Auth a2) = constEqBytes a1 a2 instance Byteable Auth where toBytes (Auth b) = b @@ -59,7 +61,7 @@ initialize key | byteableLength key /= 32 = error "Poly1305: key length expected 32 bytes" | otherwise = Ctx $ unsafePerformIO $ do withBytePtr key $ \keyPtr -> - createSecureMem 64 {- FIXME -} $ \ctxPtr -> + createSecureMem 84 $ \ctxPtr -> c_poly1305_init (castPtr ctxPtr) keyPtr {-# NOINLINE initialize #-} @@ -92,6 +94,6 @@ finalize (Ctx prevCtx) = Auth $ B.unsafeCreate 16 $ \dst -> do withSecureMemPtr ctx $ \ctxPtr -> c_poly1305_finalize dst (castPtr ctxPtr) {-# NOINLINE finalize #-} --- | Auth +-- | One-pass authorization creation auth :: Byteable key => key -> ByteString -> Auth auth key = finalize . update (initialize key) diff --git a/README.md b/README.md index c0a2c9d..f340cbd 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,9 @@ Links * [ChaCha](http://cr.yp.to/chacha.html) * [ChaCha-test-vectors](https://github.com/secworks/chacha_testvectors.git) +* [Poly1305](http://cr.yp.to/mac.html) +* [Poly1305-test-vectors](http://tools.ietf.org/html/draft-nir-cfrg-chacha20-poly1305-06#page-12) + TODO ---- diff --git a/cbits/cryptonite_poly1305.c b/cbits/cryptonite_poly1305.c index d23698e..4863e8a 100644 --- a/cbits/cryptonite_poly1305.c +++ b/cbits/cryptonite_poly1305.c @@ -26,18 +26,180 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * The following code contains some code copied from and inspired by poly1305-donna + * in poly1305_do_chunk and poly1305_finalize which is licensed under MIT or PUBLIC DOMAIN. + * see [poly1305-donna](https://github.com/floodyberry/poly1305-donna) + * */ + #include +#include #include "cryptonite_poly1305.h" +#include "cryptonite_bitfn.h" + +static inline uint32_t load32(uint8_t *p) +{ + return (le32_to_cpu(*((uint32_t *) p))); +} + +static void poly1305_do_chunk(poly1305_ctx *ctx, uint8_t *data, int blocks, int final) +{ + /* following is a cleanup copy of code available poly1305-donna */ + const uint32_t hibit = (final) ? 0 : (1 << 24); /* 1 << 128 */ + uint32_t r0,r1,r2,r3,r4; + uint32_t s1,s2,s3,s4; + uint32_t h0,h1,h2,h3,h4; + uint64_t d0,d1,d2,d3,d4; + uint32_t c; + + /* load r[i], h[i] */ + h0 = ctx->h[0]; h1 = ctx->h[1]; h2 = ctx->h[2]; h3 = ctx->h[3]; h4 = ctx->h[4]; + r0 = ctx->r[0]; r1 = ctx->r[1]; r2 = ctx->r[2]; r3 = ctx->r[3]; r4 = ctx->r[4]; + + /* s[i] = r[i] * 5 */ + s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; + + while (blocks--) { + h0 += (load32(data+ 0) ) & 0x3ffffff; + h1 += (load32(data+ 3) >> 2) & 0x3ffffff; + h2 += (load32(data+ 6) >> 4) & 0x3ffffff; + h3 += (load32(data+ 9) >> 6) & 0x3ffffff; + h4 += (load32(data+12) >> 8) | hibit; + + d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1); + d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2); + d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3); + d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4); + d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0); + + c = (uint32_t)(d0 >> 26); h0 = (uint32_t)d0 & 0x3ffffff; + d1 += c; c = (uint32_t)(d1 >> 26); h1 = (uint32_t)d1 & 0x3ffffff; + d2 += c; c = (uint32_t)(d2 >> 26); h2 = (uint32_t)d2 & 0x3ffffff; + d3 += c; c = (uint32_t)(d3 >> 26); h3 = (uint32_t)d3 & 0x3ffffff; + d4 += c; c = (uint32_t)(d4 >> 26); h4 = (uint32_t)d4 & 0x3ffffff; + h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; + h1 += c; + + data += 16; + } + + /* store h[i] */ + ctx->h[0] = h0; ctx->h[1] = h1; ctx->h[2] = h2; ctx->h[3] = h3; ctx->h[4] = h4; +} void cryptonite_poly1305_init(poly1305_ctx *ctx, poly1305_key *key) { + uint8_t *k = (uint8_t *) key; + + memset(ctx, 0, sizeof(poly1305_ctx)); + + ctx->r[0] = (load32(&k[ 0]) ) & 0x3ffffff; + ctx->r[1] = (load32(&k[ 3]) >> 2) & 0x3ffff03; + ctx->r[2] = (load32(&k[ 6]) >> 4) & 0x3ffc0ff; + ctx->r[3] = (load32(&k[ 9]) >> 6) & 0x3f03fff; + ctx->r[4] = (load32(&k[12]) >> 8) & 0x00fffff; + + ctx->pad[0] = load32(&k[16]); + ctx->pad[1] = load32(&k[20]); + ctx->pad[2] = load32(&k[24]); + ctx->pad[3] = load32(&k[28]); + + ctx->index = 0; } void cryptonite_poly1305_update(poly1305_ctx *ctx, uint8_t *data, uint32_t length) { + uint32_t to_fill, nb_blocks_bytes; + + to_fill = 16 - ctx->index; + + /* process partial buffer if there's enough data to make a block */ + if (ctx->index && length >= to_fill) { + memcpy(ctx->buf + ctx->index, data, to_fill); + poly1305_do_chunk(ctx, ctx->buf, 1, 0); + ctx->index = 0; + } + + /* process as much 16-block as possible */ + nb_blocks_bytes = length & ~(16 - 1); + poly1305_do_chunk(ctx, data, nb_blocks_bytes >> 4, 0); + data += nb_blocks_bytes; + length &= 0xf; + + /* fill the remaining bytes in the partial buffer */ + if (length) { + memcpy(ctx->buf + ctx->index, data, length); + ctx->index = length; + } } -void cryptonite_poly1305_finalize(poly1305_mac mac, poly1305_ctx *ctx) +void cryptonite_poly1305_finalize(poly1305_mac mac8, poly1305_ctx *ctx) { + uint32_t h0,h1,h2,h3,h4,c; + uint32_t g0,g1,g2,g3,g4; + uint64_t f; + uint32_t mask; + uint32_t *mac = (uint32_t *) mac8; + int i; + + if (ctx->index) { + /* append partial final buffer with 10* then process */ + ctx->buf[ctx->index] = 0x1; + for (i = ctx->index + 1; i < 16; i++); + ctx->buf[i] = 0x0; + poly1305_do_chunk(ctx, ctx->buf, 1, 1); + } + + /* following is a cleanup copy of code available poly1305-donna */ + + /* fully carry h */ + h0 = ctx->h[0]; h1 = ctx->h[1]; h2 = ctx->h[2]; h3 = ctx->h[3]; h4 = ctx->h[4]; + + c = h1 >> 26; h1 = h1 & 0x3ffffff; + h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; + h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; + h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; + h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; + g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; + g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; + g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; + g4 = h4 + c - (1 << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (uint64_t)h0 + ctx->pad[0]; + mac[0] = cpu_to_le32((uint32_t) f); + + f = (uint64_t)h1 + ctx->pad[1] + (f >> 32); + mac[1] = cpu_to_le32((uint32_t) f); + + f = (uint64_t)h2 + ctx->pad[2] + (f >> 32); + mac[2] = cpu_to_le32((uint32_t) f); + + f = (uint64_t)h3 + ctx->pad[3] + (f >> 32); + mac[3] = cpu_to_le32((uint32_t) f); } diff --git a/cbits/cryptonite_poly1305.h b/cbits/cryptonite_poly1305.h index f28d48d..992f4ea 100644 --- a/cbits/cryptonite_poly1305.h +++ b/cbits/cryptonite_poly1305.h @@ -30,9 +30,14 @@ #ifndef CRYPTONITE_POLY1305_H # define CRYPTONITE_POLY1305_H +/* 8*8+1*16+1*4 = 84 */ typedef struct { - uint8_t block[16]; /* previous blocks */ + uint32_t r[5]; + uint32_t h[5]; + uint32_t pad[4]; + uint32_t index; + uint8_t buf[16]; /* previous partial block */ } poly1305_ctx; typedef uint8_t poly1305_mac[16]; diff --git a/cryptonite.cabal b/cryptonite.cabal index 6698a03..22339e9 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -21,7 +21,7 @@ source-repository head Library Exposed-modules: Crypto.Cipher.ChaCha - Other-modules: Crypto.MAC.Poly1305 + Crypto.MAC.Poly1305 Build-depends: base >= 4 && < 5 , bytestring , securemem @@ -39,6 +39,7 @@ Test-Suite test-cryptonite Main-is: Tests.hs Build-Depends: base >= 3 && < 5 , bytestring + , byteable , mtl , tasty , tasty-quickcheck diff --git a/tests/Tests.hs b/tests/Tests.hs index be37825..e036f60 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -1,12 +1,15 @@ {-# LANGUAGE OverloadedStrings #-} module Main where +import Data.Byteable +import Data.ByteString (ByteString) import qualified Data.ByteString as B import Test.Tasty import Test.Tasty.QuickCheck import Test.Tasty.HUnit import qualified Crypto.Cipher.ChaCha as ChaCha +import qualified Crypto.MAC.Poly1305 as Poly1305 b8_128_k0_i0 = "\xe2\x8a\x5f\xa4\xa6\x7f\x8c\x5d\xef\xed\x3e\x6f\xb7\x30\x34\x86\xaa\x84\x27\xd3\x14\x19\xa7\x29\x57\x2d\x77\x79\x53\x49\x11\x20\xb6\x4a\xb8\xe7\x2b\x8d\xeb\x85\xcd\x6a\xea\x7c\xb6\x08\x9a\x10\x18\x24\xbe\xeb\x08\x81\x4a\x42\x8a\xab\x1f\xa2\xc8\x16\x08\x1b\x8a\x26\xaf\x44\x8a\x1b\xa9\x06\x36\x8f\xd8\xc8\x38\x31\xc1\x8c\xec\x8c\xed\x81\x1a\x02\x8e\x67\x5b\x8d\x2b\xe8\xfc\xe0\x81\x16\x5c\xea\xe9\xf1\xd1\xb7\xa9\x75\x49\x77\x49\x48\x05\x69\xce\xb8\x3d\xe6\xa0\xa5\x87\xd4\x98\x4f\x19\x92\x5f\x5d\x33\x8e\x43\x0d" @@ -25,6 +28,9 @@ b12_256_k0_i0 = b20_256_k0_i0 = "\x76\xb8\xe0\xad\xa0\xf1\x3d\x90\x40\x5d\x6a\xe5\x53\x86\xbd\x28\xbd\xd2\x19\xb8\xa0\x8d\xed\x1a\xa8\x36\xef\xcc\x8b\x77\x0d\xc7\xda\x41\x59\x7c\x51\x57\x48\x8d\x77\x24\xe0\x3f\xb8\xd8\x4a\x37\x6a\x43\xb8\xf4\x15\x18\xa1\x1c\xc3\x87\xb6\x69\xb2\xee\x65\x86\x9f\x07\xe7\xbe\x55\x51\x38\x7a\x98\xba\x97\x7c\x73\x2d\x08\x0d\xcb\x0f\x29\xa0\x48\xe3\x65\x69\x12\xc6\x53\x3e\x32\xee\x7a\xed\x29\xb7\x21\x76\x9c\xe6\x4e\x43\xd5\x71\x33\xb0\x74\xd8\x39\xd5\x31\xed\x1f\x28\x51\x0a\xfb\x45\xac\xe1\x0a\x1f\x4b\x79\x4d\x6f" +instance Show Poly1305.Auth where + show = show . toBytes + tests = testGroup "cryptonite" [ testGroup "ChaCha" [ testCase "8-128-K0-I0" (chachaRunSimple b8_128_k0_i0 8 16 8) @@ -34,6 +40,13 @@ tests = testGroup "cryptonite" , testCase "12-256-K0-I0" (chachaRunSimple b12_256_k0_i0 12 32 8) , testCase "20-256-K0-I0" (chachaRunSimple b20_256_k0_i0 20 32 8) ] + , testGroup "Poly1305" + [ testCase "V0" $ + let key = "\x85\xd6\xbe\x78\x57\x55\x6d\x33\x7f\x44\x52\xfe\x42\xd5\x06\xa8\x01\x03\x80\x8a\xfb\x0d\xb2\xfd\x4a\xbf\xf6\xaf\x41\x49\xf5\x1b" :: ByteString + msg = "Cryptographic Forum Research Group" + tag = Poly1305.Auth "\xa8\x06\x1d\xc1\x30\x51\x36\xc6\xc2\x2b\x8b\xaf\x0c\x01\x27\xa9" + in tag @=? Poly1305.auth key msg + ] ] where chachaRunSimple expected rounds klen nonceLen = let chacha = ChaCha.initialize rounds (B.replicate klen 0) (B.replicate nonceLen 0)