diff --git a/Crypto/ECC/Ed25519.hs b/Crypto/ECC/Ed25519.hs index 0a18223..d9fea9a 100644 --- a/Crypto/ECC/Ed25519.hs +++ b/Crypto/ECC/Ed25519.hs @@ -26,6 +26,7 @@ module Crypto.ECC.Ed25519 , pointAdd , pointDouble , pointMul + , pointMulW , pointsMulVarTime ) where @@ -197,7 +198,7 @@ pointDouble (Point a) = withByteArray a $ \pa -> ed25519_point_double out pa --- | Scalar multiplication over Ed25519. +-- | Scalar multiplication over Ed25519 (double-add always). pointMul :: Scalar -> Point -> Point pointMul (Scalar scalar) (Point base) = Point $ B.allocAndFreeze pointArraySize $ \out -> @@ -205,6 +206,14 @@ pointMul (Scalar scalar) (Point base) = withByteArray base $ \pbase -> ed25519_point_scalarmul out pbase pscalar +-- | Scalar multiplication over Ed25519 (4-bit fixed window), same signature as 'pointMul'; the C routine behind it indexes a precomputed table by scalar nibbles, which its own note says leaks data through the CPU cache. +pointMulW :: Scalar -> Point -> Point +pointMulW (Scalar scalar) (Point base) = + Point $ B.allocAndFreeze pointArraySize $ \out -> + withByteArray scalar $ \pscalar -> + withByteArray base $ \pbase -> + ed25519_point_scalarmul_w out pbase pscalar + -- | Multiply the point @p@ with @s2@ and add a lifted to curve value @s1@. 
-- -- @ @@ -290,6 +299,12 @@ foreign import ccall "cryptonite_ed25519_point_scalarmul" -> Ptr Scalar -- scalar -> IO () +foreign import ccall "cryptonite_ed25519_point_scalarmul_w" + ed25519_point_scalarmul_w :: Ptr Point -- scaled + -> Ptr Point -- base + -> Ptr Scalar -- scalar + -> IO () + foreign import ccall "cryptonite_ed25519_base_double_scalarmul_vartime" ed25519_base_double_scalarmul_vartime :: Ptr Point -- combo -> Ptr Scalar -- scalar1 diff --git a/cbits/ed25519/ed25519-cryptonite-exts.h b/cbits/ed25519/ed25519-cryptonite-exts.h index 78a657a..57df83c 100644 --- a/cbits/ed25519/ed25519-cryptonite-exts.h +++ b/cbits/ed25519/ed25519-cryptonite-exts.h @@ -139,6 +139,62 @@ ED25519_FN(ed25519_point_scalarmul) (ge25519 *r, const ge25519 *p, const bignum2 } } +void +ED25519_FN(ed25519_point_scalarmul_w) (ge25519 *r, const ge25519 *p, const bignum256modm s) { + ge25519_pniels mult[16]; + ge25519_p1p1 t; + unsigned char ss[32]; + + // transform scalar as little-endian number + contract256_modm(ss, s); + + // initialize r to identity, i.e. ge25519 (0, 1, 1, 0) + memset(r, 0, sizeof(ge25519)); + r->y[0] = 1; + r->z[0] = 1; + + // initialize mult[0] to identity, i.e. 
ge25519_pniels (1, 1, 1, 0) + memset(&mult[0], 0, sizeof(ge25519_pniels)); + mult->ysubx[0] = 1; + mult->xaddy[0] = 1; + mult->z[0] = 1; + + // precompute other multiples of P: 1.P, 2.P, ..., 15.P + ge25519_full_to_pniels(&mult[1], p); + for (int i = 2; i < 16; i++) { + ge25519_pnielsadd(&mult[i], p, &mult[i-1]); + } + + // 4-bit fixed window: 64 additions and 252 doublings (4 after every nibble except the last) + // + // NOTE: direct indexed access to 'mult' table leaks data through + // CPU cache but provides 33% speedup compared to naive unvectored + // table lookup with uint32 constant-time conditional selection + for (int i = 31; i >= 0; i--) { + // high nibble of ss[i]: add its table entry, then multiply r by 16 (four doublings) + ge25519_pnielsadd_p1p1(&t, r, &mult[ss[i] >> 4], 0); + ge25519_p1p1_to_partial(r, &t); + + ge25519_double_partial(r, r); + ge25519_double_partial(r, r); + ge25519_double_partial(r, r); + ge25519_double(r, r); + + // low nibble of ss[i]: add its table entry; multiply by 16 again unless this is the last nibble + ge25519_pnielsadd_p1p1(&t, r, &mult[ss[i] & 0x0F], 0); + if (i > 0) { + ge25519_p1p1_to_partial(r, &t); + + ge25519_double_partial(r, r); + ge25519_double_partial(r, r); + ge25519_double_partial(r, r); + ge25519_double(r, r); + } else { + ge25519_p1p1_to_full(r, &t); + } + } +} + void ED25519_FN(ed25519_base_double_scalarmul_vartime) (ge25519 *r, const bignum256modm s1, const ge25519 *p2, const bignum256modm s2) { // computes [s1]basepoint + [s2]p2