|
|
@@ -82,19 +82,36 @@ typedef struct ec_pre_comp_st {
|
|
|
} EC_PRE_COMP;
|
|
|
|
|
|
/* Functions implemented in assembly */
|
|
|
+/*
|
|
|
+ * Most of the functions mentioned below *preserve* the property of inputs
|
|
|
+ * being fully reduced, i.e. being in [0, modulus) range. Simply put if
|
|
|
+ * inputs are fully reduced, then output is too. Note that the reverse is
|
|
|
+ * not true, in the sense that given partially reduced inputs the output can be
|
|
|
+ * either, not unlikely fully reduced. And "most" in the first sentence refers to
|
|
|
+ * the fact that given the calculations flow one can tolerate that
|
|
|
+ * addition, the 1st function below, produces a partially reduced result *if*
|
|
|
+ * multiplications by 2 and 3, which customarily use addition, fully
|
|
|
+ * reduce it. This effectively gives two options: a) addition produces
|
|
|
+ * fully reduced result [as long as inputs are, just like remaining
|
|
|
+ * functions]; b) addition is allowed to produce partially reduced
|
|
|
+ * result, but multiplications by 2 and 3 perform additional reduction
|
|
|
+ * step. Choice between the two can be platform-specific, but it was a)
|
|
|
+ * in all cases so far...
|
|
|
+ */
|
|
|
+/* Modular add: res = a+b mod P */
|
|
|
+void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
|
|
|
+ const BN_ULONG a[P256_LIMBS],
|
|
|
+ const BN_ULONG b[P256_LIMBS]);
|
|
|
/* Modular mul by 2: res = 2*a mod P */
|
|
|
void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS],
|
|
|
const BN_ULONG a[P256_LIMBS]);
|
|
|
-/* Modular div by 2: res = a/2 mod P */
|
|
|
-void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
|
|
|
- const BN_ULONG a[P256_LIMBS]);
|
|
|
/* Modular mul by 3: res = 3*a mod P */
|
|
|
void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS],
|
|
|
const BN_ULONG a[P256_LIMBS]);
|
|
|
-/* Modular add: res = a+b mod P */
|
|
|
-void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
|
|
|
- const BN_ULONG a[P256_LIMBS],
|
|
|
- const BN_ULONG b[P256_LIMBS]);
|
|
|
+
|
|
|
+/* Modular div by 2: res = a/2 mod P */
|
|
|
+void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
|
|
|
+ const BN_ULONG a[P256_LIMBS]);
|
|
|
/* Modular sub: res = a-b mod P */
|
|
|
void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS],
|
|
|
const BN_ULONG a[P256_LIMBS],
|
|
|
@@ -205,21 +222,29 @@ static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS],
|
|
|
return is_zero(res);
|
|
|
}
|
|
|
|
|
|
-static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS])
|
|
|
+static BN_ULONG is_one(const BIGNUM *z)
|
|
|
{
|
|
|
- BN_ULONG res;
|
|
|
-
|
|
|
- res = a[0] ^ ONE[0];
|
|
|
- res |= a[1] ^ ONE[1];
|
|
|
- res |= a[2] ^ ONE[2];
|
|
|
- res |= a[3] ^ ONE[3];
|
|
|
- if (P256_LIMBS == 8) {
|
|
|
- res |= a[4] ^ ONE[4];
|
|
|
- res |= a[5] ^ ONE[5];
|
|
|
- res |= a[6] ^ ONE[6];
|
|
|
+ BN_ULONG res = 0;
|
|
|
+ BN_ULONG *a = z->d;
|
|
|
+
|
|
|
+ if (z->top == (P256_LIMBS - P256_LIMBS / 8)) {
|
|
|
+ res = a[0] ^ ONE[0];
|
|
|
+ res |= a[1] ^ ONE[1];
|
|
|
+ res |= a[2] ^ ONE[2];
|
|
|
+ res |= a[3] ^ ONE[3];
|
|
|
+ if (P256_LIMBS == 8) {
|
|
|
+ res |= a[4] ^ ONE[4];
|
|
|
+ res |= a[5] ^ ONE[5];
|
|
|
+ res |= a[6] ^ ONE[6];
|
|
|
+ /*
|
|
|
+ * no check for a[7] (being zero) on 32-bit platforms,
|
|
|
+ * because value of "one" takes only 7 limbs.
|
|
|
+ */
|
|
|
+ }
|
|
|
+ res = is_zero(res);
|
|
|
}
|
|
|
|
|
|
- return is_zero(res);
|
|
|
+ return res;
|
|
|
}
|
|
|
|
|
|
static int ecp_nistz256_set_words(BIGNUM *a, BN_ULONG words[P256_LIMBS])
|
|
|
@@ -315,19 +340,16 @@ static void ecp_nistz256_point_add(P256_POINT *r,
|
|
|
const BN_ULONG *in2_y = b->Y;
|
|
|
const BN_ULONG *in2_z = b->Z;
|
|
|
|
|
|
- /* We encode infinity as (0,0), which is not on the curve,
|
|
|
- * so it is OK. */
|
|
|
- in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
|
|
|
- in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
|
|
|
+ /*
|
|
|
+ * Infinity is encoded as (,,0)
|
|
|
+ */
|
|
|
+ in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
|
|
|
if (P256_LIMBS == 8)
|
|
|
- in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
|
|
|
- in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
|
|
|
+ in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
|
|
|
|
|
|
- in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
|
|
|
- in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
|
|
|
+ in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
|
|
|
if (P256_LIMBS == 8)
|
|
|
- in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
|
|
|
- in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
|
|
|
+ in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
|
|
|
|
|
|
in1infty = is_zero(in1infty);
|
|
|
in2infty = is_zero(in2infty);
|
|
|
@@ -416,15 +438,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r,
|
|
|
const BN_ULONG *in2_y = b->Y;
|
|
|
|
|
|
/*
|
|
|
- * In affine representation we encode infty as (0,0), which is not on the
|
|
|
- * curve, so it is OK
|
|
|
+ * Infinity is encoded as (,,0)
|
|
|
*/
|
|
|
- in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
|
|
|
- in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
|
|
|
+ in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
|
|
|
if (P256_LIMBS == 8)
|
|
|
- in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
|
|
|
- in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
|
|
|
+ in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
|
|
|
|
|
|
+ /*
|
|
|
+ * In affine representation we encode infinity as (0,0), which is
|
|
|
+ * not on the curve, so it is OK
|
|
|
+ */
|
|
|
in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
|
|
|
in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
|
|
|
if (P256_LIMBS == 8)
|
|
|
@@ -741,9 +764,8 @@ static int ecp_nistz256_is_affine_G(const EC_POINT *generator)
|
|
|
{
|
|
|
return (generator->X.top == P256_LIMBS) &&
|
|
|
(generator->Y.top == P256_LIMBS) &&
|
|
|
- (generator->Z.top == (P256_LIMBS - P256_LIMBS / 8)) &&
|
|
|
is_equal(generator->X.d, def_xG) &&
|
|
|
- is_equal(generator->Y.d, def_yG) && is_one(generator->Z.d);
|
|
|
+ is_equal(generator->Y.d, def_yG) && is_one(&generator->Z);
|
|
|
}
|
|
|
|
|
|
static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx)
|
|
|
@@ -1249,6 +1271,8 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group,
|
|
|
} else
|
|
|
#endif
|
|
|
{
|
|
|
+ BN_ULONG infty;
|
|
|
+
|
|
|
/* First window */
|
|
|
wvalue = (p_str[0] << 1) & mask;
|
|
|
index += window_size;
|
|
|
@@ -1260,7 +1284,30 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group,
|
|
|
ecp_nistz256_neg(p.p.Z, p.p.Y);
|
|
|
copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
|
|
|
|
|
|
- memcpy(p.p.Z, ONE, sizeof(ONE));
|
|
|
+ /*
|
|
|
+ * Since affine infinity is encoded as (0,0) and
|
|
|
+ * Jacobian is (,,0), we need to harmonize them
|
|
|
+ * by assigning "one" or zero to Z.
|
|
|
+ */
|
|
|
+ infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
|
|
|
+ p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
|
|
|
+ if (P256_LIMBS == 8)
|
|
|
+ infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
|
|
|
+ p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
|
|
|
+
|
|
|
+ infty = 0 - is_zero(infty);
|
|
|
+ infty = ~infty;
|
|
|
+
|
|
|
+ p.p.Z[0] = ONE[0] & infty;
|
|
|
+ p.p.Z[1] = ONE[1] & infty;
|
|
|
+ p.p.Z[2] = ONE[2] & infty;
|
|
|
+ p.p.Z[3] = ONE[3] & infty;
|
|
|
+ if (P256_LIMBS == 8) {
|
|
|
+ p.p.Z[4] = ONE[4] & infty;
|
|
|
+ p.p.Z[5] = ONE[5] & infty;
|
|
|
+ p.p.Z[6] = ONE[6] & infty;
|
|
|
+ p.p.Z[7] = ONE[7] & infty;
|
|
|
+ }
|
|
|
|
|
|
for (i = 1; i < 37; i++) {
|
|
|
unsigned int off = (index - 1) / 8;
|
|
|
@@ -1331,7 +1378,7 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group,
|
|
|
!ecp_nistz256_set_words(&r->Z, p.p.Z)) {
|
|
|
goto err;
|
|
|
}
|
|
|
- r->Z_is_one = is_one(p.p.Z) & 1;
|
|
|
+ r->Z_is_one = is_one(&r->Z) & 1;
|
|
|
|
|
|
ret = 1;
|
|
|
|