|  | @@ -82,19 +82,36 @@ typedef struct ec_pre_comp_st {
 | 
	
		
			
				|  |  |  } EC_PRE_COMP;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  /* Functions implemented in assembly */
 | 
	
		
			
				|  |  | +/*
 | 
	
		
			
				|  |  | + * Most of the functions declared below *preserve* the property of inputs
 | 
	
		
			
				|  |  | + * being fully reduced, i.e. being in [0, modulus) range. Simply put if
 | 
	
		
			
				|  |  | + * inputs are fully reduced, then output is too. Note that reverse is
 | 
	
		
			
				|  |  | + * not true, in the sense that given partially reduced inputs output can be
 | 
	
		
			
				|  |  | + * either, not unlikely reduced. And "most" in first sentence refers to
 | 
	
		
			
				|  |  | + * the fact that, given the calculation flow, one can tolerate that
 | 
	
		
			
				|  |  | + * addition, 1st function below, produces partially reduced result *if*
 | 
	
		
			
				|  |  | + * multiplications by 2 and 3, which customarily use addition, fully
 | 
	
		
			
				|  |  | + * reduce it. This effectively gives two options: a) addition produces
 | 
	
		
			
				|  |  | + * fully reduced result [as long as inputs are, just like remaining
 | 
	
		
			
				|  |  | + * functions]; b) addition is allowed to produce partially reduced
 | 
	
		
			
				|  |  | + * result, but multiplications by 2 and 3 perform additional reduction
 | 
	
		
			
				|  |  | + * step. Choice between the two can be platform-specific, but it was a)
 | 
	
		
			
				|  |  | + * in all cases so far...
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +/* Modular add: res = a+b mod P   */
 | 
	
		
			
				|  |  | +void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  | +                      const BN_ULONG a[P256_LIMBS],
 | 
	
		
			
				|  |  | +                      const BN_ULONG b[P256_LIMBS]);
 | 
	
		
			
				|  |  |  /* Modular mul by 2: res = 2*a mod P */
 | 
	
		
			
				|  |  |  void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  |                             const BN_ULONG a[P256_LIMBS]);
 | 
	
		
			
				|  |  | -/* Modular div by 2: res = a/2 mod P */
 | 
	
		
			
				|  |  | -void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  | -                           const BN_ULONG a[P256_LIMBS]);
 | 
	
		
			
				|  |  |  /* Modular mul by 3: res = 3*a mod P */
 | 
	
		
			
				|  |  |  void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  |                             const BN_ULONG a[P256_LIMBS]);
 | 
	
		
			
				|  |  | -/* Modular add: res = a+b mod P   */
 | 
	
		
			
				|  |  | -void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  | -                      const BN_ULONG a[P256_LIMBS],
 | 
	
		
			
				|  |  | -                      const BN_ULONG b[P256_LIMBS]);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/* Modular div by 2: res = a/2 mod P */
 | 
	
		
			
				|  |  | +void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  | +                           const BN_ULONG a[P256_LIMBS]);
 | 
	
		
			
				|  |  |  /* Modular sub: res = a-b mod P   */
 | 
	
		
			
				|  |  |  void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS],
 | 
	
		
			
				|  |  |                        const BN_ULONG a[P256_LIMBS],
 | 
	
	
		
			
				|  | @@ -205,21 +222,29 @@ static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS],
 | 
	
		
			
				|  |  |      return is_zero(res);
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS])
 | 
	
		
			
				|  |  | +static BN_ULONG is_one(const BIGNUM *z)
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  | -    BN_ULONG res;
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    res = a[0] ^ ONE[0];
 | 
	
		
			
				|  |  | -    res |= a[1] ^ ONE[1];
 | 
	
		
			
				|  |  | -    res |= a[2] ^ ONE[2];
 | 
	
		
			
				|  |  | -    res |= a[3] ^ ONE[3];
 | 
	
		
			
				|  |  | -    if (P256_LIMBS == 8) {
 | 
	
		
			
				|  |  | -        res |= a[4] ^ ONE[4];
 | 
	
		
			
				|  |  | -        res |= a[5] ^ ONE[5];
 | 
	
		
			
				|  |  | -        res |= a[6] ^ ONE[6];
 | 
	
		
			
				|  |  | +    BN_ULONG res = 0;
 | 
	
		
			
				|  |  | +    BN_ULONG *a = z->d;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if (z->top == (P256_LIMBS - P256_LIMBS / 8)) {
 | 
	
		
			
				|  |  | +        res = a[0] ^ ONE[0];
 | 
	
		
			
				|  |  | +        res |= a[1] ^ ONE[1];
 | 
	
		
			
				|  |  | +        res |= a[2] ^ ONE[2];
 | 
	
		
			
				|  |  | +        res |= a[3] ^ ONE[3];
 | 
	
		
			
				|  |  | +        if (P256_LIMBS == 8) {
 | 
	
		
			
				|  |  | +            res |= a[4] ^ ONE[4];
 | 
	
		
			
				|  |  | +            res |= a[5] ^ ONE[5];
 | 
	
		
			
				|  |  | +            res |= a[6] ^ ONE[6];
 | 
	
		
			
				|  |  | +            /*
 | 
	
		
			
				|  |  | +             * no check for a[7] (being zero) on 32-bit platforms,
 | 
	
		
			
				|  |  | +             * because value of "one" takes only 7 limbs.
 | 
	
		
			
				|  |  | +             */
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        res = is_zero(res);
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    return is_zero(res);
 | 
	
		
			
				|  |  | +    return res;
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  static int ecp_nistz256_set_words(BIGNUM *a, BN_ULONG words[P256_LIMBS])
 | 
	
	
		
			
				|  | @@ -315,19 +340,16 @@ static void ecp_nistz256_point_add(P256_POINT *r,
 | 
	
		
			
				|  |  |      const BN_ULONG *in2_y = b->Y;
 | 
	
		
			
				|  |  |      const BN_ULONG *in2_z = b->Z;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    /* We encode infinity as (0,0), which is not on the curve,
 | 
	
		
			
				|  |  | -     * so it is OK. */
 | 
	
		
			
				|  |  | -    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
 | 
	
		
			
				|  |  | -                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
 | 
	
		
			
				|  |  | +    /*
 | 
	
		
			
				|  |  | +     * Infinity is encoded as (,,0)
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
 | 
	
		
			
				|  |  |      if (P256_LIMBS == 8)
 | 
	
		
			
				|  |  | -        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
 | 
	
		
			
				|  |  | -                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
 | 
	
		
			
				|  |  | +        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
 | 
	
		
			
				|  |  | -                in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
 | 
	
		
			
				|  |  | +    in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
 | 
	
		
			
				|  |  |      if (P256_LIMBS == 8)
 | 
	
		
			
				|  |  | -        in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
 | 
	
		
			
				|  |  | -                     in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
 | 
	
		
			
				|  |  | +        in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      in1infty = is_zero(in1infty);
 | 
	
		
			
				|  |  |      in2infty = is_zero(in2infty);
 | 
	
	
		
			
				|  | @@ -416,15 +438,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r,
 | 
	
		
			
				|  |  |      const BN_ULONG *in2_y = b->Y;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      /*
 | 
	
		
			
				|  |  | -     * In affine representation we encode infty as (0,0), which is not on the
 | 
	
		
			
				|  |  | -     * curve, so it is OK
 | 
	
		
			
				|  |  | +     * Infinity is encoded as (,,0)
 | 
	
		
			
				|  |  |       */
 | 
	
		
			
				|  |  | -    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
 | 
	
		
			
				|  |  | -                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
 | 
	
		
			
				|  |  | +    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
 | 
	
		
			
				|  |  |      if (P256_LIMBS == 8)
 | 
	
		
			
				|  |  | -        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
 | 
	
		
			
				|  |  | -                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
 | 
	
		
			
				|  |  | +        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    /*
 | 
	
		
			
				|  |  | +     * In affine representation we encode infinity as (0,0), which is
 | 
	
		
			
				|  |  | +     * not on the curve, so it is OK
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  |      in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
 | 
	
		
			
				|  |  |                  in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
 | 
	
		
			
				|  |  |      if (P256_LIMBS == 8)
 | 
	
	
		
			
				|  | @@ -741,9 +764,8 @@ static int ecp_nistz256_is_affine_G(const EC_POINT *generator)
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |      return (generator->X.top == P256_LIMBS) &&
 | 
	
		
			
				|  |  |          (generator->Y.top == P256_LIMBS) &&
 | 
	
		
			
				|  |  | -        (generator->Z.top == (P256_LIMBS - P256_LIMBS / 8)) &&
 | 
	
		
			
				|  |  |          is_equal(generator->X.d, def_xG) &&
 | 
	
		
			
				|  |  | -        is_equal(generator->Y.d, def_yG) && is_one(generator->Z.d);
 | 
	
		
			
				|  |  | +        is_equal(generator->Y.d, def_yG) && is_one(&generator->Z);
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx)
 | 
	
	
		
			
				|  | @@ -1249,6 +1271,8 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group,
 | 
	
		
			
				|  |  |              } else
 | 
	
		
			
				|  |  |  #endif
 | 
	
		
			
				|  |  |              {
 | 
	
		
			
				|  |  | +                BN_ULONG infty;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |                  /* First window */
 | 
	
		
			
				|  |  |                  wvalue = (p_str[0] << 1) & mask;
 | 
	
		
			
				|  |  |                  index += window_size;
 | 
	
	
		
			
				|  | @@ -1260,7 +1284,30 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group,
 | 
	
		
			
				|  |  |                  ecp_nistz256_neg(p.p.Z, p.p.Y);
 | 
	
		
			
				|  |  |                  copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -                memcpy(p.p.Z, ONE, sizeof(ONE));
 | 
	
		
			
				|  |  | +                /*
 | 
	
		
			
				|  |  | +                 * Since affine infinity is encoded as (0,0) and
 | 
	
		
			
				|  |  | +                 * Jacobian is (,,0), we need to harmonize them
 | 
	
		
			
				|  |  | +                 * by assigning "one" or zero to Z.
 | 
	
		
			
				|  |  | +                 */
 | 
	
		
			
				|  |  | +                infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
 | 
	
		
			
				|  |  | +                         p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
 | 
	
		
			
				|  |  | +                if (P256_LIMBS == 8)
 | 
	
		
			
				|  |  | +                    infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
 | 
	
		
			
				|  |  | +                              p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                infty = 0 - is_zero(infty);
 | 
	
		
			
				|  |  | +                infty = ~infty;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                p.p.Z[0] = ONE[0] & infty;
 | 
	
		
			
				|  |  | +                p.p.Z[1] = ONE[1] & infty;
 | 
	
		
			
				|  |  | +                p.p.Z[2] = ONE[2] & infty;
 | 
	
		
			
				|  |  | +                p.p.Z[3] = ONE[3] & infty;
 | 
	
		
			
				|  |  | +                if (P256_LIMBS == 8) {
 | 
	
		
			
				|  |  | +                    p.p.Z[4] = ONE[4] & infty;
 | 
	
		
			
				|  |  | +                    p.p.Z[5] = ONE[5] & infty;
 | 
	
		
			
				|  |  | +                    p.p.Z[6] = ONE[6] & infty;
 | 
	
		
			
				|  |  | +                    p.p.Z[7] = ONE[7] & infty;
 | 
	
		
			
				|  |  | +                }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |                  for (i = 1; i < 37; i++) {
 | 
	
		
			
				|  |  |                      unsigned int off = (index - 1) / 8;
 | 
	
	
		
			
				|  | @@ -1331,7 +1378,7 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group,
 | 
	
		
			
				|  |  |          !ecp_nistz256_set_words(&r->Z, p.p.Z)) {
 | 
	
		
			
				|  |  |          goto err;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  | -    r->Z_is_one = is_one(p.p.Z) & 1;
 | 
	
		
			
				|  |  | +    r->Z_is_one = is_one(&r->Z) & 1;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      ret = 1;
 | 
	
		
			
				|  |  |  
 |