/*******************************************************************\ * ModInv.c * Test program for modular inverse algorithms based on xGCD * using GMP 4.1.2 compiled to Win32 DLL * * Version 1.0: April 23, 2004 by Laszlo Hars * Initial version * Version 1.1: April 29, 2004 by Laszlo Hars * Modified Left-Shift alg written * Unified calling of ModInvX * Options for * odd modulus * ensuring existence of inverse * result verification * Version 1.2: May 1, 2004 by Laszlo Hars * Cost counting moved into arithmetic and shift macros * Shifts agregated * Version 1.3: July 20, 2004 by Laszlo Hars * New optimization tricks added (..X, ..D, ..Y) * Fixed bug: shift macros calculated cost after parameter change * Fixed bug: in len2 binary length was used instead of word length * Version 1.4: Aug 29, 2004 by Laszlo Hars * Shifting Euclidean algorithms (E,F) added * LEN(0) = 0 (was 1) * Version 1.4: Sep 2, 2004 by Laszlo Hars * Hybrid versions added (G,H) \*******************************************************************/ #include #include #include #include #include #include #define __gmp_bits_per_limb BITS_PER_MP_LIMB #include "gmp.h" #define BITS_PER_MP_LIMB 32 // General macros #define ABS(x) ((x) < 0 ?-(x) : (x)) #define MIN(x,y) ((x) < (y) ? (x) : (y)) #define MAX(x,y) ((x) > (y) ? (x) : (y)) #define MA3(x,y,z) ((x) > (y) ? ((x)>(z)?(x):(z)) : ((y)>(z)?(y):(z))) #define assert(x){if(!(x)){printf("Assert error\n"); return 1;}} typedef unsigned __int16 uint16; typedef unsigned __int32 uint32; typedef __int32 int32; typedef unsigned __int64 uint64; typedef __int64 int64; // Globals: #iterations; bit complexity of subtractions, shifts; hystogram of shift lengths uint64 Iters = 0, Sub[2]={0,0}, Sft[2]={0,0}, Hst[2][4]={{0,0,0,0},{0,0,0,0}}; int uv; // temp store for testing for U_V mpz_t R,S,T,U,V,W;// global temporary mpz variables // mpz macros #define ST0(X) mpz_set_ui(X,0UL) #define ST1(X) mpz_set_ui(X,1UL) #define SGN(X) mpz_sgn(X) #define EVN(X) mpz_even_p(X) #define ODD(X) mpz_odd_p(X) #define LS0(X) mpz_scan1(X,0) #define CMP(X,Y) mpz_cmp(X,Y) #define SET(X,Y) mpz_set(X,Y) #define SWP(X,Y) mpz_swap(X,Y) #define NEG(X,Y) mpz_neg(X,Y) #define BIT(X,B) mpz_tstbit(X,B) #define LEN(X) (SGN(X)==0? 0 : mpz_sizeinbase(X,2)) #define U_V(Y) ((Y->_mp_d==U->_mp_d)||(Y->_mp_d==V->_mp_d)) // mpz macros with cost, 0's in operand bit position 0..L-1 #define SHR(X,Y,S,L) {if((S)>0){Sft[uv=U_V(Y)]+=MAX(LEN(Y),L)-(L); Hst[uv][(S)>3?0:(S)]++;}\ mpz_tdiv_q_2exp(X,Y,S);} #define SHL(X,Y,S,L) {if((S)>0){Sft[uv=U_V(Y)]+=MAX(LEN(Y),L)-(L); Hst[uv][(S)>3?0:(S)]++;}\ mpz_mul_2exp(X,Y,S);} #define ADD(X,Y,Z,L) {Sub[U_V(Y)] += MA3(LEN(Y),LEN(Z),L)-(L); mpz_add(X,Y,Z);} #define SUB(X,Y,Z,L) {Sub[U_V(Y)] += MA3(LEN(Y),LEN(Z),L)-(L); mpz_sub(X,Y,Z);} #define A_S(U,V,R,S,L){if (SGN(U)==SGN(V)){SUB(U,U,V,L); SUB(R,R,S,0);}\ else {ADD(U,U,V,L); ADD(R,R,S,0);}} /**********************************************************************\ * UL0 - # Unsigned Least significant 0 * x: GMP multi-precision integer \**********************************************************************/ uint32 UL0(mpz_t x) { uint32 u; if (x->_mp_size < 0) { x->_mp_size = -x->_mp_size; u = mpz_scan1(x,0); x->_mp_size = -x->_mp_size; return u; } return mpz_scan1(x,0); } /**********************************************************************\ * print_16 - prints x in hexadecimal. Leading space if positive * x: GMP multi-precision integer to be printed \**********************************************************************/ void print_16(mpz_t x) { char *STR; if( mpz_sgn(x) >= 0) printf(" "); else printf("-"); mpz_abs(x,x); STR = mpz_get_str(NULL, 16, x); printf("%s\n",STR); } /**********************************************************************\ * ModInvE - shift-Euclidean modular inverse [SE] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S, T, W: global temporary variables \**********************************************************************/ void ModInvE( mpz_t B, mpz_t M, mpz_t A) { int sfts; if (CMP(A,M) < 0) { // U > V SET(U,M); ST0(R); SET(V,A); ST1(S); } else { SET(V,M); ST0(S); SET(U,A); ST1(R); } while(LEN(V) > 1) { Iters++; sfts = LEN(U)-LEN(V); SHL(T,V,sfts,0); SHL(W,S,sfts,0); A_S(U,T,R,W,0); if (LEN(U) < LEN(V)) { SWP(U,V); SWP(R,S); } } if (SGN(V)==0) { ST0(B); return; } if (SGN(V) <0) NEG(S,S); if ( CMP(S,M)>0) SUB(B,S,M,0) else if(SGN(S)<0) ADD(B,S,M,0) else SET(B,S); } /**********************************************************************\ * MSW The MS word (left justified) of an mpz integer * X mpz_t * L2 binary length \**********************************************************************/ unsigned long MSW(mpz_t X, int L2) { unsigned long *last = X->_mp_d + ABS(X->_mp_size) - 1, L = ((L2-1)&(BITS_PER_MP_LIMB-1)) + 1; // 1..32; if (L == BITS_PER_MP_LIMB) return *last; if (L2 < BITS_PER_MP_LIMB) return (*last)<<(BITS_PER_MP_LIMB-L); else return ((*last)<<(BITS_PER_MP_LIMB-L)) | ((*(last-1))>>L); } /**********************************************************************\ * log2 index of the leftmost 1-bit * x uint32 * For P6 single assembler instruction BSR * http://www.jjj.de/bitwizardry/files/bithigh.h \**********************************************************************/ int log2(uint32 x) { int r = 0; if ( x & 0xffff0000 ) { x >>=16; r +=16; } if ( x & 0x0000ff00 ) { x >>= 8; r += 8; } if ( x & 0x000000f0 ) { x >>= 4; r += 4; } if ( x & 0x0000000c ) { x >>= 2; r += 2; } if ( x & 0x00000002 ) { r += 1; } return r; } /**********************************************************************\ * ModInvF - shift-Euclidean modular inverse with optimum shift[SE3] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S, T, W: global temporary variables \**********************************************************************/ void ModInvF( mpz_t B, mpz_t M, mpz_t A) { int lu, lv, sfts, mu, mv, mm, m0, mp, hm, hp, lm, L0, lp; if (CMP(A,M) < 0) { // U > V SET(U,M); ST0(R); SET(V,A); ST1(S); } else { SET(V,M); ST0(S); SET(U,A); ST1(R); } lu = LEN(U); lv = LEN(V); for(;;) { Iters++; // check if sfts, sfts+1 or sfts-1 reduces U the most sfts = lu-lv; mu = MSW(U,lu)>>3; mv = MSW(V,lv)>>3; mm = ABS(mu-mv/2); lm = log2(mm-1); hm = log2(mm+1); mp = ABS(mu-mv*2); lp = log2(mp-2); hp = log2(mp+2); m0 = ABS(mu-mv); L0 = log2(m0-1); if ((hp < L0) && (hp < lm)) ++sfts; else if ((hm < L0) && (hm < lp) && (sfts > 0)) --sfts; SHL(T,V,sfts,0); SHL(W,S,sfts,0); A_S(U,T,R,W,0); lu = LEN(U); if (lu < 2) break; if (lu < lv) { SWP(U,V); SWP(R,S); sfts = lu; lu = lv; lv = sfts; } } if (SGN(U)== 0) { ST0(B); return; } if(SGN(U)<0) NEG(B,R); else SET(B,R); while(CMP(B,M)>0) SUB(B,B,M,0); while(SGN(B) < 0) ADD(B,B,M,0); } /**********************************************************************\ * ModInvG - shift-Euclidean with optimum shift, check for right shift [no improvement] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S, T, W: global temporary variables \**********************************************************************/ void ModInvG( mpz_t B, mpz_t M, mpz_t A) { int lu, lv, sfts, mu, mv, mm, m0, mp, hm, hp, lm, L0, lp; uint32 u0, r0; if (CMP(A,M) < 0) { // U > V SET(U,M); ST0(R); SET(V,A); ST1(S); } else { SET(V,M); ST0(S); SET(U,A); ST1(R); } lu = LEN(U); lv = LEN(V); for(;;) { Iters++; // check for right shifts: with odd M never happens! if ((BIT(U,0)==BIT(V,0)) && (BIT(R,0)==BIT(S,0))) { printf("\nszuret\n"); A_S(U,V,R,S,0); u0 = UL0(U); r0 = UL0(R); sfts = MIN(u0,r0); SHR(U,U,sfts,u0); SHR(R,R,sfts,r0); lu = LEN(U); if (lu < 2) break; if (lu < lv) { SWP(U,V); SWP(R,S); // LEN(U) >= LEN(V) sfts = lu; lu = lv; lv = sfts; } } // check if sfts, sfts+1 or sfts-1 reduces U the most sfts = lu-lv; mu = MSW(U,lu)>>3; mv = MSW(V,lv)>>3; mm = ABS(mu-mv/2); lm = log2(mm-1); hm = log2(mm+1); mp = ABS(mu-mv*2); lp = log2(mp-2); hp = log2(mp+2); m0 = ABS(mu-mv); L0 = log2(m0-1); if ((hp < L0) && (hp < lm)) ++sfts; else if ((hm < L0) && (hm < lp) && (sfts > 0)) --sfts; SHL(T,V,sfts,0); SHL(W,S,sfts,0); A_S(U,T,R,W,0); lu = LEN(U); if (lu < 2) break; if (lu < lv) { SWP(U,V); SWP(R,S); // LEN(U) >= LEN(V) sfts = lu; lu = lv; lv = sfts; } } if (SGN(U)== 0) { ST0(B); return; } if(SGN(U)<0) NEG(B,R); else SET(B,R); while(CMP(B,M)>0) SUB(B,B,M,0); while(SGN(B) < 0) ADD(B,B,M,0); } /**********************************************************************\ * ModInvH - Hybrid: delayed Right shift with MS reduce [to be written] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S, T, W: global temporary variables \**********************************************************************/ void ModInvH( mpz_t B, mpz_t M, mpz_t A) { ModInvG(B,M,A); } /**********************************************************************\ * ModInvL - Left-shift binary modular inverse [LS1] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvL( mpz_t B, mpz_t M, mpz_t A) { int j, cu = 0, cv = 0, sfts; uint32 n = (uint32)MAX(LEN(M),LEN(A)); SET(U,M); ST0(R); SET(V,A); ST1(S); while( (LEN(U)>(cu+1)) && (LEN(V)>(cv+1)) ) { Iters++; if (LEN(U) < n) { sfts = n-LEN(U); SHL(U,U,sfts,cu) j = MIN(sfts,MAX(0,cv-cu)); cu += sfts; SHR(S,S,j,j) SHL(R,R,sfts-j,0) } if (LEN(V) < n) { sfts = n-LEN(V); SHL(V,V,sfts,cv) j = MIN(sfts,MAX(0,cu-cv)); cv += sfts; SHR(R,R,j,j) SHL(S,S,sfts-j,0) } if (cu<=cv) A_S(U,V,R,S,cu) else A_S(V,U,S,R,cv) if((SGN(U)==0)||(SGN(V)==0)) { ST0(B); return; } } if (LEN(V)==(cv+1)) { SWP(U,V); SWP(R,S); } if (SGN(U) < 0) if (SGN(R)<0) NEG(R,R); else SUB(R,M,R,0) if (SGN(R)<0) ADD(B,M,R,0) else SET(B,R); } /**********************************************************************\ * len2 (H)igh and (L)ow estimate of binary length of a|X|-b|Y| * L and H very rarely differ * doubles are used for code simplicity, instead of bit counting \**********************************************************************/ void len2(int a, mpz_t X, int b, mpz_t Y, int *L, int *H) { double d, x1, x2, y1, y2; int ix = ABS(X->_mp_size)-1, iy = ABS(Y->_mp_size)-1; if (ix < 1) x1 = 0; else x1 = *(X->_mp_d+ix-1); if (ix < 0) x2 = 0; else x2 = (uint64)*(X->_mp_d+ix) << BITS_PER_MP_LIMB; if (iy < 1) y1 = 0; else y1 = *(Y->_mp_d+iy-1); if (iy < 0) y2 = 0; else y2 = (uint64)*(Y->_mp_d+iy) << BITS_PER_MP_LIMB; d = fabs(a*(x1+x2) - b*((y1+1)+y2)); // subtract upper bound *L = d < 2.0 ? 1 : (int)_logb(d)+1; d = fabs(a*((x1+1)+x2) - b*(y1+y2)); // subtract from upper bound *H = d < 2.0 ? 1 : (int)_logb(d)+1; } /**********************************************************************\ * ModInvX - Extra Doubling Left-shift binary modular inverse [LS3] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvX( mpz_t B, mpz_t M, mpz_t A) { int cu = 0, cv = 0, j, sfts, L11, L12, L21, H11, H12, H21; uint32 n = (uint32)MAX(LEN(M),LEN(A)); SET(U,M); ST0(R); SET(V,A); ST1(S); while( (LEN(U)>(cu+1)) && (LEN(V)>(cv+1)) ) { Iters++; if (LEN(U) < n) { sfts = n-LEN(U); SHL(U,U,sfts,cu) j = MIN(sfts,MAX(0,cv-cu)); cu += sfts; SHR(S,S,j,j) SHL(R,R,sfts-j,0) } if (LEN(V) < n) { sfts = n-LEN(V); SHL(V,V,sfts,cv) j = MIN(sfts,MAX(0,cu-cv)); cv += sfts; SHR(R,R,j,j) SHL(S,S,sfts-j,0) } len2(1,U,1,V,&L11,&H11); len2(2,U,1,V,&L21,&H21); len2(1,U,2,V,&L12,&H12); if (cu<=cv) if ((cu0)&& (H21-1 0)) { SHR(U,U,1,cu); SHL(S,S,1,0); cu--; A_S(U,V,R,S,cu) } else { A_S(U,V,R,S,cu) } else //cu>cv if ((H12-1 0)) { SHR(V,V,1,cv); SHL(R,R,1,0); cv--; A_S(V,U,S,R,cv) } else { A_S(V,U,S,R,cv) } if ((SGN(U)==0)||(SGN(V)==0)) { ST0(B); return; } } if (LEN(V)==(cv+1)) { SWP(U,V); SWP(R,S); } if (SGN(U) < 0) if (SGN(R)<0) NEG(R,R); else SUB(R,M,R,0) if (SGN(R)<0) ADD(B,M,R,0) else SET(B,R); } /**********************************************************************\ * ModInvM - Modified Left-shift binary modular inverse * 13.6% fewer iterations. When helps, 2 additions are replaced by two shifts * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvM( mpz_t B, mpz_t M, mpz_t A) { int cu = 0, cv = 0, j, sfts, L11, L12, L21, H11, H12, H21; uint32 n = (uint32)MAX(LEN(M),LEN(A)); SET(U,M); ST0(R); SET(V,A); ST1(S); while( (LEN(U)>(cu+1)) && (LEN(V)>(cv+1)) ) { Iters++; if (LEN(U) < n) { sfts = n-LEN(U); SHL(U,U,sfts,cu) j = MIN(sfts,MAX(0,cv-cu)); cu += sfts; SHR(S,S,j,j) SHL(R,R,sfts-j,0) } if (LEN(V) < n) { sfts = n-LEN(V); SHL(V,V,sfts,cv) j = MIN(sfts,MAX(0,cu-cv)); cv += sfts; SHR(R,R,j,j) SHL(S,S,sfts-j,0) } len2(1,U,1,V,&L11,&H11); len2(2,U,1,V,&L21,&H21); len2(1,U,2,V,&L12,&H12); if (cu<=cv) { if ((cu 0)) { SHR(U,U,1,cu); SHL(S,S,1,0); cu--;} if (SGN(U)==SGN(V)) { SUB(U,U,V,cu); SUB(R,R,S,0);} else { ADD(U,U,V,cu); ADD(R,R,S,0);} } else { //cu>cv if ((H12-1 0)) { SHR(V,V,1,cv); SHL(R,R,1,0); cv--;} if (SGN(U)==SGN(V)) { SUB(V,V,U,cv); SUB(S,S,R,0);} else { ADD(V,V,U,cv); ADD(S,S,R,0);} } if ((SGN(U)==0)||(SGN(V)==0)) { ST0(B); return; } } if (LEN(V)==(cv+1)) { SWP(U,V); SWP(R,S); } if (SGN(U) < 0) if (SGN(R)<0) NEG(R,R); else SUB(R,M,R,0) if (SGN(R)<0) ADD(B,M,R,0) else SET(B,R); } /**********************************************************************\ * ModInvR - Right-shift binary modular inverse [RS1] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvR( mpz_t B, mpz_t M, mpz_t A) { int sw = 0, sfts, i, j; uint32 n = (uint32)MAX(LEN(M),LEN(A)); if (EVN(M)) { SWP(M,A); sw = 1;} if (EVN(M)) { ST0(B); if(sw) SWP(M,A); return;} SET(U,M); ST0(R); SET(V,A); ST1(S); sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { i += j=MIN(sfts-i,LS0(S)); SHR(S,S,j,j) if (i >= sfts) break; if (CMP(S,M) > 0) SUB(S,S,M,0) else ADD(S,S,M,0) } for(;;) { Iters++; // each subtraction is followed by shifts if (CMP(U,V) > 0) { SUB(U,U,V,0) SUB(R,R,S,0) sfts = LS0(U); SHR(U,U,sfts,sfts) // combined shift for(i = 0; ;) { i += j=MIN(sfts-i,LS0(R)); SHR(R,R,j,j) if (i >= sfts) break; if (CMP(R,M)>0) SUB(R,R,M,0) else ADD(R,R,M,0) } } else { SUB(V,V,U,0) if (SGN(V)==0) break; SUB(S,S,R,0) sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { i += j=MIN(sfts-i,LS0(S)); SHR(S,S,j,j) if (i >= sfts) break; if (CMP(S,M)>0) SUB(S,S,M,0) else ADD(S,S,M,0) } } } if (LEN(U)>1) { ST0(B); if(sw) SWP(M,A); return; } if (CMP(R,M) >= 0) SUB(R,R,M,0); if (SGN(R) < 0) ADD(R,R,M,0); if (sw==0) SET(B,R); else { SWP(M,A); mpz_mul(S,M,R); mpz_sub_ui(S,S,1); mpz_divexact(S,S,A); mpz_sub(B,M,S); if (CMP(B,M) >= 0) SUB(B,B,M,0);} } /**********************************************************************\ * ModInvD - Delayed halving Right-shift binary modular inverse [RSDH] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvD( mpz_t B, mpz_t M, mpz_t A) { int sw = 0, sfts, i = 0, j, k = 0; uint32 n = (uint32)MAX(LEN(M),LEN(A)); if (EVN(M)) { SWP(M,A); sw = 1;} if (EVN(M)) { ST0(B); if(sw) SWP(M,A); return;} SET(U,M); ST0(R); SET(V,A); ST1(S); k += sfts=LS0(V); SHR(V,V,sfts,sfts) // combined shift, making U, V odd SHL(R,R,sfts,0) for(;;) { Iters++; // each subtraction is followed by shifts if (CMP(U,V) > 0) { SUB(U,U,V,0) SUB(R,R,S,0) k += sfts=LS0(U); SHR(U,U,sfts,sfts) SHL(S,S,sfts,0) } else { SUB(V,V,U,0) if(SGN(V)==0) break; SUB(S,S,R,0) k += sfts=LS0(V); SHR(V,V,sfts,sfts) SHL(R,R,sfts,0) } } if (LEN(U)>1) { ST0(B); if(sw) SWP(M,A); return; } if (CMP(R,M) >= 0) SUB(R,R,M,0) if (SGN(R) < 0) ADD(R,R,M,0) for(;;) { i += j=MIN(k-i,LS0(R)); SHR(R,R,j,j) if(i >= k) break; ADD(R,R,M,0) } if (sw==0) SET(B,R); else { SWP(M,A); mpz_mul(S,M,R); mpz_sub_ui(S,S,1); mpz_divexact(S,S,A); mpz_sub(B,M,S); if (CMP(B,M) >= 0) SUB(B,B,M,0);} } /**********************************************************************\ * ModInvY - delaY halving Plus/minus Right-shift binary modular inverse [RSDH+-] * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvY( mpz_t B, mpz_t M, mpz_t A) { int b1, sw = 0, sfts, i = 0, j, k = 0; uint32 n = (uint32)MAX(LEN(M),LEN(A)); if (EVN(M)) { SWP(M,A); sw = 1;} if (EVN(M)) { ST0(B); if(sw) SWP(M,A); return;} b1 = BIT(M,1); SET(U,M); ST0(R); SET(V,A); ST1(S); k += sfts=LS0(V); SHR(V,V,sfts,sfts) // combined shift, making U, V odd SHL(R,R,sfts,0) for(;;) { Iters++; // each subtraction is followed by shifts if (CMP(U,V) > 0) { if (BIT(U,1)==BIT(V,1)) { SUB(U,U,V,0) SUB(R,R,S,0) } else { ADD(U,U,V,0) ADD(R,R,S,0) } k += sfts=LS0(U); SHR(U,U,sfts,sfts) SHL(S,S,sfts,0) } else { if (BIT(U,1)==BIT(V,1)) { SUB(V,V,U,0) if(SGN(V)==0)break; SUB(S,S,R,0) } else { ADD(V,V,U,0) ADD(S,S,R,0) } k += sfts=LS0(V); SHR(V,V,sfts,sfts) SHL(R,R,sfts,0) } } if (LEN(U)>1) { ST0(B); if(sw) SWP(M,A); return; } if (CMP(R,M) >= 0) SUB(R,R,M,0) for(;;) { i += j=MIN(k-i,LS0(R)); SHR(R,R,j,j) if(i >= k) break; if((i1 bits to shift, clear 2 LS bits SUB(R,R,M,0) else ADD(R,R,M,0) } if (SGN(R) < 0) ADD(R,R,M,0) if (sw==0) SET(B,R); else { SWP(M,A); mpz_mul(S,M,R); mpz_sub_ui(S,S,1); mpz_divexact(S,S,A); mpz_sub(B,M,S); if (CMP(B,M) >= 0) SUB(B,B,M,0);} } /**********************************************************************\ * ModInvP - Plus-Minus (Right-shift) binary modular inverse [RS2+-] * - the +/- trick is applied at reduction AND at parity fix for halving * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvP( mpz_t B, mpz_t M, mpz_t A) { int sw = 0, sfts, i, j, b1; uint32 n = (uint32)MAX(LEN(M),LEN(A)); if (EVN(M)) { SWP(M,A); sw = 1;} if (EVN(M)) { ST0(B); if(sw) SWP(M,A); return;} SET(U,M); ST0(R); SET(V,A); ST1(S); b1 = BIT(M,1); sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(S)); i+=j; SHR(S,S,j,j) if (i >= sfts) break; if (i < sfts-1) // at least 2 more bits to shift if (BIT(S,1)==b1) SUB(S,S,M,0) else ADD(S,S,M,0)// clear 2 bits else if (CMP(S,M) > 0) SUB(S,S,M,0) else ADD(S,S,M,0)// prevent growth } for(;;) { Iters++; // each subtraction is followed by shifts if (CMP(U,V) > 0) { if (BIT(U,1)==BIT(V,1)){ SUB(U,U,V,0) SUB(R,R,S,0) } else { ADD(U,U,V,0) ADD(R,R,S,0) } sfts = LS0(U); SHR(U,U,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(R)); i+=j; SHR(R,R,j,j) if (i >= sfts) break; if (i < sfts-1) // at least 2 more bits to shift if (BIT(R,1)==b1) SUB(R,R,M,0) else ADD(R,R,M,0) else if (CMP(R,M) > 0) SUB(R,R,M,0) else ADD(R,R,M,0) } } else { // U <= V if (BIT(U,1)==BIT(V,1)){ SUB(V,V,U,0) if(SGN(V)==0)break; SUB(S,S,R,0) } else { ADD(V,V,U,0) ADD(S,S,R,0) } sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(S)); i+=j; SHR(S,S,j,j) if (i >= sfts) break; if (i < sfts-1) // at least 2 more bits to shift if (BIT(S,1)==b1) SUB(S,S,M,0) else ADD(S,S,M,0) else if (CMP(S,M) > 0) SUB(S,S,M,0) else ADD(S,S,M,0) } } } if (LEN(U)>1) { ST0(B); if(sw) SWP(M,A); return; } if (CMP(R,M) >= 0) SUB(R,R,M,0); if (SGN(R) < 0) ADD(R,R,M,0); if (sw==0) SET(B,R); else { SWP(M,A); mpz_mul(S,M,R); mpz_sub_ui(S,S,1); mpz_divexact(S,S,A); mpz_sub(B,M,S); if (CMP(B,M) >= 0) SUB(B,B,M,0);} } /**********************************************************************\ * ModInvQ - Plus-Minus (Right-shift) binary modular inverse [RS+-] * - with the +/- trick at reduction only * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvQ( mpz_t B, mpz_t M, mpz_t A) { int sw = 0, sfts, i, j, b1; uint32 n = (uint32)MAX(LEN(M),LEN(A)); if (EVN(M)) { SWP(M,A); sw = 1;} if (EVN(M)) { ST0(B); if(sw) SWP(M,A); return;} SET(U,M); ST0(R); SET(V,A); ST1(S); b1 = BIT(M,1); sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(S)); i+=j; SHR(S,S,j,j) if (i >= sfts) break; if (CMP(S,M)> 0) SUB(S,S,M,0) else ADD(S,S,M,0) // prevent growth } for(;;) { Iters++; // each subtraction is followed by shifts if (CMP(U,V) > 0) { if (BIT(U,1) == BIT(V,1)) { SUB(U,U,V,0) SUB(R,R,S,0) } else { ADD(U,U,V,0) ADD(R,R,S,0) } sfts = LS0(U); SHR(U,U,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(R)); i+=j; SHR(R,R,j,j) if (i >= sfts) break; if (CMP(R,M)> 0) SUB(R,R,M,0) else ADD(R,R,M,0) } } else { // U <= V if (BIT(U,1) == BIT(V,1)) { SUB(V,V,U,0) if (SGN(V)==0) break; SUB(S,S,R,0) } else { ADD(V,V,U,0) ADD(S,S,R,0) } sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(S)); i+=j; SHR(S,S,j,j) if (i >= sfts) break; if (CMP(S,M)> 0) SUB(S,S,M,0) else ADD(S,S,M,0) } } } if (LEN(U)>1) { ST0(B); if(sw) SWP(M,A); return; } if (CMP(R,M) >= 0) SUB(R,R,M,0); if (SGN(R) < 0) ADD(R,R,M,0); if (sw==0) SET(B,R); else { SWP(M,A); mpz_mul(S,M,R); mpz_sub_ui(S,S,1); mpz_divexact(S,S,A); mpz_sub(B,M,S); if (CMP(B,M) >= 0) SUB(B,B,M,0);} } /**********************************************************************\ * ModInvS - Plus-Minus (Right-shift) binary modular inverse * - the +/- trick is applied at parity fix for halving * B: OUTPUT A^-1 mod M * M: INPUT modulus * A: INPUT * U, V, R, S: global temporary variables \**********************************************************************/ void ModInvS( mpz_t B, mpz_t M, mpz_t A) { int sw = 0, sfts, i, j, b1; uint32 n = (uint32)MAX(LEN(M),LEN(A)); if (EVN(M)) { SWP(M,A); sw = 1;} if (EVN(M)) { ST0(B); if(sw) SWP(M,A); return;} SET(U,M); ST0(R); SET(V,A); ST1(S); b1 = BIT(M,1); sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(S)); i+=j; SHR(S,S,j,j) if (i >= sfts) break; if (i < sfts-1) // at least 2 more bits to shift if (BIT(S,1)==b1) SUB(S,S,M,0) else ADD(S,S,M,0)// clear 2 bits else if (CMP(S,M) > 0) SUB(S,S,M,0) else ADD(S,S,M,0)// prevent growth } for(;;) { Iters++; // each subtraction is followed by shifts if (CMP(U,V) > 0) { SUB(U,U,V,0) SUB(R,R,S,0) sfts = LS0(U); SHR(U,U,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(R)); i+=j; SHR(R,R,j,j) if (i >= sfts) break; if (i < sfts-1) // at least 2 more bits to shift if (BIT(R,1)==b1) SUB(R,R,M,0) else ADD(R,R,M,0) else if (CMP(R,M) > 0) SUB(R,R,M,0) else ADD(R,R,M,0) } } else { // U <= V SUB(V,V,U,0) if (SGN(V)==0) break; SUB(S,S,R,0) sfts = LS0(V); SHR(V,V,sfts,sfts) // combined shift for(i = 0; ;) { j = MIN(sfts-i,LS0(S)); i+=j; SHR(S,S,j,j) if (i >= sfts) break; if (i < sfts-1) // at least 2 more bits to shift if (BIT(S,1)==b1) SUB(S,S,M,0) else ADD(S,S,M,0) else if (CMP(S,M) > 0) SUB(S,S,M,0) else ADD(S,S,M,0) } } } if (LEN(U)>1) { ST0(B); if(sw) SWP(M,A); return; } if (CMP(R,M) >= 0) SUB(R,R,M,0); if (SGN(R) < 0) ADD(R,R,M,0); if (sw==0) SET(B,R); else { SWP(M,A); mpz_mul(S,M,R); mpz_sub_ui(S,S,1); mpz_divexact(S,S,A); mpz_sub(B,M,S); if (CMP(B,M) >= 0) SUB(B,B,M,0);} } /**********************************************************************\ * main - Command line parameters: 'Alg' n seed repeat oddM * Generate n-bit random numbers, call Alg and Prints results \**********************************************************************/ int main(int argc, char* argv[]) { int i, n, seed, rept, oddM, vrfy, invX; char Alg; mpz_t A, B, M, X; gmp_randstate_t state; typedef void fv(mpz_t,mpz_t,mpz_t), (*pfv)(mpz_t,mpz_t,mpz_t); pfv ModInv; // Get parameters if ( argc != 8) { printf("Input: ALG Length Seed #Repeat OddM InvExists Verify\n"); return 1; } Alg = **++argv ; // algorithm ID: L, P, R, ... n = atoi(*++argv); // argument lengths seed = atoi(*++argv); // random number seed 0, 1, ... rept = atof(*++argv); // repetitions oddM = atoi(*++argv)==0 ? 2 : 3; // force M to be odd invX = atoi(*++argv)==0 ? 0 : 1; // runs when inverse exists vrfy = atoi(*++argv)==0 ? 0 : 1; // verify result? // Initialize data, Allocate storage mpz_init(A); mpz_init(B); mpz_init(M); mpz_init(X); mpz_init(W); mpz_init(U); mpz_init(V); mpz_init(R); mpz_init(S); mpz_init(T); gmp_randinit_default(state); // Do Work gmp_randseed_ui(state,seed); printf("Alg = %c Len = %d Seed = %d Repeat = %d OddM = %d InvExist = %d Verify = %d\n", Alg, n, seed, rept, oddM-2, invX, vrfy); switch (Alg) { case 'L': ModInv = *ModInvL; break; case 'M': ModInv = *ModInvM; break; case 'R': ModInv = *ModInvR; break; case 'P': ModInv = *ModInvP; break; case 'Q': ModInv = *ModInvQ; break; case 'S': ModInv = *ModInvS; break; case 'X': ModInv = *ModInvX; break; case 'D': ModInv = *ModInvD; break; case 'Y': ModInv = *ModInvY; break; case 'E': ModInv = *ModInvE; break; case 'F': ModInv = *ModInvF; break; case 'G': ModInv = *ModInvG; break; case 'H': ModInv = *ModInvH; break; default : printf("Unknown algorithm\n"); return 1; } printf("Iteration "); for( i = 0; i < rept; i++) { if (i%50000 == 0) printf("%1d",(i/50000)%10); while(1) { mpz_urandomb(A,state,n); mpz_add_ui(A,A,1); mpz_urandomb(M,state,n); // mpz_rrandomb for algorithm verification if (ODD(M)) mpz_add_ui(M,M,2); else mpz_add_ui(M,M,oddM); if (invX) { // chose params such that inverse exists mpz_gcd(X,A,M); if (LEN(X)==1) break; } else break; } if (i == 103) i = 103; ModInv(B,M,A); if (vrfy) { if (mpz_invert(X,A,M)==0) ST0(X); if (mpz_cmp(X,B)!=0 ) { printf("\n>>> Wrong result at Iteration %d\n",i); printf("M = "); print_16(M); printf("A = "); print_16(A); printf("B = "); print_16(B); printf("X = "); print_16(X); break;} } } // Print stat printf("\n Iters SftUV SftRS SubUV SubRS\n"); printf("%11.6f %15.6f %15.6f %15.6f %15.6f\n", (double)Iters/rept,(double)(Sft[1])/rept,(double)(Sft[0])/rept, (double)(Sub[1])/rept,(double)(Sub[0])/rept); printf(" UV1 UV2 UV3 UVx\n"); printf("%15.6f %15.6f %15.6f %15.6f\n", (double)(Hst[1][1])/rept,(double)(Hst[1][2])/rept, (double)(Hst[1][3])/rept,(double)(Hst[1][0])/rept); printf(" RS1 RS2 RS3 RSx\n"); printf("%15.6f %15.6f %15.6f %15.6f\n", (double)(Hst[0][1])/rept,(double)(Hst[0][2])/rept, (double)(Hst[0][3])/rept,(double)(Hst[0][0])/rept); return 0; }