/* This is a minimal, ludicrously inefficient library to perform modular
   arithmetic on multiple-precision natural numbers represented by an
   array of MP_SIZE uint64_t integers using 128-bit arithmetic. The only
   purpose of this code is to make the jump code of (G)MWC generators
   self contained. Using any proper multiprecision library will be more
   efficient, albeit at this size the increase in speed will not be so
   dramatic (in my tests, less than fourfold). This code is intentionally
   not documented to discourage general usage. */

#include <stdint.h>
#include <string.h>

// Compares the little-endian limb arrays a and b, returning -1, 0 or 1.
static int cmp(const uint64_t * const a, const uint64_t * const b) {
	for(int i = MP_SIZE; i-- != 0; ) {
		if (a[i] < b[i]) return -1;
		if (a[i] > b[i]) return 1;
	}

	return 0;
}

// a -= b. Assumes a >= b.
static void _sub(uint64_t * const a, const uint64_t * const b) {
	int borrow = 0;

	for(int i = 0; i < MP_SIZE; i++) {
		const __int128_t d = (__int128_t)a[i] - (__int128_t)b[i] - (__int128_t)borrow;
		borrow = d < 0;
		a[i] = ((__int128_t)UINT64_MAX + 1) + d;
	}
}

// a %= m, by repeated subtraction.
static void rem(uint64_t * const a, const uint64_t * const m) {
	for(;;) {
		if (cmp(a, m) < 0) return;
		_sub(a, m);
	}
}

// a = (a + b) mod m; if m is NULL, no reduction is performed.
static void add(uint64_t * const a, const uint64_t * const b, const uint64_t * const m) {
	int carry = 0;

	for(int i = 0; i < MP_SIZE; i++) {
		const __uint128_t s = (__uint128_t)a[i] + (__uint128_t)b[i] + (__uint128_t)carry;
		carry = s > UINT64_MAX;
		a[i] = s;
	}

	if (m) rem(a, m);
}

// a = (a - b) mod m. Assumes a, b < m.
static void sub(uint64_t * const a, const uint64_t * const b, const uint64_t * const m) {
	if (cmp(a, b) >= 0) _sub(a, b);
	else {
		uint64_t t[MP_SIZE];
		memcpy(t, m, sizeof t);
		_sub(t, b);
		add(a, t, m);
	}
}

// a = (a * b) mod m, by modular shift-and-add.
static void mul(uint64_t * const a, const uint64_t * const b, const uint64_t * const m) {
	uint64_t r[MP_SIZE] = { 0 }, t[MP_SIZE];
	memcpy(t, a, sizeof t);

	// d is the number of limbs up to and including the most significant nonzero limb of b.
	int d;
	for(d = MP_SIZE; d-- != 0 && b[d] == 0;);
	d++;

	for(int i = 0; i < d * 64; i++) {
		if (b[i >> 6] & (UINT64_C(1) << (i & 63))) add(r, t, m);
		add(t, t, m);
	}

	memcpy(a, r, sizeof r);
}
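
/* A minimal usage sketch, not part of the original library: the including
   translation unit is expected to #define MP_SIZE before this code and then
   call add(), sub() and mul() directly on little-endian limb arrays. The demo
   below is compiled only if the hypothetical macro MP_DEMO is defined, and it
   assumes MP_SIZE is 2; the modulus is an arbitrary example, not an actual
   (G)MWC modulus. */
#ifdef MP_DEMO
#include <inttypes.h>
#include <stdio.h>

int main(void) {
	uint64_t a[MP_SIZE] = { 0, 4 };   // a = 4 * 2^64 (limbs are little-endian)
	uint64_t b[MP_SIZE] = { 2, 0 };   // b = 2
	uint64_t m[MP_SIZE] = { 13, 5 };  // m = 5 * 2^64 + 13

	mul(a, b, m); // a <- (a * b) mod m = 3 * 2^64 - 13

	// Prints 0000000000000002fffffffffffffff3 (most significant limb first).
	for(int i = MP_SIZE; i-- != 0; ) printf("%016" PRIx64, a[i]);
	printf("\n");

	return 0;
}
#endif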