/* =================================================================== * * Copyright (c) 2019, Helder Eijs * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * =================================================================== */ #include #include #include #include #include "multiply.h" /** * Add a 64-bit value x to y/sum_mid/sum_hi */ #if defined(_WIN64) && (_MSC_VER>=1900) #include #define ADD192(y, x) do { \ unsigned char c = 0; \ c = _addcarry_u64(c, x, y, &y); \ c = _addcarry_u64(c, 0, sum_mid, &sum_mid); \ _addcarry_u64(c, 0, sum_hi, &sum_hi); \ } while (0) #else #define ADD192(y, x) do { \ uint64_t c; \ y += x; \ sum_mid += (c = (y < x)); \ sum_hi += sum_mid < c; \ } while (0) #endif void inline addmul128(uint64_t *t, uint64_t *scratchpad, const uint64_t *a, uint64_t b0, uint64_t b1, size_t t_words, size_t a_nw) { uint64_t sum_low, sum_mid, sum_hi; uint64_t pr_low, pr_high, aim1; size_t i; assert(t_words >= a_nw + 2); if (a_nw == 0) { return; } /** LSW **/ DP_MULT(a[0], b0, sum_low, sum_mid); sum_hi = 0; ADD192(t[0], sum_low); sum_low = sum_mid; sum_mid = sum_hi; sum_hi = 0; aim1 = a[0]; for (i=1; i<(a_nw-1)/4*4+1;) { /** I **/ DP_MULT(aim1, b1, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; DP_MULT(a[i], b0, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; ADD192(t[i], sum_low); sum_low = sum_mid; sum_mid = sum_hi; sum_hi = 0; aim1 = a[i]; i++; /** II **/ DP_MULT(aim1, b1, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; DP_MULT(a[i], b0, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; ADD192(t[i], sum_low); sum_low = sum_mid; sum_mid = sum_hi; sum_hi = 0; aim1 = a[i]; i++; /** III **/ DP_MULT(aim1, b1, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; DP_MULT(a[i], b0, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; ADD192(t[i], sum_low); sum_low = sum_mid; sum_mid = sum_hi; sum_hi = 0; aim1 = a[i]; i++; /** IV **/ DP_MULT(aim1, b1, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; DP_MULT(a[i], b0, pr_low, pr_high); ADD192(sum_low, pr_low); sum_mid += pr_high; sum_hi += sum_mid < pr_high; ADD192(t[i], sum_low); sum_low = sum_mid; sum_mid = sum_hi; sum_hi = 0; aim1 = a[i]; i++; } /** Execute 0 to 3 times **/ for (; i0; j++) { t[j] += (uint64_t)carry; carry = t[j] < carry; } } /** Double mix-products and add squares **/ carry = 0; for (i=0, j=0; i> 63))); carry = (t[j+1] >> 63) + (sum_hi < tmp); sum_lo += (tmp = (t[j] << 1)); sum_hi += (tmp2 = (sum_lo < tmp)); carry += sum_hi < tmp2; t[j] = sum_lo; t[j+1] = sum_hi; } assert(carry == 0); }