IEEE 754 conformant sqrt() implementation for double type
我正在尝试实现 `double __ieee754_sqrt(double x)` 函数,它先用硬件指令取得平方根倒数的初始近似值,再进行牛顿-拉夫逊迭代:
/*
 * Square root of x, built from a hardware reciprocal-square-root seed
 * that is inverted and then refined with four Newton-Raphson steps.
 *
 * x: the argument; no special-casing of zero, negative, infinite or NaN
 *    inputs is done here.
 * Returns the refined estimate of sqrt(x).
 *
 * NOTE(review): there is no final rounding-correction step, so the last
 * bit of the result is not guaranteed to be correctly rounded.
 * NOTE(review): QSEED.DF and the "e" register constraint are
 * target-specific; this only builds for the toolchain that provides them.
 */
double __ieee754_sqrt(double x)
{
    double z;

    /* get reciprocal of the square root (6.75 bits accuracy) */
    __asm("  QSEED.DF %0,%1 \n" : "=e" (z) : "e" (x) :);

    /* turn the 1/sqrt(x) seed into an estimate of sqrt(x) */
    z = 1 / z;

    z = (z + x / z) / 2; /* 1st Newton-Raphson iteration */
    z = (z + x / z) / 2; /* 2nd Newton-Raphson iteration */
    z = (z + x / z) / 2; /* 3rd Newton-Raphson iteration */
    z = (z + x / z) / 2; /* 4th Newton-Raphson iteration */

    return z;
}
但是,paranoia.c(链接,链接)测试抱怨:
1 2 | Square root is neither chopped nor correctly rounded. Observed errors run from -6.0493828e-01 to 5.0000000e-01 ulps. |
问题:如何为 `sqrt()` 实现截断(chopped)或正确舍入?
UPD。硬件本身不支持 `sqrt()`,只支持获取平方根倒数的近似值(约 6.75 位精度)。
UPD2。
在着手构建自己的实现之前,建议先在互联网上搜索以查看是否有合适的和经过测试的开源代码。
通用迭代算法对平方根倒数使用无除法迭代,以达到所需的精度,然后将结果与自变量相乘以得到平方根,最后按所需的舍入模式进行舍入。平方根倒数的迭代可以使用具有二次收敛性的牛顿-拉夫逊迭代(每次迭代大约将正确位数增加一倍),或具有三次收敛性的哈雷迭代(每次迭代大约将正确位数增加到三倍)。尽管存在更高阶的迭代,但通常不使用它们。
为使代码简单,建议在二进制浮点算术中将参数归约到由两个连续二进制区间(binade)组成的单个窄区间。请注意,由于需要进行指数操作,这样做通常无法获得最高性能。出于性能原因,双精度实现的初始迭代通常以单精度执行。
在下面的示例性 ISO-C99 实现中,我将展示如何按照上述思路实现正确舍入的双精度平方根。我假设 `float` 对应 IEEE-754 binary32,`double` 对应 IEEE-754 binary64。
非常重要的是,我假设处理器硬件提供了融合乘加(FMA)指令,并且这些指令已通过标准数学库函数 `fmaf` 和 `fma` 正确地提供出来。
由于OP未指定目标体系结构或未提供起始近似值的详细信息,因此我在下面使用基于区间[0.25,1]的多项式最小极大近似的我自己的起始近似,所有非异常参数均已减小到该区间。
该算法(尤其是舍入逻辑)依赖于Peter Markstein的思想,因此,我有理由相信该算法在构造上是正确的。我在这里仅执行了非常基本的测试。最佳行业实践是从数学上证明此类算法的正确性,例如,请参阅David Russinoff和John Harrison的出版物。紧要关头,一个人可能可以通过对两个连续的Binade进行详尽的测试(如今这是可行的,一个小型集群运行了几天),再加上对所有Binade进行基于随机和基于模式的测试。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <string.h> #include <math.h> /* Approximate 1/sqrt(a) on [0.25, 1] with an accuracy of about 7 bits */ float qseedf (float a) { float r; r = -2.43845296f; r = fmaf (r, a, 6.22994471f); r = fmaf (r, a, -5.91090727f); r = fmaf (r, a, 3.11237526f); return r; } double my_sqrt (double a) { const double QNAN_INDEFINITE = 0.0 / 0.0; const double half = 0.5; const double three_eighth = 0.375; double refined_rsqrt_approx, sqrt_approx, sqrt_residual, result, b; double rsqrt_approx, rsqrt_approx_err, rsqrt_approx_squared, reduced_arg; float argf, approxf, approxf_err; int e, t, f; /* handle normal cases */ if ((a >= 0) && (a < INFINITY)) { /* compute exponent adjustments */ b = frexp (a, &e); t = e - 2*512; f = t / 2; t = t - 2 * f; f = f + 512; /* map argument into the primary approximation interval [0.25,1) */ reduced_arg = ldexp (b, t); /* Compute initial low-precision approximation */ argf = (float)reduced_arg; approxf = qseedf (argf); /* Apply two Newton-Raphson iterations with quadratic convergence */ approxf_err = fmaf (-argf, approxf * approxf, 1.0f); approxf = fmaf (0.5f * approxf, approxf_err, approxf); approxf_err = fmaf (-argf, approxf * approxf, 1.0f); approxf = fmaf (0.5f * approxf, approxf_err, approxf); /* rsqrt approximation is now accurate to 1 single-precision ulp */ rsqrt_approx = (double)approxf; /* Perform a Halley iteration wih cubic convergence. Based on the work of Peter Markstein. 
See: Peter Markstein,"IA-64 and Elementary Functions", Prentice Hall 2000 */ rsqrt_approx_squared = rsqrt_approx * rsqrt_approx; rsqrt_approx_err = fma (-reduced_arg, rsqrt_approx_squared, 1.0); refined_rsqrt_approx = fma (fma (rsqrt_approx_err, three_eighth, half), rsqrt_approx * rsqrt_approx_err, rsqrt_approx); sqrt_approx = reduced_arg * refined_rsqrt_approx; sqrt_residual = fma (-sqrt_approx, sqrt_approx, reduced_arg); result = fma (sqrt_residual, half * refined_rsqrt_approx, sqrt_approx); /* map back from primary approximation interval by jamming exponent */ result = ldexp (result, f); } else { /* handle special cases */ result = (a < 0) ? QNAN_INDEFINITE : (a + a); } return result; } /* https://groups.google.com/forum/#!original/comp.lang.c/qFv18ql_WlU/IK8KGZZFJx4J From: geo <[email protected]> Newsgroups: sci.math,comp.lang.c,comp.lang.fortran Subject: 64-bit KISS RNGs Date: Sat, 28 Feb 2009 04:30:48 -0800 (PST) This 64-bit KISS RNG has three components, each nearly good enough to serve alone. 
The components are: Multiply-With-Carry (MWC), period (2^121+2^63-1) Xorshift (XSH), period 2^64-1 Congruential (CNG), period 2^64 */ static uint64_t kiss64_x = 1234567890987654321ULL; static uint64_t kiss64_c = 123456123456123456ULL; static uint64_t kiss64_y = 362436362436362436ULL; static uint64_t kiss64_z = 1066149217761810ULL; static uint64_t kiss64_t; #define MWC64 (kiss64_t = (kiss64_x << 58) + kiss64_c, \\ kiss64_c = (kiss64_x >> 6), kiss64_x += kiss64_t, \\ kiss64_c += (kiss64_x < kiss64_t), kiss64_x) #define XSH64 (kiss64_y ^= (kiss64_y << 13), kiss64_y ^= (kiss64_y >> 17), \\ kiss64_y ^= (kiss64_y << 43)) #define CNG64 (kiss64_z = 6906969069ULL * kiss64_z + 1234567ULL) #define KISS64 (MWC64 + XSH64 + CNG64) int main (void) { const uint64_t N = 10000000000ULL; /* desired number of test cases */ double arg, ref, res; uint64_t argi, refi, resi, count = 0; double spec[] = {0, 1, INFINITY, NAN}; printf ("test a few special cases:\ "); for (int i = 0; i < sizeof (spec)/sizeof(spec[0]); i++) { printf ("my_sqrt(%22.13a) = %22.13a\ ", spec[i], my_sqrt(spec[i])); printf ("my_sqrt(%22.13a) = %22.13a\ ", -spec[i], my_sqrt(-spec[i])); } printf ("test %llu random cases:\ ", N); do { count++; argi = KISS64; memcpy (&arg, &argi, sizeof arg); res = my_sqrt (arg); ref = sqrt (arg); memcpy (&resi, &res, sizeof resi); memcpy (&refi, &ref, sizeof refi); if (resi != refi) { printf ("\ error @ arg=%22.13a res=%22.13a ref=%22.13a\ ", arg, res, ref); return EXIT_FAILURE; } if ((count & 0xfffff) == 0) printf ("\ [%llu]", count); } while (count < N); printf ("\ [%llu]", count); printf ("\ tests PASSED\ "); return EXIT_SUCCESS; } |
以上程序的输出应类似于以下内容:
1 2 3 4 5 6 7 8 9 10 11 12 | test a few special cases: my_sqrt( 0x0.0000000000000p+0) = 0x0.0000000000000p+0 my_sqrt( -0x0.0000000000000p+0) = -0x0.0000000000000p+0 my_sqrt( 0x1.0000000000000p+0) = 0x1.0000000000000p+0 my_sqrt( -0x1.0000000000000p+0) = -0x1.#IND000000000p+0 my_sqrt( 0x1.#INF000000000p+0) = 0x1.#INF000000000p+0 my_sqrt( -0x1.#INF000000000p+0) = -0x1.#IND000000000p+0 my_sqrt( 0x1.#QNAN00000000p+0) = 0x1.#QNAN00000000p+0 my_sqrt( -0x1.#QNAN00000000p+0) = -0x1.#QNAN00000000p+0 test 10000000000 random cases: [10000000000] tests PASSED |
1 2 3 | z = 1 / z; z = ( z + x / z) / 2; /* 1st Newton-Raphson iteration */ ... |
->
1 2 3 | z = 1 / z; z += ( x / z - z) * 0.5; /* 1st Newton-Raphson iteration */ ... |
这可能会更快。
并且(我认为)可以提前一次迭代停止。
停止时,比较
由于您遇到的错误范围如此之大,所以我担心其余的浮点"硬件"在取整甚至精度方面都很草率。
糟糕,我忘了。有理由为您提供
1 2 3 | z = ( z + x * z) * 0.5; /* 1st Newton-Raphson iteration */ ... z = 1 / z; |
此外,请查看是否有一种方法可以减小指数,而不是对