Artifact 1f49090d909b3b81d48d7bca0d2a9fac4945252d2a53ae281ace7f0b5e29b3e5:
- Executable file
r37/lisp/csl/cslbase/arith07.c
— part of check-in
[f2fda60abd]
at
2011-09-02 18:13:33
on branch master
— Some historical releases purely for archival purposes
git-svn-id: https://svn.code.sf.net/p/reduce-algebra/code/trunk/historical@1375 2bfe0521-f11c-4a00-b80e-6202646ff360 (user: arthurcnorman@users.sourceforge.net, size: 30697) [annotate] [blame] [check-ins using] [more...]
/* arith07.c Copyright (C) 1990-2002 Codemist Ltd */ /* * Arithmetic functions. negation plus a load of Common Lisp things * for support of complex numbers. * */ /* * This code may be used and modified, and redistributed in binary * or source form, subject to the "CCL Public License", which should * accompany it. This license is a variant on the BSD license, and thus * permits use of code derived from this in either open and commercial * projects: but it does require that updates to this code be made * available back to the originators of the package. * Before merging other code in with this or linking this code * with other packages or libraries please check that the license terms * of the other material are compatible with those of this. */ /* Signature: 0a772466 08-Apr-2002 */ #include <stdarg.h> #include <string.h> #include <ctype.h> #include <math.h> #include "machine.h" #include "tags.h" #include "cslerror.h" #include "externs.h" #include "arith.h" #ifdef TIMEOUT #include "timeout.h" #endif Lisp_Object copyb(Lisp_Object a) /* * copy a bignum. */ { Lisp_Object b, nil; int32 len = bignum_length(a), i; push(a); b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len); pop(a); errexit(); len = (len-CELL)/4; for (i=0; i<len; i++) bignum_digits(b)[i] = bignum_digits(a)[i]; return b; } Lisp_Object negateb(Lisp_Object a) /* * Negate a bignum. Note that negating the 1-word bignum * value of 0x08000000 will produce a fixnum as a result, * which might confuse the caller... in a similar way negating * the value -0x40000000 will need to promote from a one-word * bignum to a 2-word bignum. How messy just for negation! */ { Lisp_Object b, nil; int32 len = bignum_length(a), i, carry; if (len == CELL+4) /* one-word bignum - do specially */ { len = -(int32)bignum_digits(a)[0]; if (len == fix_mask) return fixnum_of_int(len); else if (len == 0x40000000) return make_two_word_bignum(0, len); else return make_one_word_bignum(len); } push(a); b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len); pop(a); errexit(); len = (len-CELL)/4-1; carry = -1; for (i=0; i<len; i++) { carry = clear_top_bit(~bignum_digits(a)[i]) + top_bit(carry); bignum_digits(b)[i] = clear_top_bit(carry); } /* * Handle the top digit separately since it is signed. */ carry = ~bignum_digits(a)[i] + top_bit(carry); if (!signed_overflow(carry)) { /* * If the most significant word ends up as -1 then I just might * have 0x40000000 in the next word down and so I may need to shrink * the number. Since I handled 1-word bignums specially I have at * least two words to deal with here. */ if (carry == -1 && (bignum_digits(b)[i-1] & 0x40000000) != 0) { bignum_digits(b)[i-1] |= ~0x7fffffff; numhdr(b) -= pack_hdrlength(1); #ifdef ADDRESS_64 if ((i & 1) != 0) bignum_digits(b)[i] = 0; else bignum_digits(b)[i] = make_bighdr(2); #else if ((i & 1) == 0) bignum_digits(b)[i] = 0; else bignum_digits(b)[i] = make_bighdr(2); #endif } else bignum_digits(b)[i] = carry; /* no shrinking needed */ return b; } /* * Here I have overflow: this can only happen when I negate a number * that started off with 0xc0000000 in the most significant digit, * and I have to pad a zero word onto the front. */ bignum_digits(b)[i] = clear_top_bit(carry); return lengthen_by_one_bit(b, carry); } /* * generic negation */ Lisp_Object negate(Lisp_Object a) { #ifdef COMMON Lisp_Object nil; /* needed for errexit() */ #endif switch ((int)a & TAG_BITS) { case TAG_FIXNUM: { int32 aa = -int_of_fixnum(a); /* * negating the number -#x8000000 (which is a fixnum) yields a value * which just fails to be a fixnum. */ if (aa != 0x08000000) return fixnum_of_int(aa); else return make_one_word_bignum(aa); } #ifdef COMMON case TAG_SFLOAT: { Float_union aa; aa.i = a - TAG_SFLOAT; aa.f = (float) (-aa.f); return (aa.i & ~(int32)0xf) + TAG_SFLOAT; } #endif case TAG_NUMBERS: { int32 ha = type_of_header(numhdr(a)); switch (ha) { case TYPE_BIGNUM: return negateb(a); #ifdef COMMON case TYPE_RATNUM: { Lisp_Object n = numerator(a), d = denominator(a); push(d); n = negate(n); pop(d); errexit(); return make_ratio(n, d); } case TYPE_COMPLEX_NUM: { Lisp_Object r = real_part(a), i = imag_part(a); push(i); r = negate(r); pop(i); errexit(); push(r); i = negate(i); pop(r); errexit(); return make_complex(r, i); } #endif default: return aerror1("bad arg for minus", a); } } case TAG_BOXFLOAT: { double d = float_of_number(a); return make_boxfloat(-d, type_of_header(flthdr(a))); } default: return aerror1("bad arg for minus", a); } } /*****************************************************************************/ /*** Transcendental functions etcetera. ***/ /*****************************************************************************/ /* * Much of the code here is extracted from the portable Fortran library * used by Codemist with its Fortran compiler. */ /* * The object of the following macro is to adjust the floating point * variables concerned so that the more significant one can be squared * with NO LOSS OF PRECISION. It is only used when there is no danger * of over- or under-flow. * * This code is NOT PORTABLE but can be modified for use elsewhere * It should, however, serve for IEEE and IBM FP formats. */ #define _fp_normalize(high, low) \ { double temp; /* access to representation */ \ temp = high; /* take original number */ \ ((int32 *)&temp)[current_fp_rep & FP_WORD_ORDER] = 0; \ /* make low part of mantissa 0 */ \ low += (high - temp); /* add into low-order result */ \ high = temp; \ } #ifdef COMMON double MS_CDECL Cabs(Complex z) { /* * Obtain the absolute value of a complex number - note that the main * agony here is in ensuring that neither overflow nor underflow can * wreck the calculation. Given ideal arithmetic the sum could be carried * through as just sqrt(x^2 + y^2). */ double x = z.real, y = z.imag; double scale; int n1, n2; if (x==0.0) return fabs(y); else if (y==0.0) return fabs(x); (void)frexp(x, &n1); (void)frexp(y, &n2); /* The exact range of values returned by frexp does not matter here */ if (n2>n1) n1 = n2; /* n1 is now the exponent of the larger (in absolute value) of x, y */ scale = ldexp(1.0, n1); /* can not be 0.0 */ x /= scale; y /= scale; /* The above scaling operation introduces no rounding error (since the */ /* scale factor is exactly a power of 2). It reduces the larger of x, y */ /* to be somewhere near 1.0 so overflow in x*x+y*y is impossible. It is */ /* still possible that one of x*x and y*y will underflow (but not both) */ /* but this is harmless. */ return scale * sqrt(x*x + y*y); } Complex MS_CDECL Ccos(Complex z) { double x = z.real, y = z.imag; /* * cos(x + iy) = cos(x)*cosh(y) - i sin(x)*sinh(y) * For smallish y this can be used directly. For |y| > 50 I will * compute sinh and cosh as just +/- exp(|y|)/2 */ double s = sin(x), c = cos(x); double absy = fabs(y); if (absy <= 50.0) { double sh = sinh(y), ch = cosh(y); z.real = c*ch; z.imag = - s*sh; return z; } else { double w; int n = _reduced_exp(absy, &w) - 1; z.real = ldexp(c*w, n); if (y < 0.0) z.imag = ldexp(s*w, n); else z.imag = ldexp(-s*w, n); return z; } } static double reduced_power(double a, int n) { /* * Compute (1 + a)^n - 1 avoiding undue roundoff error. * Assumes n >= 1 on entry and that a is small. */ if (n == 1) return a; { double d = reduced_power(a, n/2); d = (2.0 + d)*d; if (n & 1) d += (1.0 + d)*a; return d; } } /* * The following values are included for documentation purposes - they * give the largest args that can be given to exp() without leading to * overflow on IBM mainframe and IEEE machines. * #ifdef IBMFLOAT * #define _exp_arg_limit 174.673089501106208 * #else * #define _exp_arg_limit 709.78271289338397 * #endif * Note that in any case exp(50.0) will not overflow (it is only 5.2e21), * so it can be evaluated the simple direct way. */ int _reduced_exp(double x, double *r) { /* * (*r) = exp(x)/2^n; return n; * where n will be selected so that *r gets set to a fairly small value * (precise range of r unimportant provided it will be WELL away from * chances of overflow, even when exp(x) would actually overflow). This * function may only be called with argument x that is positive and * large enough that n will end up satisfying n>=1. The coding here * will ensure that if x>4.0, and in general the use of this function * will only be for x > 50. * For IBM hardware it would be good to be able to control the value * of n mod 4, maybe, to help counter wobbling precision. This is not * done here. */ int n; double f; n = (int)(x / 7.625 + 0.5); /* * 7.625 = 61/8 and is expected to have an exact floating point * representation here, so f is computed without any rounding error. * (do I need something like the (x - 0.5) - 0.5 trick here?) */ f = exp(x - 7.625*(double)n); /* * the magic constant is ((exp(61/8) / 2048) - 1) and it arises because * 61/88 is a decent rational approximation to log(2), hence exp(61/8) * is almost 2^11. Thus I compute exp(x) as * 2^(11*n) * (exp(61/8)/2^11)^n * exp(f) * The first factor is exact, the second is (1+e)^n where e is small and * n is an integer, so can be computer accurately, and the residue f at the * end is small enough not to give over-bad trouble. * The numeric constant given here was calculated with the REDUCE 3.3 * bigfloat package. */ #define _e61q8 3.81086435594567676751e-4 *r = reduced_power(_e61q8, n)*f + f; #undef _e61q8 return 11*n; } Complex MS_CDECL Cexp(Complex z) { double x = z.real, y = z.imag; /* * value is exp(x)*(cos(y) + i sin(y)) but have care with overflow * Here (and throughout the complex library) there is an opportunity * to save time by computing sin(y) and cos(y) together. Since this * code is (to begin with) to sit on top of an arbitrary C library, * perhaps with hardware support for the calculation of real-valued * trig functions I am not going to try to realise this saving. */ double s = sin(y), c = cos(y); /* * if x > 50 I will use a cautious sceme which computes exp(x) with * its (binary) exponent separated. Note that 50.0 is chosen as a * number noticably smaller than _exp_arg_limit (exp(50) = 5.18e21), * but is not a critical very special number. */ if (x <= 50.0) /* includes x < 0.0, of course */ { double w = exp(x); z.real = w*c; z.imag = w*s; return z; } else { double w; int n = _reduced_exp(x, &w); z.real = ldexp(w*c, n); z.imag = ldexp(w*s, n); return z; } } Complex MS_CDECL Cln(Complex z) { double x = z.real, y = z.imag; /* * if x and y are both very large then cabs(z) may be out of range * even though log or if is OK. Thus it is necessary to perform an * elaborate scaled calculation here, and not just * z.real = log(cabs(z)); */ double scale, r; int n1, n2; if (x==0.0) r = log(fabs(y)); else if (y==0.0) r = log(fabs(x)); else { (void)frexp(x, &n1); (void)frexp(y, &n2); /* The exact range of values returned by frexp does not matter here */ if (n2>n1) n1 = n2; scale = ldexp(1.0, n1); x /= scale; y /= scale; r = log(scale) + 0.5*log(x*x + y*y); } z.real = r; /* * The C standard is not very explicit about the behaviour of atan2(0.0, -n) * while for Fortran it is necessary that this returns +pi not -pi. Hence * with extreme caution I put a special test here. */ if (y == 0.0) if (x < 0.0) z.imag = _pi; else z.imag = 0.0; else z.imag = atan2(y, x); return z; } /* * Complex raising to a power. This seems to be pretty nasty * to get right, and the code includes extra high precision variants * on atan() and log(). Further refinements wrt efficiency may be * possible later on. * This code has been partially tested, and seems to be uniformly * better than using just a**b = exp(b*log(a)), but much more careful * study is needed before it can possibly be claimed that it is * right in the sense of not throwing away accuracy when it does not * have to. I also need to make careful checks to verify that the * correct (principal) value is computed. */ /* * The next function is used after arithmetic has been done on extra- * precision numbers so that the relationship between high and low parts * is no longer known. Re-instate it. */ #define _two_minus_25 2.98023223876953125e-8 /* 2^(-25) */ static double fp_add(double a, double b, double *lowres) { /* Result is the high part of a+b, with the low part assigned to *lowres */ double absa, absb; if (a >= 0.0) absa = a; else absa = -a; if (b >= 0.0) absb = b; else absb = -b; if (absa < absb) { double t = a; a = b; b = t; } /* Now a is the larger (in absolute value) of the two numbers */ if (absb > absa * _two_minus_25) { double al = 0.0, bl = 0.0; /* * If the exponent difference beweeen a and b is no more than 25 then * I can add the top part (20 or 24 bits) of a to the top part of b * without going beyond the 52 or 56 bits that a full mantissa can hold. */ _fp_normalize(a, al); _fp_normalize(b, bl); a = a + b; /* No rounding needed here */ b = al + bl; if (a == 0.0) { a = b; b = 0.0; } } /* * The above step leaves b small wrt the value in a (unless a+b led * to substantial cancellation of leading digits), but leaves the high * part a with bits everywhere. Force low part of a to zero. */ { double al = 0.0; _fp_normalize(a, al); b = b + al; } if (a >= 0.0) absa = a; else absa = -a; if (b >= 0.0) absb = b; else absb = -b; if (absb > absa * _two_minus_25) /* * If on input a is close to -b, then a+b is close to zero. In this * case the exponents of a abd b matched, and so earlier calculations * have all been done exactly. Go around again to split residue into * high and low parts. */ { double al = 0.0, bl = 0.0; _fp_normalize(b, bl); a = a + b; _fp_normalize(a, al); b = bl + al; } *lowres = b; return a; } #undef _two_minus_25 static void extended_atan2(double b, double a, double *thetah, double *thetal) { int octant; double rh, rl, thh, thl; /* * First reduce the argument to the first octant (i.e. a, b both +ve, * and b <= a). */ if (b < 0.0) { octant = 4; a = -a; b = -b; } else octant = 0; if (a < 0.0) { double t = a; octant += 2; a = b; b = -t; } if (b > a) { double t = a; octant += 1; a = b; b = t; } { static struct { double h; double l;} _atan[] = { /* * The table here gives atan(n/16) in 1.5-precision for n=0..16 * Note that all the magic numbers used in this file were calculated * using the REDUCE bigfloat package, and all the 'exact' parts have * a denominator of at worst 2^26 and so are expected to have lots * of trailing zero bits in their floating point representation. */ { 0.0, 0.0 }, { 0.06241881847381591796875, -0.84778585694947708870e-8 }, { 0.124355018138885498046875, -2.35921240630155201508e-8 }, { 0.185347974300384521484375, -2.43046897565983490387e-8 }, { 0.2449786663055419921875, -0.31786778380154175187e-8 }, { 0.302884876728057861328125, -0.83530864557675689054e-8 }, { 0.358770668506622314453125, 0.17639499059427950639e-8 }, { 0.412410438060760498046875, 0.35366268088529162896e-8 }, { 0.4636476039886474609375, 0.50121586552767562314e-8 }, { 0.512389481067657470703125, -2.07569197640365239794e-8 }, { 0.558599293231964111328125, 2.21115983246433832164e-8 }, { 0.602287352085113525390625, -0.59501493437085023057e-8 }, { 0.643501102924346923828125, 0.58689374629746842287e-8 }, { 0.6823165416717529296875, 1.32029951485689299817e-8 }, { 0.71882998943328857421875, 1.01883359311982641515e-8 }, { 0.75315129756927490234375, -1.66070805128190106297e-8 }, { 0.785398185253143310546875, -2.18556950009312141541e-8 } }; int k = (int)(16.0*(b/a + 0.03125)); /* 0 to 16 */ double kd = (double)k/16.0; double ah = a, al = 0.0, bh = b, bl = 0.0, ch, cl, q, q2; _fp_normalize(ah, al); _fp_normalize(bh, bl); ch = bh - ah*kd; cl = bl - al*kd; ah = ah + bh*kd; al = al + bl*kd; bh = ch; bl = cl; /* Now |(a/b)| <= 1/32 */ ah = fp_add(ah, al, &al); /* Re-normalise */ bh = fp_add(bh, bl, &bl); /* Compute approximation to b/a */ rh = (bh + bl)/(ah + al); rl = 0.0; _fp_normalize(rh, rl); bh -= ah*rh; bl -= al*rh; rl = (bh + bl)/(ah + al); /* Quotient now formed */ /* * Now it is necessary to compute atan(q) to one-and-a-half precision. * Since |q| < 1/32 I will leave just q as the high order word of * the result and compute atan(q)-q as a single precision value. This * gives about 16 bits accuracy beyond regular single precision work. */ q = rh + rl; q2 = q*q; /* The expansion the follows could be done better using a minimax poly */ rl -= q*q2*(0.33333333333333333333 - q2*(0.20000000000000000000 - q2*(0.14285714285714285714 - q2*(0.11111111111111111111 - q2* 0.09090909090909090909)))); /* OK - now (rh, rl) is atan(reduced b/a). Need to add on atan(kd) */ rh += _atan[k].h; rl += _atan[k].l; } /* * The following constants give high precision versions of pi and pi/2, * and the high partwords (p2h and pih) have lots of low order zero bits * in their binary representation. Expect (=require) that the arithmetic * that computes thh is done without introduced rounding error. */ #define _p2h 1.57079632580280303955078125 #define _p2l 9.92093579680540441639751e-10 #define _pih 3.14159265160560607910156250 #define _pil 1.984187159361080883279502e-9 switch (octant) { default: case 0: thh = rh; thl = rl; break; case 1: thh = _p2h - rh; thl = _p2l - rl; break; case 2: thh = _p2h + rh; thl = _p2l + rl; break; case 3: thh = _pih - rh; thl = _pil - rl; break; case 4: thh = -_pih + rh; thl = -_pil + rl; break; case 5: thh = -_p2h - rh; thl = -_p2l - rl; break; case 6: thh = -_p2h + rh; thl = -_p2l + rl; break; case 7: thh = -rh; thl = -rl; break; } #undef _p2h #undef _p2l #undef _pih #undef _pil *thetah = fp_add(thh, thl, thetal); } static void extended_log(int k, double a, double b, double *logrh, double *logrl) { /* * If we had exact arithmetic this procedure could be: * k*log(2) + 0.5*log(a^2 + b^2) */ double al = 0.0, bl = 0.0, all = 0.0, bll = 0.0, c, ch, cl, cll; double w, q, qh, ql, rh, rl; int n; /* * First (a^2 + b^2) is calculated, using extra precision. * Because any rounding at this stage can lead to bad errors in * the power that I eventually want to compute, I use 3-word * arithmetic here, and with the version of _fp_normalize given * above and IEEE or IBM370 arithmetic this part of the * computation is exact. */ _fp_normalize(a, al); _fp_normalize(al, all); _fp_normalize(b, bl); _fp_normalize(bl, bll); ch = a*a + b*b; cl = 2.0*(a*al + b*bl); cll = (al*al + bl*bl) + all*(2.0*(a + al) + all) + bll*(2.0*(b + bl) + bll); _fp_normalize(ch, cl); _fp_normalize(cl, cll); c = ch + (cl + cll); /* single precision approximation */ /* * At this stage the scaling of the input value will mean that we * have 0.25 <= c <= 2.0 * * Now rewrite things as * (2*k + n)*log(s) + 0.5*log((a^2 + b^2)/2^n)) * where s = sqrt(2) * and where the arg to the log is in sqrt(0.5), sqrt(2) */ #define _sqrt_half 0.70710678118654752440 #define _sqrt_two 1.41421356237309504880 k = 2*k; while (c < _sqrt_half) { k -= 1; ch *= 2.0; cl *= 2.0; cll *= 2.0; c *= 2.0; } while (c > _sqrt_two) { k += 1; ch *= 0.5; cl *= 0.5; cll *= 0.5; c *= 0.5; } #undef _sqrt_half #undef _sqrt_two n = (int)(16.0/c + 0.5); w = (double)n / 16.0; ch *= w; cl *= w; cll *= w; /* Now |c-1| < 0.04317 */ ch = (ch - 0.5) - 0.5; ch = fp_add(ch, cl, &cl); cl = cl + cll; /* * (ch, cl) is now the reduced argument ready for calculating log(1+c), * and now that the reduction is over I can drop back to the use of just * two doubleprecision values to represent c. */ c = ch + cl; qh = c / (2.0 + c); ql = 0.0; _fp_normalize(qh, ql); ql = ((ch - qh*(2.0 + ch)) + cl - qh*cl) / (2.0 + c); /* (qh, ql) is now c/(2.0 + c) */ rh = qh; /* 18 bits bigger than low part will end up */ q = qh + ql; w = q*q; rl = ql + q*w*(0.33333333333333333333 + w*(0.20000000000000000000 + w*(0.14285714285714285714 + w*(0.11111111111111111111 + w*(0.09090909090909090909))))); /* * (rh, rl) is now atan(c) correct to double precision plus about 18 bits. */ { double temp; static struct { double h; double l; } _log_table[] = { /* * The following values are (in extra precision) -log(n/16)/2 for n * in the range 11 to 23 (i.e. roughly over sqrt(2)/2 to sqrt(2)) */ { 0.1873466968536376953125, 2.786706765149099245e-8 }, { 0.1438410282135009765625, 0.801238948715710950e-8 }, { 0.103819668292999267578125, 1.409612298322959552e-8 }, { 0.066765725612640380859375, -2.930037906928620319e-8 }, { 0.0322692394256591796875, 2.114312640614896196e-8 }, { 0.0, 0.0 }, { -0.0303122997283935546875, -1.117982386660280307e-8 }, { -0.0588915348052978515625, 1.697710612429310295e-8 }, { -0.08592510223388671875, -2.622944289242004947e-8 }, { -0.111571788787841796875, 1.313073691899185245e-8 }, { -0.135966837406158447265625, -2.033566243215020975e-8 }, { -0.159226894378662109375, 2.881939480146987639e-8 }, { -0.18145275115966796875, 0.431498374218108783e-8 }}; rh = fp_add(rh, _log_table[n-11].h, &temp); rl = temp + _log_table[n-11].l + rl; } #define _exact_part_logroot2 0.3465735912322998046875 #define _approx_part_logroot2 (-9.5232714997888393927e-10) /* Multiply this by k and add it in */ { double temp, kd = (double)k; rh = fp_add(rh, kd*_exact_part_logroot2, &temp); rl = rl + temp + kd*_approx_part_logroot2; } #undef _exact_part_logroot2 #undef _approx_part_logroot2 *logrh = rh; *logrl = rl; return; } Complex MS_CDECL Cpow(Complex z1, Complex z2) { double a = z1.real, b = z1.imag, c = z2.real, d = z2.imag; int k, m, n; double scale; double logrh, logrl, thetah, thetal; double r, i, rh, rl, ih, il, clow, dlow, q; double cw, sw, cost, sint; if (b == 0.0 && d == 0.0 && a >= 0.0)/* Simple case if both args are real */ { z1.real = pow(a, c); z1.imag = 0.0; return z1; } /* * Start by scaling z1 by dividing out a power of 2 so that |z1| is * fairly close to 1 */ if (a == 0.0) { if (b == 0.0) return z1; /* 0.0**anything is really an error */ /* The exact values returned by frexp do not matter here */ (void)frexp(b, &k); } else { (void)frexp(a, &k); if (b != 0.0) { int n; (void)frexp(b, &n); if (n > k) k = n; } } scale = ldexp(1.0, k); a /= scale; b /= scale; /* * The idea next is to express z1 as r*exp(i theta), then z1**z2 * is exp(z2*log(z1)) and the exponent simplifies to * (c*log r - d*theta) + i(c theta + d log r). * Note r = scale*sqrt(a*a + b*b) now. * The argument for exp() must be computed to noticably more than * regular precision, since otherwise exp() greatly magnifies the * error in the real part (if it is large and positive) and range * reduction in the imaginary part can equally lead to accuracy * problems. Neither c nor d can usefully be anywhere near the * extreme range of floating point values, so I will not even try * to scale them, thus I will end up happy(ish) if I can compute * atan2(b, a) and log(|z1|) in one-and-a-half precision form. * It would be best to compute theta in units of pi/4 rather than in * raidians, since then the case of z^n (integer n) with arg(z) an * exact multiple of pi/4 would work out better. However at present I * just hope that the 1.5-precision working is good enough. */ extended_atan2(b, a, &thetah, &thetal); extended_log(k, a, b, &logrh, &logrl); /* Normalise all numbers */ clow = 0.0; dlow = 0.0; _fp_normalize(c, clow); _fp_normalize(d, dlow); /* (rh, rl) = c*logr - d*theta; */ rh = c*logrh - d*thetah; /* No rounding in this computation */ rl = c*logrl + clow*(logrh + logrl) - d*thetal - dlow*(thetah + thetal); /* (ih, il) = c*theta + d*logr; */ ih = c*thetah + d*logrh; /* No rounding in this computation */ il = c*thetal + clow*(thetah + thetal) + d*logrl + dlow*(logrh + logrl); /* * Now it remains to take the exponential of the extended precision * value in ((rh, rl), (ih, il)). * Range reduce the real part by multiples of log(2), and the imaginary * part by multiples of pi/2. */ rh = fp_add(rh, rl, &rl); /* renormalise */ r = rh + rl; /* Approximate value */ #define _recip_log_2 1.4426950408889634074 q = r * _recip_log_2; m = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5); q = (double)m; #undef _recip_log_2 /* * log 2 = 11629080/2^24 - 0.000 00000 19046 54299 95776 78785 * to reasonable accuracy. It is vital that the exact part be * read in as a number with lots of low zero bits, so when it gets * multiplied by the integer q there is NO rounding needed. */ #define _exact_part_log2 0.693147182464599609375 #define _approx_part_log2 (-1.9046542999577678785418e-9) rh = rh - q*_exact_part_log2; rl = rl - q*_approx_part_log2; #undef _exact_part_log2 #undef _approx_part_log2 r = exp(rh + rl); /* This should now be accurate enough */ ih = fp_add(ih, il, &il); i = ih + il; /* Approximate value */ #define _recip_pi_by_2 0.6366197723675813431 q = i * _recip_pi_by_2; n = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5); q = (double)n; /* * pi/2 = 105414357/2^26 + 0.000 00000 09920 93579 68054 04416 39751 * to reasonable accuracy. It is vital that the exact part be * read in as a number with lots of low zero bits, so when it gets * multiplied by the integer q there is NO rounding needed. */ #define _exact_part_pi2 1.57079632580280303955078125 #define _approx_part_pi2 9.92093579680540441639751e-10 ih = ih - q*_exact_part_pi2; il = il - q*_approx_part_pi2; #undef _recip_pi_by_2 #undef _exact_part_pi2 #undef _approx_part_pi2 i = ih + il; /* Now accurate enough */ /* * Having done super-careful range reduction I can call the regular * sin/cos routines here. If sin/cos could both be computed together * that could speed things up a little bit, but by this stage they have * not much in common. */ cw = cos(i); sw = sin(i); switch (n & 3) /* quadrant control */ { default: case 0: cost = cw; sint = sw; break; case 1: cost = -sw; sint = cw; break; case 2: cost = -cw; sint = -sw; break; case 3: cost = sw; sint = -cw; break; } /* * Now, at long last, I can assemble the results and return. */ z1.real = ldexp(r*cost, m); z1.imag = ldexp(r*sint, m); return z1; } /* * End of complex-to-complex-power code. */ #endif /* end of arith07.c */