Artifact c95fc2b5757c451aad0339c1a8356d9cd8eabfea82daf40be0460aaa05a79cb8:
- Executable file
r36/cslbase/arith02.c
— part of check-in
[f2fda60abd]
at
2011-09-02 18:13:33
on branch master
— Some historical releases purely for archival purposes
git-svn-id: https://svn.code.sf.net/p/reduce-algebra/code/trunk/historical@1375 2bfe0521-f11c-4a00-b80e-6202646ff360 (user: arthurcnorman@users.sourceforge.net, size: 38707) [annotate] [blame] [check-ins using] [more...]
/* arith02.c Copyright (C) 1990/1991 Codemist Ltd */ /* * Arithmetic functions. * Multiplication of generic numbers * and in particular a lot of bignum support. * * Version 1.4 February 1991 - Fast multiplication. */ /* Signature: 4a0039df 31-May-1997 */ #include <stdarg.h> #include <string.h> #include <ctype.h> #include <math.h> #ifdef __WATCOMC__ #include <float.h> #endif #include "machine.h" #include "tags.h" #include "cslerror.h" #include "externs.h" #include "arith.h" #ifdef TIMEOUT #include "timeout.h" #endif /* * Now for multiplication */ /* * I provide symbols IMULTIPLY and IDIVIDE which can be asserted if the * corresponding routines have been provided elsewhere (e.g. in machine * code for extra speed) */ #ifndef IMULTIPLY #ifdef MULDIV64 /* * If MULDIV64 is asserted this function in not in fact needed * since a macro in arith.h arranges that the multiplication is done * in-line. However this version is left here in the source code as * convenient documentation of what the function needs to do. */ unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c) /* * (result, *rlow) = a*b + c as 62-bit product * * rlow may well be the same as one of a, b or c so the * assignment to *rlow must not be done until the calculation is * complete. Inputs a and b are 31-bit values (i.e. they have * their top bit zero and are to be treated as positive value. * c may use all 32 bits, but again is treated as unsigned. * The result is computed and the low 31 bits placed in rlow, * (the top bit of rlow will end up zero) and the top part * of the result is returned. */ { /* NB the value r cound be int64 or unsigned64 - it does not matter */ unsigned64 r = (unsigned64)a*(unsigned64)b + (unsigned64)c; *rlow = (unsigned32)(r & 0x7fffffff); return (unsigned32)(r >> 31); } #else #ifdef OLDER_VERSION unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c) /* * (result, *rlow) = a*b + c as 62-bit product * * The code given here forms the produce by doing four single-precision * (16*16->32) multiplies and using shifts etc to glue the partial results * together. */ { unsigned32 ah = a >> 16, bh = b >> 16, ch = c >> 16; unsigned32 w0, w1, w2, w3, w4; a -= ah << 16; b -= bh << 16; c -= ch << 16; /* a, b and c now split into high/low parts */ w0 = a*b; /* One... */ w1 = w0 >> 16; w0 = w0 - (w1 << 16) + c; w1 += ch; w2 = a*bh; /* Two... */ w3 = w2 >> 16; w1 += w2 - (w3 << 16); w2 = ah*b; /* Three... */ w4 = w2 >> 16; w1 += w2 - (w4 << 16); w2 = w0 >> 16; w1 += w2; w0 -= w2 << 16; w2 = ah*bh + w3 + w4; /* Four 16-bit multiplies done in all. */ w2 += w1 >> 16; /* Here I do a minor shift to split the result at bit 31 rather than 32 */ *rlow = w0 + ((w1 & 0x7fff) << 16); return (w2<<1) + ((w1>>15) & 1); } #else unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c) /* * (result, *rlow) = a*b + c as 62-bit product * * The code given here forms the produce by doing three single-precision * (16*16->32) multiplies and using shifts etc to glue the partial results * together. This is slightly faster than the above (maybe simpler?) code * on at least some machines. */ { unsigned32 ah, bh; unsigned32 w0, w1, w2; ah = a >> 16; /* * On some machines I know that multi-bit shifts are especially painful, * while on others it is nasty to access the literal value needed as a * mask here. Hence I make some show of providing an alternative. */ #ifdef FAST_SHIFTS a -= ah << 16; #else a &= 0xffff; #endif bh = b >> 16; #ifdef FAST_SHIFTS b -= bh << 16; #else b &= 0xffff; #endif /* * At present I can not see any way of issuing any of these multiplies * any earlier, or of doing useful work between the times that I launch * them, or even delaying before I use their results. This will be rather * sad on machines where multiplication could overlap with other operations. */ w2 = ah*bh; w1 = (a + ah)*(b + bh); w0 = a*b; /* * The largest exact result that can be computed by the next line (given * that a and b start off just 31 bit) is 0xfffd0002 */ w1 = (w1 - w2) - w0; /* == (a*bh + b*ah) */ /* * I split into 30 bits in the lower word and 32 in the upper, so I have * 2 bits available for temporary carry effects in the lower part. */ w2 = (w2 << 2) + (w1 >> 14) + (w0 >> 30) + (c >> 30); w1 &= 0x3fff; w0 = (c & 0x3fffffff) + (w0 & 0x3fffffffU) + (w1 << 16); w0 += ((w2 & 1) << 30); w2 = (w2 >> 1) + (w0 >> 31); *rlow = w0 & 0x7fffffffU; return w2; } #endif #endif #endif /* IMULTIPLY */ static Lisp_Object timesii(Lisp_Object a, Lisp_Object b) /* * multiplying two fixnums together is much messier than adding them, * mainly because the result can easily be a two-word bignum */ { unsigned32 aa = (unsigned32)int_of_fixnum(a), bb = (unsigned32)int_of_fixnum(b); unsigned32 temp, low, high; /* * Multiplication by 0 or by -1 is just possibly common enough to be worth * filtering out the following special cases. Avoidance of the tedious * checks for overflow may make this useful even if Imultiply is very fast. */ if (aa <= 1) { if (aa == 0) return fixnum_of_int(0); else return b; } else if (bb <= 1) { if (bb == 0) return fixnum_of_int(0); else return a; } /* * I dump the low part of the product in temp then copy to a variable low * because temp has to have its address taken and so is not a candidate for * living in a register. */ Dmultiply(high, temp, clear_top_bit(aa), clear_top_bit(bb), 0); /* * The result handed back here has only 31 bits active in its low part. */ low = temp; /* * The next two lines convert the unsigned product produced by Imultiply * into a signed product. */ if ((int32)aa < 0) high -= bb; if ((int32)bb < 0) high -= aa; if ((high & 0x40000000) == 0) high = clear_top_bit(high); else high = set_top_bit(high); if (high == 0 && (low & 0x40000000) == 0) { /* one word positive result */ if (low <= 0x07ffffff) return fixnum_of_int(low); else return make_one_word_bignum(low); } else if (high == -1 && (low & 0x40000000) != 0) { /* one word negative result */ low = set_top_bit(low); if ((low & fix_mask) == fix_mask) return fixnum_of_int(low); else return make_one_word_bignum(low); } else { /* two-word bignum result needed */ Lisp_Object w = getvector(TAG_NUMBERS, TYPE_BIGNUM, 12), nil; errexit(); ((int32 *)((char *)w - TAG_NUMBERS))[1] = low; ((int32 *)((char *)w - TAG_NUMBERS))[2] = high; ((int32 *)((char *)w - TAG_NUMBERS))[3] = 0; /* padder word */ return w; } } #ifdef COMMON static Lisp_Object timesis(Lisp_Object a, Lisp_Object b) { Float_union bb; bb.i = b - TAG_SFLOAT; bb.f = (float) ((float)int_of_fixnum(a) * bb.f); return (bb.i & ~(int32)0xf) + TAG_SFLOAT; } #endif static Lisp_Object timesib(Lisp_Object a, Lisp_Object b) { int32 aa = int_of_fixnum(a), lenb, i; unsigned32 carry, ms_dig, w; Lisp_Object c, nil; /* * I will split off the (easy) cases of the fixnum being -1, 0 or 1. */ if (aa == 0) return fixnum_of_int(0); else if (aa == 1) return b; else if (aa == -1) return negateb(b); lenb = bignum_length(b); push(b); c = getvector(TAG_NUMBERS, TYPE_BIGNUM, lenb); pop(b); errexit(); lenb = (lenb >> 2) - 1; if (aa < 0) { aa = -aa; carry = 0xffffffffU; for (i=0; i<lenb-1; i++) { carry = clear_top_bit(~bignum_digits(b)[i]) + top_bit(carry); bignum_digits(c)[i] = clear_top_bit(carry); } /* * I do the most significant digit separately. */ carry = clear_top_bit(~bignum_digits(b)[i]) + top_bit(carry); /* * there is a special case needed here - if b started off as a number * like 0xc0000000,0,0,0 then negating it would call for extension of * the bignum c (this is the usual assymetry in the range of twos- * complement numbers). But if I detect that case specially I can * observe that the ONLY case where negation overflows is where the * negated value is exactly a power of 2 such .. an easy thing to * multiply a by! Furthermore the power of two involved is known to * by 30 mod 31. */ if (carry == 0x40000000) { bignum_digits(c)[i] = (aa & 1) << 30; aa = aa >> 1; goto extend_by_one_word; } if ((carry & 0x40000000) != 0) carry = set_top_bit(carry); bignum_digits(c)[i] = carry; } else { for (i=0; i<lenb; i++) bignum_digits(c)[i] = bignum_digits(b)[i]; } /* * Now c is a copy of b (negated if necessary) and I just want to * multiply it by the positive value a. This is the heart of the * procedure. Re-write Imultiply in assembly code if you want it * to go faster. See the top of this file for a portable sample * implementation of Imultiply, which gives back a result as a pair * of 31-bit values. */ carry = 0; /* * here aa is > 0, and a fortiori the 0x80000000 bit of aa is clear, * so I do not have to worry about the difference between 31 and 32 * bit values for aa. */ for (i=0; i<lenb-1; i++) Dmultiply(carry, bignum_digits(c)[i], bignum_digits(c)[i], (unsigned32)aa, carry); ms_dig = bignum_digits(c)[i]; Dmultiply(carry, w, clear_top_bit(ms_dig), (unsigned32)aa, carry); /* * After forming the product to (lenb) digits I need to see if there * is any overflow. Calculate what the next digit would be, sign * extending into the 0x80000000 bit as necessary. */ if ((carry & 0x40000000) != 0) carry = set_top_bit(carry); if (((int32)ms_dig) < 0) carry -= aa; aa = (int32)carry; if (aa == -1 && (w & 0x40000000) != 0) { bignum_digits(c)[i] = set_top_bit(w); return c; } bignum_digits(c)[i] = w; if (aa == 0 && (w & 0x40000000) == 0) return c; /* * drop through to extend the number by a word - note that because I * am multiplying by a fixnum it is only possible to have to expand by * just one word. */ extend_by_one_word: if ((lenb & 1) == 0) /* * Here there was a padder word that I can expand into. */ { bignum_digits(c)[lenb] = aa; numhdr(c) += pack_hdrlength(1); return c; } /* * Need to allocate more space to grow into. */ push(c); a = getvector(TAG_NUMBERS, TYPE_BIGNUM, (lenb<<2)+8); pop(c); errexit(); for (i=0; i<lenb; i++) bignum_digits(a)[i] = bignum_digits(c)[i]; bignum_digits(a)[i] = aa; bignum_digits(a)[i+1] = 0; /* the padder word */ return a; } #ifdef COMMON static Lisp_Object timesir(Lisp_Object a, Lisp_Object b) /* * multiply integer (fixnum or bignum) by ratio. */ { Lisp_Object nil = C_nil; Lisp_Object w = nil; if (a == fixnum_of_int(0)) return a; else if (a == fixnum_of_int(1)) return b; push3(b, a, nil); #define g stack[0] #define a stack[-1] #define b stack[-2] g = gcd(a, denominator(b)); nil = C_nil; if (exception_pending()) goto fail; a = quot2(a, g); nil = C_nil; if (exception_pending()) goto fail; g = quot2(denominator(b), g); nil = C_nil; if (exception_pending()) goto fail; a = times2(a, numerator(b)); nil = C_nil; if (exception_pending()) goto fail; /* * make_ratio tidies things up if the denominator was exactly 1 */ w = make_ratio(a, g); fail: popv(3); return w; #undef a #undef b #undef g } static Lisp_Object timesic(Lisp_Object a, Lisp_Object b) /* * multiply an arbitrary non-complex number by a complex one */ { Lisp_Object nil; Lisp_Object r = real_part(b), i = imag_part(b); push2(a, r); i = times2(a, i); pop2(r, a); errexit(); push(i); r = times2(a, r); pop(i); errexit(); return make_complex(r, i); } #endif static Lisp_Object timesif(Lisp_Object a, Lisp_Object b) { double d = (double)int_of_fixnum(a) * float_of_number(b); return make_boxfloat(d, type_of_header(flthdr(b))); } #ifdef COMMON #define timessi(a, b) timesis(b, a) static Lisp_Object timessb(Lisp_Object a, Lisp_Object b) { double d = float_of_number(a) * float_of_number(b); return make_sfloat(d); } #define timessr(a, b) timessb(a, b) #define timessc(a, b) timesic(a, b) #endif static Lisp_Object timessf(Lisp_Object a, Lisp_Object b) { double d = float_of_number(a) * float_of_number(b); return make_boxfloat(d, type_of_header(flthdr(b))); } #define timesbi(a, b) timesib(b, a) #ifdef COMMON #define timesbs(a, b) timessb(b, a) #endif /* * Now for bignum multiplication - made more than comfortably complicated * by a desire to make it go fast for very big numbers. */ #ifndef KARATSUBA_CUTOFF /* * I have conducted some experiments on one machine to find out what the * best cut-off value here is. The exact value chosen is not very * critical, and the fancy techniques do not pay off until numbers get * a lot bigger than this length (which is expressed in units of 31-bit * words, i.e. about 10 decimals). Anyone who wants may recompile with * alternative values to try to get the system fine-tuned for their * own computer - but I do not expect it to be possible to achieve much * by so doing. */ #define KARATSUBA_CUTOFF 12 #endif static void long_times(unsigned32 *c, unsigned32 *a, unsigned32 *b, unsigned32 *d, int32 lena, int32 lenb, int32 lenc); static void long_times1(unsigned32 *c, unsigned32 *a, unsigned32 *b, unsigned32 *d, int32 lena, int32 lenb, int32 lenc) /* * Here both a and b are big, with lena <= lenb. Split each into two chunks * of size (lenc/4), say (a1,a2) and (b1,b2), and compute each of * a1*b1 * a2*b2 * (a1+a2)*(b1+b2) * where in the last case a1+a2 and b1+b2 can be computed keeping their * carry bits as k1 and k2 (so that a1+a2 and b1+b2 are restricted to * size lenb). The chunks can then be combined with a few additions and * subtractions to form the product that is really wanted. If in fact a * was shorter than lenc/4 (so that after a is split up the top half is * all zero) I do things in a more straightforward way. I require that on * entry to this code lenc<4 < lenb <= lenc/2. */ { int32 h = lenc/4; /* lenc must have been made even enough... */ int32 lena1 = lena - h; int32 lenb1 = lenb - h; unsigned32 carrya, carryb; int32 i; /* * if the top half of a would be all zero I go through a separate path, * doing just two subsidiary multiplications. */ if (lena1 <= 0) { long_times(c+h, a, b+h, d, lena, lenb-h, 2*h); for (i=0; i<h; i++) c[3*h+i] = 0; long_times(d, a, b, c, lena, h, 2*h); for (i=0; i<h; i++) c[i] = d[i]; carrya = 0; for (;i<2*h; i++) { unsigned32 w = c[i] + d[i] + carrya; c[i] = clear_top_bit(w); carrya = w >> 31; } for (;carrya!=0;i++) { unsigned32 w = c[i] + 1; c[i] = clear_top_bit(w); carrya = w >> 31; } return; } /* * form (a1+a2) and (b1+b2). */ carrya = 0; for (i=0; i<h; i++) { unsigned32 w = a[i] + carrya; if (i < lena1) w += a[h+i]; d[i] = clear_top_bit(w); carrya = w >> 31; } carryb = 0; for (i=0; i<h; i++) { unsigned32 w = b[i] + carryb; if (i < lenb1) w += b[h+i]; d[h+i] = clear_top_bit(w); carryb = w >> 31; } long_times(c+h, d, d+h, c, h, h, 2*h); /* * Adjust to allow for the cases of a1+a2 or b1+b2 overflowing * by a single bit. */ c[3*h] = carrya & carryb; if (carrya != 0) { carrya = 0; for (i=0; i<h; i++) { unsigned32 w = c[2*h+i] + d[h+i] + carrya; c[2*h+i] = clear_top_bit(w); carrya = w >> 31; } } if (carryb != 0) { carryb = 0; for (i=0; i<h; i++) { unsigned32 w = c[2*h+i] + d[i] + carryb; c[2*h+i] = clear_top_bit(w); carryb = w >> 31; } } c[3*h] += carrya + carryb; for (i=1; i<h; i++) c[3*h+i] = 0; /* * Now (a1+a2)*(b1+b2) should have been computed totally properly */ for (i=0; i<h; i++) d[h+i] = 0; /* * multiply out a1*b1, where note that a1 and b1 may be less long * than h, but not by much. */ long_times(d, a+h, b+h, c, lena-h, lenb-h, 2*h); carrya = 0; for (i=0; i<2*h; i++) { unsigned32 w = c[2*h+i] + d[i] + carrya; c[2*h+i] = clear_top_bit(w); carrya = w >> 31; } carrya = 0; for (i=0; i<2*h; i++) { unsigned32 w = c[h+i] - d[i] - carrya; c[h+i] = clear_top_bit(w); carrya = w >> 31; } for (; carrya!=0 && i<3*h; i++) { unsigned32 w = c[h+i] - 1; c[h+i] = clear_top_bit(w); carrya = w >> 31; } /* * multiply out a2*b2 */ long_times(d, a, b, c, h, h, 2*h); for (i=0; i<h; i++) c[i] = d[i]; carrya = 0; for (; i<2*h; i++) { unsigned32 w = c[i] + d[i] + carrya; c[i] = clear_top_bit(w); carrya = w >> 31; } for (; carrya!=0 && i<4*h; i++) { unsigned32 w = c[i] + 1; c[i] = clear_top_bit(w); carrya = w >> 31; } carrya = 0; for (i=0; i<2*h; i++) { unsigned32 w = c[h+i] - d[i] - carrya; c[h+i] = clear_top_bit(w); carrya = w >> 31; } for (; carrya!=0 && i<3*h; i++) { unsigned32 w = c[h+i] - 1; c[h+i] = clear_top_bit(w); carrya = w >> 31; } /* * The product is now complete */ } static void long_times2(unsigned32 *c, unsigned32 *a, unsigned32 *b, int32 lena, int32 lenb, int32 lenc) /* * This case is standard old fashioned long multiplication. Dump the * result into c. */ { int32 i; for (i=0; i<lenc; i++) c[i] = 0; for (i=0; i<lena; i++) { unsigned32 carry = 0, da = a[i]; int32 j; /* * When I multiply by (for instance) a high power of 2 there will * be plenty of zero digits in the number being worked with, and * so the test da!=0 will save something useful. */ if (da != 0) { for (j=0; j<lenb; j++) { int32 k = i + j; Dmultiply(carry, c[k], da, b[j], /* NB the addition here is OK and fits into a 32-bit unsigned result */ carry + c[k]); } c[i+j] = carry; } } } static void long_times(unsigned32 *c, unsigned32 *a, unsigned32 *b, unsigned32 *d, int32 lena, int32 lenb, int32 lenc) /* * This decides if a multiplication is big enough to benefit from * decomposition a la Karatsuba. * In recursive entries through here out of long_times1() the numbers a * and b may have shrunk in ways that mean I need to reconsider the * precision to which I am working. This must leave c filled out all * the way to lenc, with padding 0s if necessary. */ { if (lenb < lena) { unsigned32 *t1; int32 t2; t1 = a; a = b; b = t1; t2 = lena; lena = lenb; lenb = t2; } if (4*lenb <= lenc) /* In this case I should shrink lenc a bit.. */ { int32 newlenc = (lenb+1)/2; int k = 0; while (newlenc > KARATSUBA_CUTOFF) { newlenc = (newlenc + 1)/2; k++; } while (k != 0) { newlenc = 2*newlenc; k--; } newlenc = 4*newlenc; while (lenc > newlenc) c[--lenc] = 0; } if (lena > KARATSUBA_CUTOFF) long_times1(c, a, b, d, lena, lenb, lenc); else long_times2(c, a, b, lena, lenb, lenc); } static Lisp_Object timesbb(Lisp_Object a, Lisp_Object b) /* * a and b are both guaranteed to be bignums when I call this * procedure. */ { int sign = 1; Lisp_Object c, d, nil; int32 lena, lenb, lenc, i; lena = (bignum_length(a) >> 2) - 1; lenb = (bignum_length(b) >> 2) - 1; if (lena == 1 && lenb == 1) /* * I am going to deem multiplication of two one-word bignums worthy of * a special case, since it is probably fairly common and it will be cheap * enough that avoiding overheads might matter. I still need to split * off the signs, because Imultiply can only deal with 31-bit unsigned values. * One-word bignums each have value at least 2^27 (or else they would have * been represented as fixnums) so the result here will always be a two-word * bignum. */ { int32 va = (int32)bignum_digits(a)[0], vb = (int32)bignum_digits(b)[0], vc; unsigned32 vclow; if (va < 0) { if (vb < 0) Dmultiply(vc, vclow, -va, -vb, 0); else { Dmultiply(vc, vclow, -va, vb, 0); if (vclow == 0) vc = -vc; else { vclow = clear_top_bit(-(int32)vclow); vc = ~vc; } } } else if (vb < 0) { Dmultiply(vc, vclow, va, -vb, 0); if (vclow == 0) vc = -vc; else { vclow = clear_top_bit(-(int32)vclow); vc = ~vc; } } else Dmultiply(vc, vclow, va, vb, 0); return make_two_word_bignum(vc, vclow); } /* * I take the absolute values of the two input values a and b, * recording what the eventual sign for the product will need to be. */ if (((int32)bignum_digits(a)[lena-1]) < 0) { sign = -sign; push(b); /* * Negating a negative bignum can sometimes mean that it will * have to get longer (because of the twos complement assymmetry), * but can never cause it to shrink, In particular it can never lead * to demotion to a fixnum, so after this call to negateb it is still * OK to assume that a is a bignum. */ a = negateb(a); pop(b); errexit(); lena = (bignum_length(a) >> 2) - 1; } if (((int32)bignum_digits(b)[lenb-1]) < 0) { sign = -sign; push(a); /* see above comments about negateb */ b = negateb(b); pop(a); errexit(); lenb = (bignum_length(b) >> 2) - 1; } if (lenb < lena) /* Commute so that b is at least as long as a */ { c = a; a = b; b = c; lenc = lena; lena = lenb; lenb = lenc; } push2(a, b); /* * For very big numbers I have two special actions called for here. First * I must round up the size of the result vector to have a big enough power * of two as a factor so that (recursive) splitting in two does not cause * trouble later. Then I have to allocate some workspace, the size of that * being related to the size of the numbers being handled. */ if (lena > KARATSUBA_CUTOFF) { int k = 0; /* * I pad lenc up to have a suitably large power of 2 as a factor so * that splitting numbers in half works neatly for me. */ lenc = (lenb+1)/2; /* rounded up half-length of longer number */ while (lenc > KARATSUBA_CUTOFF) { lenc = (lenc + 1)/2; k++; } while (k != 0) { lenc = 2*lenc; k--; } lenc = 2*lenc; c = getvector(TAG_NUMBERS, TYPE_BIGNUM, (1 + 2*lenc) << 2); errexitn(2); /* * The next line seems pretty shameless, but it may reduce the amount of * garbage collection I do. When the workspace vector needed is short enough * (at present up to 256 bytes) I use the character assembly buffer (boffo) * as my workspace, relying on an expectation that bignumber multiplication * can never be triggered from places within the reader where that buffer is * in use for its normal purpose. I forge tag bits to make boffo look like * a number here, but can never trigger garbage collection in a way that * might inspect same, so that too is safe at present. */ if (((1 + lenc) << 2) <= (int32)length_of_header(vechdr(boffo))) d = (Lisp_Object)((char *)boffo + TAG_NUMBERS - TAG_VECTOR); else { push(c); d = getvector(TAG_NUMBERS, TYPE_BIGNUM, (1 + lenc) << 2); pop(c); } lenc = 2*lenc; } else { /* * In cases where I will use classical long multiplication there is no * need to waste space with extra padding or with the workspace vector d. */ lenc = lena + lenb; c = getvector(TAG_NUMBERS, TYPE_BIGNUM, (1 + lenc) << 2); d = c; /* set d to avoid dataflow anomaly */ } pop2(b, a); errexit(); { unsigned32 *da = &bignum_digits(a)[0], *db = &bignum_digits(b)[0], *dc = &bignum_digits(c)[0], *dd = &bignum_digits(d)[0]; long_times(dc, da, db, dd, lena, lenb, lenc); } /* * Here the absolute value of the product has been computed properly. * The result can easily have a zero top digit, which will need trimming * off. If at least one of the input values was a number which had to * be represented with a zero leading digit (e.g. 0x40000000) then the * product can have two leading zero digits here. Similarly for negative * results. Also padding (to allow splitting numbers into two) can have * resulted in extra padding up at the top. lenc gives the size of vector * that was allocated, lena+lenb is a much better guess of how much data * is active in it. */ errexit(); { int32 newlenc = lena + lenb; /* * I tidy up by putting a zero in any padding word above the top of the * active data, and by inserting a header in space that gets trimmed off * in such a way that the garbage collector will not get upset. This * all just roughly trims the numbers - fine adjustment follows. */ if ((newlenc & 1) == 0) { bignum_digits(c)[newlenc] = 0; if (lenc != newlenc) /* i.e. I padded out somewhat */ { bignum_digits(c)[newlenc+1] = make_bighdr(lenc-newlenc); lenc = newlenc; numhdr(c) = make_bighdr(lenc+1); } } else if (lenc != newlenc) /* i.e. I padded out somewhat */ { bignum_digits(c)[newlenc] = make_bighdr(lenc-newlenc+1); lenc = newlenc; numhdr(c) = make_bighdr(lenc+1); } } /* * Now I am safe against the garbage collector, and the number c has as * its length just lena+lenb, even if it had been padded out earlier. */ if (sign < 0) { unsigned32 carry = 0xffffffffU; for (i=0; i<lenc-1; i++) { carry = clear_top_bit(~bignum_digits(c)[i]) + top_bit(carry); bignum_digits(c)[i] = clear_top_bit(carry); } carry = ~bignum_digits(c)[i] + top_bit(carry); if (carry != -1) { bignum_digits(c)[i] = carry; return c; /* no truncation needed */ } carry = bignum_digits(c)[i-1]; if ((carry & 0x40000000) == 0) { bignum_digits(c)[i] = 0xffffffffU; return c; /* no truncation becase of previous digit */ } /* * I need to argue that lenc was at least 2, so bignum_digits(c)[i-2] * could at worst access the header word of the bignum - but it can never * do that because if it were doing so then the bignum product would * be about to have a value zero or thereabouts. One-word bignums are not * allowed to have leading zero digits. */ if (carry == 0x7fffffff && (bignum_digits(c)[i-2] & 0x40000000) != 0) /* chop 2 */ { bignum_digits(c)[i-2] |= ~0x7fffffff; /* * I common up the code to chop off two words from the number at label "chop2" */ goto chop2; } bignum_digits(c)[i-1] |= ~0x7fffffff; /* Drop through to truncate by 1 and sometimes that is easy */ } else { unsigned32 w = bignum_digits(c)[lenc-1]; if (w != 0) return c; /* no truncation */ w = bignum_digits(c)[lenc-2]; if ((w & 0x40000000) != 0) return c; if (w == 0 && (bignum_digits(c)[lenc-3] & 0x40000000) == 0) goto chop2; /* truncate one word */ } /* * here the data in the bignum is all correct (even in the most significant * digit) but I need to shrink the number by one word. Because of all the * doubleword alignment that is used here this can sometimes be done very * easily, and other times it involves forging a short bit of dummy data * to fill in a gap that gets left in the heap. */ numhdr(c) -= pack_hdrlength(1); if ((lenc & 1) != 0) bignum_digits(c)[lenc-1] = 0; /* tidy up */ else bignum_digits(c)[lenc-1] = make_bighdr(2); return c; chop2: /* * Trim two words from the number c */ numhdr(c) -= pack_hdrlength(2); lenc -= 2; bignum_digits(c)[lenc] = 0; lenc |= 1; bignum_digits(c)[lenc] = make_bighdr(2); return c; } #ifdef COMMON #define timesbr(a, b) timesir(a, b) #define timesbc(a, b) timesic(a, b) #endif #define timesbf(a, b) timessf(a, b) #ifdef COMMON #define timesri(a, b) timesir(b, a) #define timesrs(a, b) timessr(b, a) #define timesrb(a, b) timesbr(b, a) static Lisp_Object timesrr(Lisp_Object a, Lisp_Object b) /* * multiply a pair of rational numbers */ { Lisp_Object nil = C_nil; Lisp_Object w = nil; push5(numerator(a), denominator(a), numerator(b), denominator(b), nil); #define g stack[0] #define db stack[-1] #define nb stack[-2] #define da stack[-3] #define na stack[-4] g = gcd(na, db); nil = C_nil; if (exception_pending()) goto fail; na = quot2(na, g); nil = C_nil; if (exception_pending()) goto fail; db = quot2(db, g); nil = C_nil; if (exception_pending()) goto fail; g = gcd(nb, da); nil = C_nil; if (exception_pending()) goto fail; nb = quot2(nb, g); nil = C_nil; if (exception_pending()) goto fail; da = quot2(da, g); nil = C_nil; if (exception_pending()) goto fail; na = times2(na, nb); nil = C_nil; if (exception_pending()) goto fail; da = times2(da, db); nil = C_nil; if (exception_pending()) goto fail; w = make_ratio(na, da); fail: popv(5); return w; #undef g #undef db #undef nb #undef da #undef na } #define timesrc(a, b) timesic(a, b) #define timesrf(a, b) timessf(a, b) #define timesci(a, b) timesic(b, a) #define timescs(a, b) timessc(b, a) #define timescb(a, b) timesbc(b, a) #define timescr(a, b) timesrc(b, a) static Lisp_Object timescc(Lisp_Object a, Lisp_Object b) /* * multiply a pair of complex values */ { Lisp_Object nil = C_nil; Lisp_Object w = nil; push4(real_part(a), imag_part(a), real_part(b), imag_part(b)); push2(nil, nil); #define u stack[0] #define v stack[-1] #define ib stack[-2] #define rb stack[-3] #define ia stack[-4] #define ra stack[-5] u = times2(ra, rb); nil = C_nil; if (exception_pending()) goto fail; v = times2(ia, ib); nil = C_nil; if (exception_pending()) goto fail; v = negate(v); nil = C_nil; if (exception_pending()) goto fail; u = plus2(u, v); /* real part of result */ nil = C_nil; if (exception_pending()) goto fail; v = times2(ra, ib); nil = C_nil; if (exception_pending()) goto fail; ib = times2(rb, ia); nil = C_nil; if (exception_pending()) goto fail; v = plus2(v, ib); /* imaginary part */ nil = C_nil; if (exception_pending()) goto fail; w = make_complex(u, v); fail: popv(6); return w; #undef u #undef v #undef ib #undef rb #undef ia #undef ra } #define timescf(a, b) timesci(a, b) #endif #define timesfi(a, b) timesif(b, a) #ifdef COMMON #define timesfs(a, b) timessf(b, a) #endif #define timesfb(a, b) timesbf(b, a) #ifdef COMMON #define timesfr(a, b) timesrf(b, a) #define timesfc(a, b) timescf(b, a) #endif static Lisp_Object timesff(Lisp_Object a, Lisp_Object b) /* * multiply boxed floats - see commentary on plusff() */ { #ifdef COMMON int32 ha = type_of_header(flthdr(a)), hb = type_of_header(flthdr(b)); #endif double d = float_of_number(a) * float_of_number(b); #ifdef COMMON if (ha == TYPE_LONG_FLOAT || hb == TYPE_LONG_FLOAT) ha = TYPE_LONG_FLOAT; else if (ha == TYPE_DOUBLE_FLOAT || hb == TYPE_DOUBLE_FLOAT) ha = TYPE_DOUBLE_FLOAT; else ha = TYPE_SINGLE_FLOAT; return make_boxfloat(d, ha); #else return make_boxfloat(d, TYPE_DOUBLE_FLOAT); #endif } /* * ... and now for the dispatch code that copes with general * multiplication. */ Lisp_Object times2(Lisp_Object a, Lisp_Object b) { switch ((int)a & TAG_BITS) { case TAG_FIXNUM: switch ((int)b & TAG_BITS) { case TAG_FIXNUM: return timesii(a, b); #ifdef COMMON case TAG_SFLOAT: return timesis(a, b); #endif case TAG_NUMBERS: { int32 hb = type_of_header(numhdr(b)); switch (hb) { case TYPE_BIGNUM: return timesib(a, b); #ifdef COMMON case TYPE_RATNUM: return timesir(a, b); case TYPE_COMPLEX_NUM: return timesic(a, b); #endif default: return aerror1("bad arg for times", b); } } case TAG_BOXFLOAT: return timesif(a, b); default: return aerror1("bad arg for times", b); } #ifdef COMMON case TAG_SFLOAT: switch (b & TAG_BITS) { case TAG_FIXNUM: return timessi(a, b); case TAG_SFLOAT: { Float_union aa, bb; /* timesss() coded in-line */ aa.i = a - TAG_SFLOAT; bb.i = b - TAG_SFLOAT; aa.f = (float) (aa.f * bb.f); return (aa.i & ~(int32)0xf) + TAG_SFLOAT; } case TAG_NUMBERS: { int32 hb = type_of_header(numhdr(b)); switch (hb) { case TYPE_BIGNUM: return timessb(a, b); case TYPE_RATNUM: return timessr(a, b); case TYPE_COMPLEX_NUM: return timessc(a, b); default: return aerror1("bad arg for times", b); } } case TAG_BOXFLOAT: return timessf(a, b); default: return aerror1("bad arg for times", b); } #endif case TAG_NUMBERS: { int32 ha = type_of_header(numhdr(a)); switch (ha) { case TYPE_BIGNUM: switch ((int)b & TAG_BITS) { case TAG_FIXNUM: return timesbi(a, b); #ifdef COMMON case TAG_SFLOAT: return timesbs(a, b); #endif case TAG_NUMBERS: { int32 hb = type_of_header(numhdr(b)); switch (hb) { case TYPE_BIGNUM: return timesbb(a, b); #ifdef COMMON case TYPE_RATNUM: return timesbr(a, b); case TYPE_COMPLEX_NUM: return timesbc(a, b); #endif default: return aerror1("bad arg for times", b); } } case TAG_BOXFLOAT: return timesbf(a, b); default: return aerror1("bad arg for times", b); } #ifdef COMMON case TYPE_RATNUM: switch (b & TAG_BITS) { case TAG_FIXNUM: return timesri(a, b); case TAG_SFLOAT: return timesrs(a, b); case TAG_NUMBERS: { int32 hb = type_of_header(numhdr(b)); switch (hb) { case TYPE_BIGNUM: return timesrb(a, b); case TYPE_RATNUM: return timesrr(a, b); case TYPE_COMPLEX_NUM: return timesrc(a, b); default: return aerror1("bad arg for times", b); } } case TAG_BOXFLOAT: return timesrf(a, b); default: return aerror1("bad arg for times", b); } case TYPE_COMPLEX_NUM: switch (b & TAG_BITS) { case TAG_FIXNUM: return timesci(a, b); case TAG_SFLOAT: return timescs(a, b); case TAG_NUMBERS: { int32 hb = type_of_header(numhdr(b)); switch (hb) { case TYPE_BIGNUM: return timescb(a, b); case TYPE_RATNUM: return timescr(a, b); case TYPE_COMPLEX_NUM: return timescc(a, b); default: return aerror1("bad arg for times", b); } } case TAG_BOXFLOAT: return timescf(a, b); default: return aerror1("bad arg for times", b); } #endif default: return aerror1("bad arg for times", a); } } case TAG_BOXFLOAT: switch ((int)b & TAG_BITS) { case TAG_FIXNUM: return timesfi(a, b); #ifdef COMMON case TAG_SFLOAT: return timesfs(a, b); #endif case TAG_NUMBERS: { int32 hb = type_of_header(numhdr(b)); switch (hb) { case TYPE_BIGNUM: return timesfb(a, b); #ifdef COMMON case TYPE_RATNUM: return timesfr(a, b); case TYPE_COMPLEX_NUM: return timesfc(a, b); #endif default: return aerror1("bad arg for times", b); } } case TAG_BOXFLOAT: return timesff(a, b); default: return aerror1("bad arg for times", b); } default: return aerror1("bad arg for times", a); } } /* end of arith02.c */