/* arith02.c Copyright (C) 1990-2002 Codemist Ltd */ /* * Arithmetic functions. * Multiplication of generic numbers * and in particular a lot of bignum support. * */ /* * This code may be used and modified, and redistributed in binary * or source form, subject to the "CCL Public License", which should * accompany it. This license is a variant on the BSD license, and thus * permits use of code derived from this in either open and commercial * projects: but it does require that updates to this code be made * available back to the originators of the package. * Before merging other code in with this or linking this code * with other packages or libraries please check that the license terms * of the other material are compatible with those of this. */ /* Signature: 1884c4f0 08-Apr-2002 */ #include #include #include #include #ifdef __WATCOMC__ #include #endif #include "machine.h" #include "tags.h" #include "cslerror.h" #include "externs.h" #include "arith.h" #ifdef TIMEOUT #include "timeout.h" #endif /* * Now for multiplication */ /* * I provide symbols IMULTIPLY and IDIVIDE which can be asserted if the * corresponding routines have been provided elsewhere (e.g. in machine * code for extra speed) */ #ifndef IMULTIPLY #ifdef MULDIV64 /* * If MULDIV64 is asserted this function in not in fact needed * since a macro in arith.h arranges that the multiplication is done * in-line. However this version is left here in the source code as * convenient documentation of what the function needs to do. */ unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c) /* * (result, *rlow) = a*b + c as 62-bit product * * rlow may well be the same as one of a, b or c so the * assignment to *rlow must not be done until the calculation is * complete. Inputs a and b are 31-bit values (i.e. they have * their top bit zero and are to be treated as positive value. * c may use all 32 bits, but again is treated as unsigned. * The result is computed and the low 31 bits placed in rlow, * (the top bit of rlow will end up zero) and the top part * of the result is returned. */ { /* NB the value r cound be int64 or unsigned64 - it does not matter */ unsigned64 r = (unsigned64)a*(unsigned64)b + (unsigned64)c; *rlow = (unsigned32)(r & 0x7fffffff); return (unsigned32)(r >> 31); } #else #ifdef OLDER_VERSION unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c) /* * (result, *rlow) = a*b + c as 62-bit product * * The code given here forms the produce by doing four single-precision * (16*16->32) multiplies and using shifts etc to glue the partial results * together. */ { unsigned32 ah = a >> 16, bh = b >> 16, ch = c >> 16; unsigned32 w0, w1, w2, w3, w4; a -= ah << 16; b -= bh << 16; c -= ch << 16; /* a, b and c now split into high/low parts */ w0 = a*b; /* One... */ w1 = w0 >> 16; w0 = w0 - (w1 << 16) + c; w1 += ch; w2 = a*bh; /* Two... */ w3 = w2 >> 16; w1 += w2 - (w3 << 16); w2 = ah*b; /* Three... */ w4 = w2 >> 16; w1 += w2 - (w4 << 16); w2 = w0 >> 16; w1 += w2; w0 -= w2 << 16; w2 = ah*bh + w3 + w4; /* Four 16-bit multiplies done in all. */ w2 += w1 >> 16; /* Here I do a minor shift to split the result at bit 31 rather than 32 */ *rlow = w0 + ((w1 & 0x7fff) << 16); return (w2<<1) + ((w1>>15) & 1); } #else unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c) /* * (result, *rlow) = a*b + c as 62-bit product * * The code given here forms the produce by doing three single-precision * (16*16->32) multiplies and using shifts etc to glue the partial results * together. This is slightly faster than the above (maybe simpler?) code * on at least some machines. */ { unsigned32 ah, bh; unsigned32 w0, w1, w2; ah = a >> 16; /* * On some machines I know that multi-bit shifts are especially painful, * while on others it is nasty to access the literal value needed as a * mask here. Hence I make some show of providing an alternative. */ #ifdef FAST_SHIFTS a -= ah << 16; #else a &= 0xffff; #endif bh = b >> 16; #ifdef FAST_SHIFTS b -= bh << 16; #else b &= 0xffff; #endif /* * At present I can not see any way of issuing any of these multiplies * any earlier, or of doing useful work between the times that I launch * them, or even delaying before I use their results. This will be rather * sad on machines where multiplication could overlap with other operations. */ w2 = ah*bh; w1 = (a + ah)*(b + bh); w0 = a*b; /* * The largest exact result that can be computed by the next line (given * that a and b start off just 31 bit) is 0xfffd0002 */ w1 = (w1 - w2) - w0; /* == (a*bh + b*ah) */ /* * I split into 30 bits in the lower word and 32 in the upper, so I have * 2 bits available for temporary carry effects in the lower part. */ w2 = (w2 << 2) + (w1 >> 14) + (w0 >> 30) + (c >> 30); w1 &= 0x3fff; w0 = (c & 0x3fffffff) + (w0 & 0x3fffffffU) + (w1 << 16); w0 += ((w2 & 1) << 30); w2 = (w2 >> 1) + (w0 >> 31); *rlow = w0 & 0x7fffffffU; return w2; } #endif #endif #endif /* IMULTIPLY */ static Lisp_Object timesii(Lisp_Object a, Lisp_Object b) /* * multiplying two fixnums together is much messier than adding them, * mainly because the result can easily be a two-word bignum */ { unsigned32 aa = (unsigned32)int_of_fixnum(a), bb = (unsigned32)int_of_fixnum(b); unsigned32 temp, low, high; /* * Multiplication by 0 or by -1 is just possibly common enough to be worth * filtering out the following special cases. Avoidance of the tedious * checks for overflow may make this useful even if Imultiply is very fast. */ if (aa <= 1) { if (aa == 0) return fixnum_of_int(0); else return b; } else if (bb <= 1) { if (bb == 0) return fixnum_of_int(0); else return a; } /* * I dump the low part of the product in temp then copy to a variable low * because temp has to have its address taken and so is not a candidate for * living in a register. */ Dmultiply(high, temp, clear_top_bit(aa), clear_top_bit(bb), 0); /* * The result handed back here has only 31 bits active in its low part. */ low = temp; /* * The next two lines convert the unsigned product produced by Imultiply * into a signed product. */ if ((int32)aa < 0) high -= bb; if ((int32)bb < 0) high -= aa; if ((high & 0x40000000) == 0) high = clear_top_bit(high); else high = set_top_bit(high); if (high == 0 && (low & 0x40000000) == 0) { /* one word positive result */ if (low <= 0x07ffffff) return fixnum_of_int(low); else return make_one_word_bignum(low); } else if (high == -1 && (low & 0x40000000) != 0) { /* one word negative result */ low = set_top_bit(low); if ((low & fix_mask) == fix_mask) return fixnum_of_int(low); else return make_one_word_bignum(low); } else return make_two_word_bignum(high, low); } #ifdef COMMON static Lisp_Object timesis(Lisp_Object a, Lisp_Object b) { Float_union bb; bb.i = b - TAG_SFLOAT; bb.f = (float) ((float)int_of_fixnum(a) * bb.f); return (bb.i & ~(int32)0xf) + TAG_SFLOAT; } #endif static Lisp_Object timesib(Lisp_Object a, Lisp_Object b) { int32 aa = int_of_fixnum(a), lenb, i; unsigned32 carry, ms_dig, w; Lisp_Object c, nil; /* * I will split off the (easy) cases of the fixnum being -1, 0 or 1. */ if (aa == 0) return fixnum_of_int(0); else if (aa == 1) return b; else if (aa == -1) return negateb(b); lenb = bignum_length(b); push(b); c = getvector(TAG_NUMBERS, TYPE_BIGNUM, lenb); pop(b); errexit(); lenb = (lenb-CELL)/4; if (aa < 0) { aa = -aa; carry = 0xffffffffU; for (i=0; i> 1; goto extend_by_one_word; } if ((carry & 0x40000000) != 0) carry = set_top_bit(carry); bignum_digits(c)[i] = carry; } else { for (i=0; i 0, and a fortiori the 0x80000000 bit of aa is clear, * so I do not have to worry about the difference between 31 and 32 * bit values for aa. */ for (i=0; i> 31; } for (;carrya!=0;i++) { unsigned32 w = c[i] + 1; c[i] = clear_top_bit(w); carrya = w >> 31; } return; } /* * form (a1+a2) and (b1+b2). */ carrya = 0; for (i=0; i> 31; } carryb = 0; for (i=0; i> 31; } long_times(c+h, d, d+h, c, h, h, 2*h); /* * Adjust to allow for the cases of a1+a2 or b1+b2 overflowing * by a single bit. */ c[3*h] = carrya & carryb; if (carrya != 0) { carrya = 0; for (i=0; i> 31; } } if (carryb != 0) { carryb = 0; for (i=0; i> 31; } } c[3*h] += carrya + carryb; for (i=1; i> 31; } carrya = 0; for (i=0; i<2*h; i++) { unsigned32 w = c[h+i] - d[i] - carrya; c[h+i] = clear_top_bit(w); carrya = w >> 31; } for (; carrya!=0 && i<3*h; i++) { unsigned32 w = c[h+i] - 1; c[h+i] = clear_top_bit(w); carrya = w >> 31; } /* * multiply out a2*b2 */ long_times(d, a, b, c, h, h, 2*h); for (i=0; i> 31; } for (; carrya!=0 && i<4*h; i++) { unsigned32 w = c[i] + 1; c[i] = clear_top_bit(w); carrya = w >> 31; } carrya = 0; for (i=0; i<2*h; i++) { unsigned32 w = c[h+i] - d[i] - carrya; c[h+i] = clear_top_bit(w); carrya = w >> 31; } for (; carrya!=0 && i<3*h; i++) { unsigned32 w = c[h+i] - 1; c[h+i] = clear_top_bit(w); carrya = w >> 31; } /* * The product is now complete */ } static void long_times2(unsigned32 *c, unsigned32 *a, unsigned32 *b, int32 lena, int32 lenb, int32 lenc) /* * This case is standard old fashioned long multiplication. Dump the * result into c. */ { int32 i; for (i=0; i KARATSUBA_CUTOFF) { newlenc = (newlenc + 1)/2; k++; } while (k != 0) { newlenc = 2*newlenc; k--; } newlenc = 4*newlenc; while (lenc > newlenc) c[--lenc] = 0; } if (lena > KARATSUBA_CUTOFF) long_times1(c, a, b, d, lena, lenb, lenc); else long_times2(c, a, b, lena, lenb, lenc); } static Lisp_Object timesbb(Lisp_Object a, Lisp_Object b) /* * a and b are both guaranteed to be bignums when I call this * procedure. */ { int sign = 1; Lisp_Object c, d, nil; int32 lena, lenb, lenc, i; lena = (bignum_length(a) - CELL)/4; lenb = (bignum_length(b) - CELL)/4; if (lena == 1 && lenb == 1) /* * I am going to deem multiplication of two one-word bignums worthy of * a special case, since it is probably fairly common and it will be cheap * enough that avoiding overheads might matter. I still need to split * off the signs, because Imultiply can only deal with 31-bit unsigned values. * One-word bignums each have value at least 2^27 (or else they would have * been represented as fixnums) so the result here will always be a two-word * bignum. */ { int32 va = (int32)bignum_digits(a)[0], vb = (int32)bignum_digits(b)[0], vc; unsigned32 vclow; if (va < 0) { if (vb < 0) Dmultiply(vc, vclow, -va, -vb, 0); else { Dmultiply(vc, vclow, -va, vb, 0); if (vclow == 0) vc = -vc; else { vclow = clear_top_bit(-(int32)vclow); vc = ~vc; } } } else if (vb < 0) { Dmultiply(vc, vclow, va, -vb, 0); if (vclow == 0) vc = -vc; else { vclow = clear_top_bit(-(int32)vclow); vc = ~vc; } } else Dmultiply(vc, vclow, va, vb, 0); return make_two_word_bignum(vc, vclow); } /* * I take the absolute values of the two input values a and b, * recording what the eventual sign for the product will need to be. */ if (((int32)bignum_digits(a)[lena-1]) < 0) { sign = -sign; push(b); /* * Negating a negative bignum can sometimes mean that it will * have to get longer (because of the twos complement assymmetry), * but can never cause it to shrink, In particular it can never lead * to demotion to a fixnum, so after this call to negateb it is still * OK to assume that a is a bignum. */ a = negateb(a); pop(b); errexit(); lena = (bignum_length(a) - CELL)/4; } if (((int32)bignum_digits(b)[lenb-1]) < 0) { sign = -sign; push(a); /* see above comments about negateb */ b = negateb(b); pop(a); errexit(); lenb = (bignum_length(b) - CELL)/4; } if (lenb < lena) /* Commute so that b is at least as long as a */ { c = a; a = b; b = c; lenc = lena; lena = lenb; lenb = lenc; } push2(a, b); /* * For very big numbers I have two special actions called for here. First * I must round up the size of the result vector to have a big enough power * of two as a factor so that (recursive) splitting in two does not cause * trouble later. Then I have to allocate some workspace, the size of that * being related to the size of the numbers being handled. */ if (lena > KARATSUBA_CUTOFF) { int k = 0; /* * I pad lenc up to have a suitably large power of 2 as a factor so * that splitting numbers in half works neatly for me. */ lenc = (lenb+1)/2; /* rounded up half-length of longer number */ while (lenc > KARATSUBA_CUTOFF) { lenc = (lenc + 1)/2; k++; } while (k != 0) { lenc = 2*lenc; k--; } lenc = 2*lenc; c = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+8*lenc); errexitn(2); /* * The next line seems pretty shameless, but it may reduce the amount of * garbage collection I do. When the workspace vector needed is short enough * (at present up to 256 bytes) I use the character assembly buffer (boffo) * as my workspace, relying on an expectation that bignumber multiplication * can never be triggered from places within the reader where that buffer is * in use for its normal purpose. I forge tag bits to make boffo look like * a number here, but can never trigger garbage collection in a way that * might inspect same, so that too is safe at present. */ if ((4*lenc+CELL) <= (intxx)length_of_header(vechdr(boffo))) d = (Lisp_Object)((char *)boffo + TAG_NUMBERS - TAG_VECTOR); else { push(c); d = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+4*lenc); pop(c); } lenc = 2*lenc; } else { /* * In cases where I will use classical long multiplication there is no * need to waste space with extra padding or with the workspace vector d. */ lenc = lena + lenb; c = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+4*lenc); d = c; /* set d to avoid dataflow anomaly */ } pop2(b, a); errexit(); { unsigned32 *da = &bignum_digits(a)[0], *db = &bignum_digits(b)[0], *dc = &bignum_digits(c)[0], *dd = &bignum_digits(d)[0]; long_times(dc, da, db, dd, lena, lenb, lenc); } /* * Here the absolute value of the product has been computed properly. * The result can easily have a zero top digit, which will need trimming * off. If at least one of the input values was a number which had to * be represented with a zero leading digit (e.g. 0x40000000) then the * product can have two leading zero digits here. Similarly for negative * results. Also padding (to allow splitting numbers into two) can have * resulted in extra padding up at the top. lenc gives the size of vector * that was allocated, lena+lenb is a much better guess of how much data * is active in it. */ errexit(); { int32 newlenc = lena + lenb; /* * I tidy up by putting a zero in any padding word above the top of the * active data, and by inserting a header in space that gets trimmed off * in such a way that the garbage collector will not get upset. This * all just roughly trims the numbers - fine adjustment follows. */ #ifdef ADDRESS_64 if ((newlenc & 1) != 0) #else if ((newlenc & 1) == 0) #endif { bignum_digits(c)[newlenc] = 0; if (lenc != newlenc) /* i.e. I padded out somewhat */ { *(Header *)&bignum_digits(c)[newlenc+1] = make_bighdr(lenc-newlenc-1); lenc = newlenc; numhdr(c) = make_bighdr(lenc+CELL/4); } } else if (lenc != newlenc) /* i.e. I padded out somewhat */ { *(Header *)&bignum_digits(c)[newlenc] = make_bighdr(lenc-newlenc); lenc = newlenc; numhdr(c) = make_bighdr(lenc+CELL/4); } } /* * Now I am safe against the garbage collector, and the number c has as * its length just lena+lenb, even if it had been padded out earlier. */ if (sign < 0) { unsigned32 carry = 0xffffffffU; for (i=0; i