Long integer representation.The absolute value of a number is equal to SUM(for i=0 through abs(ob_size)-1) ob_digit[i] 2**(SHIFTi) Negative numbers are represented with ob_size < 0; zero is represented by ob_size == 0. In a normalized number, ob_digit[abs(ob_size)-1] (the most significant digit) is never zero. Also, in all cases, for all valid i,0 <= ob_digit[i] <= MASK. The allocation function takes care of allocating extra memory so that ob_digit[0] … ob_digit[abs(ob_size)-1] are actually available. CAUTION: Generic code manipulating subtypes of PyVarObject has to aware that ints abuse ob_size’s sign bit.
/* (ah*X+al)(bh*X+bl) = ah*bh*X*X + (ah*bl + al*bh)*X + al*bl * Let k = (ah+al)*(bh+bl) = ah*bl + al*bh + ah*bh + al*bl * Then the original product is * ah*bh*X*X + (k - ah*bh - al*bl)*X + al*bl * By picking X to be a power of 2, "*X" is just shifting, and it's * been reduced to 3 multiplies on numbers half the size. */
/* We want to split based on the larger number; fiddle so that b * is largest. */ if (asize > bsize) { t1 = a; a = b; b = t1;
i = asize; asize = bsize; bsize = i; }
/* Use gradeschool math when either number is too small. */ i = a == b ? KARATSUBA_SQUARE_CUTOFF : KARATSUBA_CUTOFF; if (asize <= i) { if (asize == 0) return (PyLongObject *)PyLong_FromLong(0); else return x_mul(a, b); }
/* If a is small compared to b, splitting on b gives a degenerate * case with ah==0, and Karatsuba may be (even much) less efficient * than "grade school" then. However, we can still win, by viewing * b as a string of "big digits", each of width a->ob_size. That * leads to a sequence of balanced calls to k_mul. */ if (2 * asize <= bsize) return k_lopsided_mul(a, b);
/* Split a & b into hi & lo pieces. */ shift = bsize >> 1; if (kmul_split(a, shift, &ah, &al) < 0) goto fail; assert(Py_SIZE(ah) > 0); /* the split isn't degenerate */
if (a == b) { bh = ah; bl = al; Py_INCREF(bh); Py_INCREF(bl); } elseif (kmul_split(b, shift, &bh, &bl) < 0) goto fail;
/* The plan: * 1. Allocate result space (asize + bsize digits: that's always * enough). * 2. Compute ah*bh, and copy into result at 2*shift. * 3. Compute al*bl, and copy into result at 0. Note that this * can't overlap with #2. * 4. Subtract al*bl from the result, starting at shift. This may * underflow (borrow out of the high digit), but we don't care: * we're effectively doing unsigned arithmetic mod * BASE**(sizea + sizeb), and so long as the *final* result fits, * borrows and carries out of the high digit can be ignored. * 5. Subtract ah*bh from the result, starting at shift. * 6. Compute (ah+al)*(bh+bl), and add it into the result starting * at shift. */
/* 1. Allocate result space. */ ret = _PyLong_New(asize + bsize); if (ret == NULL) goto fail; #ifdef Py_DEBUG /* Fill with trash, to catch reference to uninitialized digits. */ memset(ret->ob_digit, 0xDF, Py_SIZE(ret) * sizeof(digit)); #endif
/* 2. t1 <- ah*bh, and copy into high digits of result. */ if ((t1 = k_mul(ah, bh)) == NULL) goto fail; assert(Py_SIZE(t1) >= 0); assert(2*shift + Py_SIZE(t1) <= Py_SIZE(ret)); memcpy(ret->ob_digit + 2*shift, t1->ob_digit, Py_SIZE(t1) * sizeof(digit));
/* Zero-out the digits higher than the ah*bh copy. */ i = Py_SIZE(ret) - 2*shift - Py_SIZE(t1); if (i) memset(ret->ob_digit + 2*shift + Py_SIZE(t1), 0, i * sizeof(digit));
/* 3. t2 <- al*bl, and copy into the low digits. */ if ((t2 = k_mul(al, bl)) == NULL) { Py_DECREF(t1); goto fail; } assert(Py_SIZE(t2) >= 0); assert(Py_SIZE(t2) <= 2*shift); /* no overlap with high digits */ memcpy(ret->ob_digit, t2->ob_digit, Py_SIZE(t2) * sizeof(digit));
/* Zero out remaining digits. */ i = 2*shift - Py_SIZE(t2); /* number of uninitialized digits */ if (i) memset(ret->ob_digit + Py_SIZE(t2), 0, i * sizeof(digit));
/* 4 & 5. Subtract ah*bh (t1) and al*bl (t2). We do al*bl first * because it's fresher in cache. */ i = Py_SIZE(ret) - shift; /* # digits after shift */ (void)v_isub(ret->ob_digit + shift, i, t2->ob_digit, Py_SIZE(t2)); Py_DECREF(t2);
(void)v_isub(ret->ob_digit + shift, i, t1->ob_digit, Py_SIZE(t1)); Py_DECREF(t1);
/* 6. t3 <- (ah+al)(bh+bl), and add into result. */ if ((t1 = x_add(ah, al)) == NULL) goto fail; Py_DECREF(ah); Py_DECREF(al); ah = al = NULL;
if (a == b) { t2 = t1; Py_INCREF(t2); } elseif ((t2 = x_add(bh, bl)) == NULL) { Py_DECREF(t1); goto fail; } Py_DECREF(bh); Py_DECREF(bl); bh = bl = NULL;
t3 = k_mul(t1, t2); Py_DECREF(t1); Py_DECREF(t2); if (t3 == NULL) goto fail; assert(Py_SIZE(t3) >= 0);
/* Add t3. It's not obvious why we can't run out of room here. * See the (*) comment after this function. */ (void)v_iadd(ret->ob_digit + shift, i, t3->ob_digit, Py_SIZE(t3)); Py_DECREF(t3);
/* fast path for single-digit multiplication */ if (Py_ABS(Py_SIZE(a)) <= 1 && Py_ABS(Py_SIZE(b)) <= 1) { stwodigits v = (stwodigits)(MEDIUM_VALUE(a)) * MEDIUM_VALUE(b); return PyLong_FromLongLong((longlong)v); }
z = k_mul(a, b); /* Negate if exactly one of the inputs is negative. */ if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) { _PyLong_Negate(&z); if (z == NULL) returnNULL; } return (PyObject *)z; }
在这里我们看到,当两个数皆小于 2^30-1 时,Python 将直接使用普通乘法并返回,否则将使用 K 算法进行计算