| Ilya Zakharevich on Sun, 2 Sep 2001 11:45:29 -0400 |
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Date Index] [Thread Index]
| [PATCH 2.1.1] 15x speedup of prime tables |
The following patch makes calculations of primes up to 100m (or
435000000) 15x times quickier on UltraSparcs with 256K of cache. I do
not have x86 with 256K level=2 cache to check the effects on x86 - but
they should be quite similar.
This was suggested by a discussion on s.m.research. Results: up to
100m in 1.7s, up to 435m in 7.9s (same as before with >4M caches, and
better that before with 4M cache).
Enjoy,
Ilya
--- ./src/basemath/arith2.c~ Mon Jan 15 14:21:55 2001
+++ ./src/basemath/arith2.c Sun Sep 2 11:33:29 2001
@@ -100,7 +100,7 @@ initprimes1(long size, long *lenp, long
return (byteptr) gprealloc(p,r-p,size + 2);
}
-#define PRIME_ARENA (512 * 1024)
+#define PRIME_ARENA (200 * 1024) /* No slowdown even with 256K level-2 cache */
/* Here's the workhorse. This is recursive, although normally the first
recursive call will bottom out and invoke initprimes1() at once.
@@ -108,7 +108,7 @@ initprimes1(long size, long *lenp, long
byteptr
initprimes0(ulong maxnum, long *lenp, long *lastp)
{
- long k, size, alloced, asize, psize, rootnum, curlow, last, remains, need;
+ long k, size, alloced, asize, psize, rootnum, curlow, last, remains;
byteptr q,r,s,fin, p, p1, fin1, plast, curdiff;
#if 0
@@ -134,12 +134,25 @@ initprimes0(ulong maxnum, long *lenp, lo
fin1 = p1 + psize - 1;
remains = (maxnum - rootnum) >> 1; /* number of odd numbers to check */
- need = 100 * rootnum; /* Make % overhead negligeable. */
- if (need < PRIME_ARENA) need = PRIME_ARENA;
- if (avma - bot < need>>1) { /* need to do our own allocation */
- alloced = 1; asize = need;
+ /* ARENA_IN_ROOTS below 12: some slowdown starts to be noticable
+ * when things fit into the cache.
+ * XXX The choice of 10 gives a slowdown of 1-2% on UltraSparcII,
+ * but makes calculations even for (the current) maximum of 436273009
+ * fit into 256K cache (still common for some architectures).
+ *
+ * One may change it when small caches become uncommon, but the win
+ * is not going to be very noticable...
+ */
+#ifndef ARENA_IN_ROOTS
+# define ARENA_IN_ROOTS 10
+#endif /* ARENA_IN_ROOTS */
+
+ asize = ARENA_IN_ROOTS * rootnum; /* Make % overhead negligeable. */
+ if (asize < PRIME_ARENA) asize = PRIME_ARENA;
+ if (avma - bot < asize>>1) { /* need to do our own allocation */
+ alloced = 1;
} else { /* scratch area is free part of PARI stack */
- alloced = 0; asize = avma - bot;
+ alloced = 0;
}
if (asize > remains) asize = remains + 1;/* + room for a sentinel byte */
if (alloced)