From 7570df267ff2772e1223c214a9bbfab073b4fde0 Mon Sep 17 00:00:00 2001 From: Erik van der Kouwe Date: Mon, 17 May 2010 16:44:26 +0000 Subject: [PATCH] Full 64-bit multitplication and division added to u64 library --- include/minix/u64.h | 5 + lib/libc/arch/i386/int64/Makefile.inc | 3 + lib/libc/arch/i386/int64/bsr64.S | 17 ++ lib/libc/arch/i386/int64/div64.c | 87 ++++++++ lib/libc/arch/i386/int64/div64u.S | 17 +- lib/libc/arch/i386/int64/mul64.c | 20 ++ man/man3/int64.3 | 36 +++- test/Makefile | 2 +- test/run | 2 +- test/test53.c | 292 ++++++++++++++++++++++++++ 10 files changed, 477 insertions(+), 4 deletions(-) create mode 100644 lib/libc/arch/i386/int64/bsr64.S create mode 100644 lib/libc/arch/i386/int64/div64.c create mode 100644 lib/libc/arch/i386/int64/mul64.c create mode 100644 test/test53.c diff --git a/include/minix/u64.h b/include/minix/u64.h index d1e6dcce8..98fb5d451 100644 --- a/include/minix/u64.h +++ b/include/minix/u64.h @@ -15,13 +15,18 @@ u64_t add64ul(u64_t i, unsigned long j); u64_t sub64(u64_t i, u64_t j); u64_t sub64u(u64_t i, unsigned j); u64_t sub64ul(u64_t i, unsigned long j); +int bsr64(u64_t i); unsigned diff64(u64_t i, u64_t j); u64_t cvu64(unsigned i); u64_t cvul64(unsigned long i); unsigned cv64u(u64_t i); unsigned long cv64ul(u64_t i); +u64_t div64(u64_t i, u64_t j); unsigned long div64u(u64_t i, unsigned j); +u64_t div64u64(u64_t i, unsigned j); +u64_t rem64(u64_t i, u64_t j); unsigned rem64u(u64_t i, unsigned j); +u64_t mul64(u64_t i, u64_t j); u64_t mul64u(unsigned long i, unsigned j); int cmp64(u64_t i, u64_t j); int cmp64u(u64_t i, unsigned j); diff --git a/lib/libc/arch/i386/int64/Makefile.inc b/lib/libc/arch/i386/int64/Makefile.inc index 60e5a7248..b3d7fa5ba 100644 --- a/lib/libc/arch/i386/int64/Makefile.inc +++ b/lib/libc/arch/i386/int64/Makefile.inc @@ -4,13 +4,16 @@ SRCS+= \ add64.S \ add64u.S \ + bsr64.S \ cmp64.S \ cv64u.S \ cvu64.S \ diff64.S \ + div64.c \ div64u.S \ ex64.S \ make64.S \ + mul64.c \ mul64u.S \ sub64.S \ sub64u.S diff --git a/lib/libc/arch/i386/int64/bsr64.S b/lib/libc/arch/i386/int64/bsr64.S new file mode 100644 index 000000000..84a00d470 --- /dev/null +++ b/lib/libc/arch/i386/int64/bsr64.S @@ -0,0 +1,17 @@ +/* bsr64() - 64 bit bit scan reverse Author: Erik van der Kouwe */ +/* 15 May 2010 */ +#include + +.text +.globl _bsr64 + +_bsr64: +/* int bsr64(u64_t i); */ + bsr 8(%esp), %eax /* check high-order DWORD */ + jnz 0f /* non-zero: return index+32 */ + bsr 4(%esp), %eax /* check low-order DWORD */ + jnz 1f /* non-zero: return index */ + movl $-1, %eax /* both were zero, return -1 */ + jmp 1f +0: addl $32, %eax /* add 32 to high-order index */ +1: ret diff --git a/lib/libc/arch/i386/int64/div64.c b/lib/libc/arch/i386/int64/div64.c new file mode 100644 index 000000000..4602e7c5b --- /dev/null +++ b/lib/libc/arch/i386/int64/div64.c @@ -0,0 +1,87 @@ +/* div64() - full 64-bit division */ +/* rem64() - full 64-bit modulo */ +/* Author: Erik van der Kouwe */ +/* 14 May 2010 */ +#include +#include + +static u32_t shl64hi(u64_t i, unsigned shift) +{ + /* compute the high-order 32-bit value in (i << shift) */ + if (shift == 0) + return i.hi; + else if (shift < 32) + return (i.hi << shift) | (i.lo >> (32 - shift)); + else if (shift == 32) + return i.lo; + else if (shift < 64) + return i.lo << (shift - 32); + else + return 0; +} + +static u64_t divrem64(u64_t *i, u64_t j) +{ + u32_t i32, j32, q; + u64_t result = { 0, 0 }; + unsigned shift; + + assert(i); + + /* this function is not suitable for small divisors */ + assert(ex64hi(j) != 0); + + /* as long as i >= j we work on reducing i */ + while (cmp64(*i, j) >= 0) { + /* shift to obtain the 32 most significant bits */ + shift = 63 - bsr64(*i); + i32 = shl64hi(*i, shift); + j32 = shl64hi(j, shift); + + /* find a lower bound for *i/j */ + if (j32 + 1 < j32) { + /* avoid overflow, since *i >= j we know q >= 1 */ + q = 1; + } else { + /* use 32-bit division, round j32 up to ensure that + * we obtain a lower bound + */ + q = i32 / (j32 + 1); + + /* since *i >= j we know q >= 1 */ + if (q < 1) q = 1; + } + + /* perform the division using the lower bound we found */ + *i = sub64(*i, mul64(j, cvu64(q))); + result = add64u(result, q); + } + + /* if we get here then *i < j; because we round down we are finished */ + return result; +} + +u64_t div64(u64_t i, u64_t j) +{ + /* divrem64 is unsuitable for small divisors, especially zero which would + * trigger a infinite loop; use assembly function in this case + */ + if (!ex64hi(j)) { + return div64u64(i, ex64lo(j)); + } + + return divrem64(&i, j); +} + +u64_t rem64(u64_t i, u64_t j) +{ + /* divrem64 is unsuitable for small divisors, especially zero which would + * trigger a infinite loop; use assembly function in this case + */ + if (!ex64hi(j)) { + return cvu64(rem64u(i, ex64lo(j))); + } + + divrem64(&i, j); + return i; +} diff --git a/lib/libc/arch/i386/int64/div64u.S b/lib/libc/arch/i386/int64/div64u.S index c66979480..164166f8f 100644 --- a/lib/libc/arch/i386/int64/div64u.S +++ b/lib/libc/arch/i386/int64/div64u.S @@ -1,8 +1,10 @@ /* div64u() - 64 bit divided by unsigned giving unsigned long */ /* Author: Kees J. Bot */ /* 7 Dec 1995 */ +#include + .text -.globl _div64u, _rem64u +.globl _div64u, _div64u64, _rem64u _div64u: /* unsigned long div64u(u64_t i, unsigned j); */ @@ -13,6 +15,19 @@ _div64u: divl 12(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */ ret +_div64u64: +/* u64_t div64u64(u64_t i, unsigned j); */ + xorl %edx, %edx + movl 12(%esp), %eax /* i = (ih<<32) + il */ + divl 16(%esp) /* ih = q * j + r */ + movl 4(%esp), %ecx /* get pointer to result */ + movl %eax, 4(%ecx) /* store high-order result */ + movl 8(%esp), %eax + divl 16(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */ + movl %eax, 0(%ecx) /* store low result */ + movl %ecx, %eax /* return pointer to result struct */ + ret BYTES_TO_POP_ON_STRUCT_RETURN + _rem64u: /* unsigned rem64u(u64_t i, unsigned j); */ pop %ecx diff --git a/lib/libc/arch/i386/int64/mul64.c b/lib/libc/arch/i386/int64/mul64.c new file mode 100644 index 000000000..d3c12aafd --- /dev/null +++ b/lib/libc/arch/i386/int64/mul64.c @@ -0,0 +1,20 @@ +#include + +u64_t mul64(u64_t i, u64_t j) +{ + u64_t result; + + /* Compute as follows: + * i * j = + * (i.hi << 32 + i.lo) * (j.hi << 32 + j.lo) = + * (i.hi << 32) * (j.hi << 32 + j.lo) + i.lo * (j.hi << 32 + j.lo) = + * (i.hi * j.hi) << 64 + (i.hi * j.lo) << 32 + (i.lo * j.hi << 32) + i.lo * j.lo + * + * 64-bit-result multiply only needed for (i.lo * j.lo) + * upper 32 bits overflow for (i.lo * j.hi) and (i.hi * j.lo) + * all overflows for (i.hi * j.hi) + */ + result = mul64u(i.lo, j.lo); + result.hi += i.hi * j.lo + i.lo * j.hi; + return result; +} diff --git a/man/man3/int64.3 b/man/man3/int64.3 index 9bc04ab83..ad19b0c5d 100644 --- a/man/man3/int64.3 +++ b/man/man3/int64.3 @@ -1,6 +1,6 @@ .TH INT64 3 .SH NAME -int64, add64, add64u, add64ul, sub64, sub64u, sub64ul, diff64, cvu64, cvul64, cv64u, cv64ul, div64u, rem64u, mul64u, cmp64, cmp64u, cmp64ul, ex64lo, ex64hi, make64 \- 64 bit disk offset computations +int64, add64, add64u, add64ul, sub64, sub64u, sub64ul, diff64, bsr64, cvu64, cvul64, cv64u, cv64ul, div64, div64u, div64u64, rem64, rem64u, mul64, mul64u, cmp64, cmp64u, cmp64ul, ex64lo, ex64hi, make64 \- 64 bit disk offset computations .SH SYNOPSIS .ft B .nf @@ -13,12 +13,17 @@ u64_t sub64(u64_t \fIi\fP, u64_t \fIj\fP) u64_t sub64u(u64_t \fIi\fP, unsigned \fIj\fP) u64_t sub64ul(u64_t \fIi\fP, unsigned long \fIj\fP) unsigned diff64(u64_t \fIi\fP, u64_t \fIj\fP) +int bsr64(u64_t \fIi\fP) u64_t cvu64(unsigned \fIi\fP) u64_t cvul64(unsigned long \fIi\fP) unsigned cv64u(u64_t \fIi\fP) unsigned long cv64ul(u64_t \fIi\fP) +u64_t div64(u64_t \fIi\fP, u64_t \fIj\fP) unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP) +u64_t div64u64(u64_t \fIi\fP, unsigned \fIj\fP) +u64_t rem64(u64_t \fIi\fP, u64_t \fIj\fP) unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP) +u64_t mul64(u64_t \fIi\fP, u64_t \fIj\fP) u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP) int cmp64(u64_t \fIi\fP, u64_t \fIj\fP) int cmp64u(u64_t \fIi\fP, unsigned \fIj\fP) @@ -95,6 +100,9 @@ from the 64 bit number .I i forming an unsigned. Overflow is not checked. .TP +.B "int bsr64(u64_t \fIi\fP)" +Return the index of the highest-order bit set. If the value is zero, -1 is returned. +.TP .B "u64_t cvu64(unsigned \fIi\fP)" Convert an unsigned to a 64 bit number. .TP @@ -109,6 +117,12 @@ Convert a 64 bit number to an unsigned if it fits, otherwise return Convert a 64 bit number to an unsigned long if it fits, otherwise return .BR ULONG_MAX . .TP +.B "u64_t div64(u64_t \fIi\fP, u64_t \fIj\fP)" +Divide the 64 bit number +.I i +by the 64 bit number +.I j +giving a 64 bit number. .B "unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP)" Divide the 64 bit number .I i @@ -116,6 +130,19 @@ by the unsigned .I j giving an unsigned long. Overflow is not checked. (Typical "byte offset to block number" conversion.) +.B "u64_t div64u64(u64_t \fIi\fP, unsigned \fIj\fP)" +Divide the 64 bit number +.I i +by the unsigned +.I j +giving a 64 bit number. +.TP +.B "u64_t rem64(u64_t \fIi\fP, u64_t \fIj\fP)" +Compute the remainder of the division of the 64 bit number +.I i +by the 64 bit number +.I j +as a 64 bit number. .TP .B "unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP)" Compute the remainder of the division of the 64 bit number @@ -124,6 +151,13 @@ by the unsigned .I j as an unsigned. (Typical "byte offset within a block" computation.) .TP +.B "u64_t mul64(u64_t \fIi\fP, u64_t \fIj\fP)" +Multiply the 64 bit number +.I i +by the 64 bit number +.I j +giving a 64 bit number. +.TP .B "u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP)" Multiply the unsigned long .I i diff --git a/test/Makefile b/test/Makefile index 1b22b3563..86bd4fcdf 100644 --- a/test/Makefile +++ b/test/Makefile @@ -12,7 +12,7 @@ OBJ= test1 test2 test3 test4 test5 test6 test7 test8 test9 \ test21 test22 test23 test25 test26 test27 test28 test29 \ test30 test31 test32 test34 test35 test36 test37 test38 \ test39 t10a t11a t11b test40 t40a t40b t40c t40d t40e t40f test41 \ - test42 test44 test45 test47 test48 test49 test50 test51 test52 + test42 test44 test45 test47 test48 test49 test50 test51 test52 test53 BIGOBJ= test20 test24 ROOTOBJ= test11 test33 test43 test46 diff --git a/test/run b/test/run index 270a2a150..6b0d9bd4d 100755 --- a/test/run +++ b/test/run @@ -14,7 +14,7 @@ badones= # list of tests that failed tests=" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \ 41 42 43 44 45 45-gcc 46 47 48 49 49-gcc 50 \ - 51 51-gcc 52 52-gcc \ + 51 51-gcc 52 52-gcc 53 \ sh1.sh sh2.sh" tests_no=`expr 0` diff --git a/test/test53.c b/test/test53.c new file mode 100644 index 000000000..e2d57b5de --- /dev/null +++ b/test/test53.c @@ -0,0 +1,292 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define ERR err(__LINE__) +#define MAX_ERROR 4 +#define TIMED 0 + +static volatile int errct; +static volatile expect_SIGFPE; +static u64_t i, j, k; +static jmp_buf jmpbuf_SIGFPE, jmpbuf_main; + +static void quit(void) +{ + if (errct == 0) { + printf("ok\n"); + exit(0); + } else { + printf("%d errors\n", errct); + exit(1); + } + assert(0); /* not reachable */ +} + +static void err(int line) +{ + /* print error information */ + printf("error line %d; i=0x%.8x%.8x; j=0x%.8x%.8x; k=0x%.8x%.8x\n", + line, + ex64hi(i), ex64lo(i), + ex64hi(j), ex64lo(j), + ex64hi(k), ex64lo(k)); + + /* quit after too many errors */ + if (errct++ > MAX_ERROR) { + printf("Too many errors; test aborted\n"); + quit(); + } +} + +#define LENGTHOF(arr) (sizeof(arr) / sizeof(arr[0])) + +static u64_t getargval(int index, int *done) +{ + u32_t values[] = { + /* corner cases */ + 0, + 1, + 0x7fffffff, + 0x80000000, + 0x80000001, + 0xffffffff, + /* random values */ + 0xa9, + 0x0d88, + 0x242811, + 0xeb44d1bc, + 0x5b, + 0xfb50, + 0x569c02, + 0xb23c8f7d, + 0xc3, + 0x2366, + 0xfabb73, + 0xcb4e8aef, + 0xe9, + 0xffdc, + 0x05842d, + 0x3fff902d}; + + assert(done); + + /* values with corner case and random 32-bit components */ + if (index < LENGTHOF(values) * LENGTHOF(values)) + return make64(values[index / LENGTHOF(values)], values[index % LENGTHOF(values)]); + + index -= LENGTHOF(values) * LENGTHOF(values); + + /* small numbers */ + if (index < 16) return make64(index + 2, 0); + index -= 16; + + /* big numbers */ + if (index < 16) return make64(-index - 2, -1); + index -= 16; + + /* powers of two */ + if (index < 14) return make64(1 << (index * 2 + 5), 0); + index -= 14; + if (index < 16) return make64(0, 1 << (index * 2 + 1)); + index -= 16; + + /* done */ + *done = 1; + return make64(0, 0); +} + +static void handler_SIGFPE(int signum) +{ + assert(signum == SIGFPE); + + /* restore the signal handler */ + if (signal(SIGFPE, handler_SIGFPE) == SIG_ERR) ERR; + + /* division by zero occurred, was this expected? */ + if (expect_SIGFPE) { + /* expected: jump back to test */ + expect_SIGFPE = 0; + longjmp(jmpbuf_SIGFPE, -1); + } else { + /* not expected: error and jump back to main */ + longjmp(jmpbuf_main, -1); + } + + /* not reachable */ + assert(0); + exit(-1); +} + +static void testmul(void) +{ + int kdone, kidx; + u32_t ilo = ex64lo(i), jlo = ex64lo(j); + u64_t prod = mul64(i, j); + int prodbits; + + /* compute maximum index of highest-order bit */ + prodbits = bsr64(i) + bsr64(j) + 1; + if (cmp64u(i, 0) == 0 || cmp64u(j, 0) == 0) prodbits = -1; + if (bsr64(prod) > prodbits) ERR; + + /* compare to 32-bit multiplication if possible */ + if (ex64hi(i) == 0 && ex64hi(j) == 0) { + if (cmp64(prod, mul64u(ilo, jlo)) != 0) ERR; + + /* if there is no overflow we can check against pure 32-bit */ + if (prodbits < 32 && cmp64u(prod, ilo * jlo) != 0) ERR; + } + + /* in 32-bit arith low-order DWORD matches regardless of overflow */ + if (ex64lo(prod) != ilo * jlo) ERR; + + /* multiplication by zero yields zero */ + if (prodbits < 0 && cmp64u(prod, 0) != 0) ERR; + + /* if there is no overflow, check absence of zero divisors */ + if (prodbits >= 0 && prodbits < 64 && cmp64u(prod, 0) == 0) ERR; + + /* commutativity */ + if (cmp64(prod, mul64(j, i)) != 0) ERR; + + /* loop though all argument value combinations for third argument */ + for (kdone = 0, kidx = 0; k = getargval(kidx, &kdone), !kdone; kidx++) { + /* associativity */ + if (cmp64(mul64(mul64(i, j), k), mul64(i, mul64(j, k))) != 0) ERR; + + /* left and right distributivity */ + if (cmp64(mul64(add64(i, j), k), add64(mul64(i, k), mul64(j, k))) != 0) ERR; + if (cmp64(mul64(i, add64(j, k)), add64(mul64(i, j), mul64(i, k))) != 0) ERR; + } +} + +static void testdiv0(void) +{ + int funcidx; + + assert(cmp64u(j, 0) == 0); + + /* loop through the 5 different division functions */ + for (funcidx = 0; funcidx < 5; funcidx++) { + expect_SIGFPE = 1; + if (setjmp(jmpbuf_SIGFPE) == 0) { + /* divide by zero using various functions */ + switch (funcidx) { + case 0: div64(i, j); ERR; break; + case 1: div64u64(i, ex64lo(j)); ERR; break; + case 2: div64u(i, ex64lo(j)); ERR; break; + case 3: rem64(i, j); ERR; break; + case 4: rem64u(i, ex64lo(j)); ERR; break; + default: assert(0); ERR; break; + } + + /* if we reach this point there was no signal and an + * error has been recorded + */ + expect_SIGFPE = 0; + } else { + /* a signal has been received and expect_SIGFPE has + * been reset; all is ok now + */ + assert(!expect_SIGFPE); + } + } +} + +static void testdiv(void) +{ + u64_t q, r; +#if TIMED + struct timeval tvstart, tvend; + + printf("i=0x%.8x%.8x; j=0x%.8x%.8x\n", + ex64hi(i), ex64lo(i), + ex64hi(j), ex64lo(j)); + fflush(stdout); + if (gettimeofday(&tvstart, NULL) < 0) ERR; +#endif + + /* division by zero has a separate test */ + if (cmp64u(j, 0) == 0) { + testdiv0(); + return; + } + + /* perform division, store q in k to make ERR more informative */ + q = div64(i, j); + r = rem64(i, j); + k = q; + +#if TIMED + if (gettimeofday(&tvend, NULL) < 0) ERR; + tvend.tv_sec -= tvstart.tv_sec; + tvend.tv_usec -= tvstart.tv_usec; + if (tvend.tv_usec < 0) { + tvend.tv_sec -= 1; + tvend.tv_usec += 1000000; + } + printf("q=0x%.8x%.8x; r=0x%.8x%.8x; time=%d.%.6d\n", + ex64hi(q), ex64lo(q), + ex64hi(r), ex64lo(r), + tvend.tv_sec, tvend.tv_usec); + fflush(stdout); +#endif + + /* compare to 64/32-bit division if possible */ + if (!ex64hi(j)) { + if (cmp64(q, div64u64(i, ex64lo(j))) != 0) ERR; + if (!ex64hi(q)) { + if (cmp64u(q, div64u(i, ex64lo(j))) != 0) ERR; + } + if (cmp64u(r, rem64u(i, ex64lo(j))) != 0) ERR; + + /* compare to 32-bit division if possible */ + if (!ex64hi(i)) { + if (cmp64u(q, ex64lo(i) / ex64lo(j)) != 0) ERR; + if (cmp64u(r, ex64lo(i) % ex64lo(j)) != 0) ERR; + } + } + + /* check results using i = q j + r and r < j */ + if (cmp64(i, add64(mul64(q, j), r)) != 0) ERR; + if (cmp64(r, j) >= 0) ERR; +} + +static void test(void) +{ + int idone, jdone, iidx, jidx; + + /* loop though all argument value combinations */ + for (idone = 0, iidx = 0; i = getargval(iidx, &idone), !idone; iidx++) + for (jdone = 0, jidx = 0; j = getargval(jidx, &jdone), !jdone; jidx++) { + testmul(); + testdiv(); + } +} + +int main(void) +{ + printf("Test 53 "); + + /* set up signal handler to deal with div by zero */ + if (setjmp(jmpbuf_main) == 0) { + if (signal(SIGFPE, handler_SIGFPE) == SIG_ERR) ERR; + + /* perform tests */ + test(); + } else { + /* an unexpected SIGFPE has occurred */ + ERR; + } + + /* this was all */ + quit(); + assert(0); /* not reachable */ + return -1; +} -- 2.44.0