/uspace/trunk/softfloat/generic/div.c |
---|
31,7 → 31,9 |
#include<div.h> |
#include<comparison.h> |
#include<mul.h> |
#include<common.h> |
float32 divFloat32(float32 a, float32 b) |
{ |
float32 result; |
306,57 → 308,12 |
cfrac |= ( remlo != 0 ); |
} |
/* pack and round */ |
/* find first nonzero digit and shift result and detect possibly underflow */ |
while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) { |
cexp--; |
cfrac <<= 1; |
/* TODO: fix underflow */ |
}; |
cfrac >>= 1; |
++cexp; |
cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3)); |
if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) { |
++cexp; |
cfrac >>= 1; |
} |
/* check overflow */ |
if (cexp >= FLOAT64_MAX_EXPONENT ) { |
/* FIXME: overflow, return infinity */ |
result.parts.exp = FLOAT64_MAX_EXPONENT; |
result.parts.fraction = 0; |
/* round and shift */ |
result = finishFloat64(cexp, cfrac, result.parts.sign); |
return result; |
} |
if (cexp < 0) { |
/* FIXME: underflow */ |
result.parts.exp = 0; |
if ((cexp + FLOAT64_FRACTION_SIZE) < 0) { |
result.parts.fraction = 0; |
return result; |
} |
cfrac >>= 1; |
while (cexp < 0) { |
cexp ++; |
cfrac >>= 1; |
} |
return result; |
} else { |
cexp ++; /*normalized*/ |
result.parts.exp = (__u32)cexp; |
} |
result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK)); |
return result; |
} |
__u64 divFloat64estim(__u64 a, __u64 b) |
{ |
__u64 bhi; |
/uspace/trunk/softfloat/generic/common.c |
---|
0,0 → 1,89 |
/* |
* Copyright (C) 2005 Josef Cejka |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
#include<sftypes.h> |
#include<common.h> |
/** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions |
* @param exp exponent with bias |
* @param cfrac fraction shifted 10 places left with added hidden bit |
* @return valied float64 |
*/ |
float64 finishFloat64(__s32 cexp, __u64 cfrac, char sign) |
{ |
float64 result; |
result.parts.sign = sign; |
/* find first nonzero digit and shift result and detect possibly underflow */ |
while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) { |
cexp--; |
cfrac <<= 1; |
/* TODO: fix underflow */ |
}; |
cfrac >>= 1; |
++cexp; |
cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3)); |
if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) { |
++cexp; |
cfrac >>= 1; |
} |
/* check overflow */ |
if (cexp >= FLOAT64_MAX_EXPONENT ) { |
/* FIXME: overflow, return infinity */ |
result.parts.exp = FLOAT64_MAX_EXPONENT; |
result.parts.fraction = 0; |
return result; |
} |
if (cexp < 0) { |
/* FIXME: underflow */ |
result.parts.exp = 0; |
if ((cexp + FLOAT64_FRACTION_SIZE) < 0) { |
result.parts.fraction = 0; |
return result; |
} |
cfrac >>= 1; |
while (cexp < 0) { |
cexp ++; |
cfrac >>= 1; |
} |
return result; |
} else { |
cexp ++; /*normalized*/ |
result.parts.exp = (__u32)cexp; |
} |
result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK)); |
return result; |
} |
/uspace/trunk/softfloat/generic/mul.c |
---|
29,6 → 29,7 |
#include<sftypes.h> |
#include<mul.h> |
#include<comparison.h> |
#include<common.h> |
/** Multiply two 32 bit float numbers |
* |
217,26 → 218,10 |
} |
/* exp is signed so we can easily detect underflow */ |
exp = a.parts.exp + b.parts.exp; |
exp -= FLOAT64_BIAS; |
exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS; |
if (exp >= FLOAT64_MAX_EXPONENT) { |
/* FIXME: overflow */ |
/* set infinity as result */ |
result.binary = FLOAT64_INF; |
result.parts.sign = a.parts.sign ^ b.parts.sign; |
return result; |
}; |
frac1 = a.parts.fraction; |
if (exp < 0) { |
/* FIXME: underflow */ |
/* return signed zero */ |
result.parts.fraction = 0x0; |
result.parts.exp = 0x0; |
return result; |
}; |
frac1 = a.parts.fraction; |
if (a.parts.exp > 0) { |
frac1 |= FLOAT64_HIDDEN_BIT_MASK; |
} else { |
251,70 → 236,21 |
++exp; |
}; |
frac1 <<= 1; /* one bit space for rounding */ |
frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1); |
frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2); |
mul64integers(frac1, frac2, &frac1, &frac2); |
/* round and return */ |
/* FIXME: an ugly solution is to shift the whole frac2 right, as in the 32-bit version. |
* Here it is slower because we have to shift two numbers with carry. |
* A better way is to find the first nonzero bit and make only one shift. |
* A third option is to shift both numbers a bit to the right so that the result is |
* placed in the higher part of the result; the lower part is then useful only for rounding. |
*/ |
while ((exp < FLOAT64_MAX_EXPONENT) && (frac2 > 0 )) { |
frac1 >>= 1; |
frac1 &= ((frac2 & 0x1) << 63); |
frac2 >>= 1; |
++exp; |
frac2 |= (frac1 != 0); |
if (frac2 & (0x1ll << 62)) { |
frac2 <<= 1; |
exp--; |
} |
while ((exp < FLOAT64_MAX_EXPONENT) && (frac1 >= ( (__u64)1 << (FLOAT64_FRACTION_SIZE + 2)))) { |
++exp; |
frac1 >>= 1; |
}; |
/* rounding */ |
/* ++frac1; FIXME: not works - without it is ok */ |
frac1 >>= 1; /* shift off rounding space */ |
if ((exp < FLOAT64_MAX_EXPONENT) && (frac1 >= ((__u64)1 << (FLOAT64_FRACTION_SIZE + 1)))) { |
++exp; |
frac1 >>= 1; |
}; |
if (exp >= FLOAT64_MAX_EXPONENT ) { |
/* TODO: fix overflow */ |
/* return infinity*/ |
result.parts.exp = FLOAT64_MAX_EXPONENT; |
result.parts.fraction = 0x0; |
result = finishFloat64(exp, frac2, result.parts.sign); |
return result; |
} |
exp -= FLOAT64_FRACTION_SIZE; |
if (exp <= FLOAT64_FRACTION_SIZE) { |
/* denormalized number */ |
frac1 >>= 1; /* denormalize */ |
while ((frac1 > 0) && (exp < 0)) { |
frac1 >>= 1; |
++exp; |
}; |
if (frac1 == 0) { |
/* FIXME : underflow */ |
result.parts.exp = 0; |
result.parts.fraction = 0; |
return result; |
}; |
}; |
result.parts.exp = exp; |
result.parts.fraction = frac1 & ( ((__u64)1 << FLOAT64_FRACTION_SIZE) - 1); |
return result; |
} |
/** Multiply two 64 bit numbers and return result in two parts |
* @param a first operand |
* @param b second operand |