Rev 828 | Rev 1031 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 828 | Rev 829 | ||
---|---|---|---|
Line 27... | Line 27... | ||
27 | */ |
27 | */ |
28 | 28 | ||
29 | #include<sftypes.h> |
29 | #include<sftypes.h> |
30 | #include<mul.h> |
30 | #include<mul.h> |
31 | #include<comparison.h> |
31 | #include<comparison.h> |
- | 32 | #include<common.h> |
|
32 | 33 | ||
33 | /** Multiply two 32 bit float numbers |
34 | /** Multiply two 32 bit float numbers |
34 | * |
35 | * |
35 | */ |
36 | */ |
36 | float32 mulFloat32(float32 a, float32 b) |
37 | float32 mulFloat32(float32 a, float32 b) |
Line 215... | Line 216... | ||
215 | result.parts.exp = b.parts.exp; |
216 | result.parts.exp = b.parts.exp; |
216 | return result; |
217 | return result; |
217 | } |
218 | } |
218 | 219 | ||
219 | /* exp is signed so we can easy detect underflow */ |
220 | /* exp is signed so we can easy detect underflow */ |
220 | exp = a.parts.exp + b.parts.exp; |
221 | exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS; |
221 | exp -= FLOAT64_BIAS; |
- | |
222 | - | ||
223 | if (exp >= FLOAT64_MAX_EXPONENT) { |
- | |
224 | /* FIXME: overflow */ |
- | |
225 | /* set infinity as result */ |
- | |
226 | result.binary = FLOAT64_INF; |
- | |
227 | result.parts.sign = a.parts.sign ^ b.parts.sign; |
- | |
228 | return result; |
- | |
229 | }; |
- | |
230 | - | ||
231 | if (exp < 0) { |
- | |
232 | /* FIXME: underflow */ |
- | |
233 | /* return signed zero */ |
- | |
234 | result.parts.fraction = 0x0; |
- | |
235 | result.parts.exp = 0x0; |
- | |
236 | return result; |
- | |
237 | }; |
- | |
238 | 222 | ||
239 | frac1 = a.parts.fraction; |
223 | frac1 = a.parts.fraction; |
- | 224 | ||
240 | if (a.parts.exp > 0) { |
225 | if (a.parts.exp > 0) { |
241 | frac1 |= FLOAT64_HIDDEN_BIT_MASK; |
226 | frac1 |= FLOAT64_HIDDEN_BIT_MASK; |
242 | } else { |
227 | } else { |
243 | ++exp; |
228 | ++exp; |
244 | }; |
229 | }; |
Line 249... | Line 234... | ||
249 | frac2 |= FLOAT64_HIDDEN_BIT_MASK; |
234 | frac2 |= FLOAT64_HIDDEN_BIT_MASK; |
250 | } else { |
235 | } else { |
251 | ++exp; |
236 | ++exp; |
252 | }; |
237 | }; |
253 | 238 | ||
254 | frac1 <<= 1; /* one bit space for rounding */ |
239 | frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1); |
- | 240 | frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2); |
|
255 | 241 | ||
256 | mul64integers(frac1, frac2, &frac1, &frac2); |
242 | mul64integers(frac1, frac2, &frac1, &frac2); |
257 | 243 | ||
258 | /* round and return */ |
- | |
259 | /* FIXME: ugly soulution is to shift whole frac2 >> as in 32bit version |
- | |
260 | * Here is is more slower because we have to shift two numbers with carry |
- | |
261 | * Better is find first nonzero bit and make only one shift |
- | |
262 | * Third version is to shift both numbers a bit to right and result will be then |
- | |
263 | * placed in higher part of result. Then lower part will be good only for rounding. |
- | |
264 | */ |
- | |
265 | - | ||
266 | while ((exp < FLOAT64_MAX_EXPONENT) && (frac2 > 0 )) { |
- | |
267 | frac1 >>= 1; |
244 | frac2 |= (frac1 != 0); |
268 | frac1 &= ((frac2 & 0x1) << 63); |
245 | if (frac2 & (0x1ll << 62)) { |
269 | frac2 >>= 1; |
246 | frac2 <<= 1; |
270 | ++exp; |
- | |
271 | } |
- | |
272 | - | ||
273 | while ((exp < FLOAT64_MAX_EXPONENT) && (frac1 >= ( (__u64)1 << (FLOAT64_FRACTION_SIZE + 2)))) { |
- | |
274 | ++exp; |
- | |
275 | frac1 >>= 1; |
- | |
276 | }; |
- | |
277 | - | ||
278 | /* rounding */ |
- | |
279 | /* ++frac1; FIXME: not works - without it is ok */ |
- | |
280 | frac1 >>= 1; /* shift off rounding space */ |
- | |
281 | - | ||
282 | if ((exp < FLOAT64_MAX_EXPONENT) && (frac1 >= ((__u64)1 << (FLOAT64_FRACTION_SIZE + 1)))) { |
- | |
283 | ++exp; |
247 | exp--; |
284 | frac1 >>= 1; |
- | |
285 | }; |
- | |
286 | - | ||
287 | if (exp >= FLOAT64_MAX_EXPONENT ) { |
- | |
288 | /* TODO: fix overflow */ |
- | |
289 | /* return infinity*/ |
- | |
290 | result.parts.exp = FLOAT64_MAX_EXPONENT; |
- | |
291 | result.parts.fraction = 0x0; |
- | |
292 | return result; |
- | |
293 | } |
248 | } |
294 | - | ||
295 | exp -= FLOAT64_FRACTION_SIZE; |
- | |
296 | 249 | ||
297 | if (exp <= FLOAT64_FRACTION_SIZE) { |
- | |
298 | /* denormalized number */ |
- | |
299 | frac1 >>= 1; /* denormalize */ |
- | |
300 | while ((frac1 > 0) && (exp < 0)) { |
- | |
301 | frac1 >>= 1; |
- | |
302 | ++exp; |
- | |
303 | }; |
- | |
304 | if (frac1 == 0) { |
- | |
305 | /* FIXME : underflow */ |
- | |
306 | result.parts.exp = 0; |
- | |
307 | result.parts.fraction = 0; |
- | |
308 | return result; |
- | |
309 | }; |
- | |
310 | }; |
- | |
311 | result.parts.exp = exp; |
- | |
312 | result.parts.fraction = frac1 & ( ((__u64)1 << FLOAT64_FRACTION_SIZE) - 1); |
250 | result = finishFloat64(exp, frac2, result.parts.sign); |
313 | - | ||
314 | return result; |
251 | return result; |
315 | - | ||
316 | } |
252 | } |
317 | 253 | ||
318 | /** Multiply two 64 bit numbers and return result in two parts |
254 | /** Multiply two 64 bit numbers and return result in two parts |
319 | * @param a first operand |
255 | * @param a first operand |
320 | * @param b second operand |
256 | * @param b second operand |
Line 323... | Line 259... | ||
323 | */ |
259 | */ |
324 | void mul64integers(__u64 a,__u64 b, __u64 *lo, __u64 *hi) |
260 | void mul64integers(__u64 a,__u64 b, __u64 *lo, __u64 *hi) |
325 | { |
261 | { |
326 | __u64 low, high, middle1, middle2; |
262 | __u64 low, high, middle1, middle2; |
327 | __u32 alow, blow; |
263 | __u32 alow, blow; |
328 | 264 | ||
329 | alow = a & 0xFFFFFFFF; |
265 | alow = a & 0xFFFFFFFF; |
330 | blow = b & 0xFFFFFFFF; |
266 | blow = b & 0xFFFFFFFF; |
331 | 267 | ||
332 | a >>= 32; |
268 | a >>= 32; |
333 | b >>= 32; |
269 | b >>= 32; |