Subversion Repositories HelenOS-historic

Rev

Rev 828 | Rev 1031 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 828 Rev 829
1
/*
1
/*
2
 * Copyright (C) 2005 Josef Cejka
2
 * Copyright (C) 2005 Josef Cejka
3
 * All rights reserved.
3
 * All rights reserved.
4
 *
4
 *
5
 * Redistribution and use in source and binary forms, with or without
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
6
 * modification, are permitted provided that the following conditions
7
 * are met:
7
 * are met:
8
 *
8
 *
9
 * - Redistributions of source code must retain the above copyright
9
 * - Redistributions of source code must retain the above copyright
10
 *   notice, this list of conditions and the following disclaimer.
10
 *   notice, this list of conditions and the following disclaimer.
11
 * - Redistributions in binary form must reproduce the above copyright
11
 * - Redistributions in binary form must reproduce the above copyright
12
 *   notice, this list of conditions and the following disclaimer in the
12
 *   notice, this list of conditions and the following disclaimer in the
13
 *   documentation and/or other materials provided with the distribution.
13
 *   documentation and/or other materials provided with the distribution.
14
 * - The name of the author may not be used to endorse or promote products
14
 * - The name of the author may not be used to endorse or promote products
15
 *   derived from this software without specific prior written permission.
15
 *   derived from this software without specific prior written permission.
16
 *
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 */
27
 */
28
 
28
 
29
#include<sftypes.h>
29
#include<sftypes.h>
30
#include<mul.h>
30
#include<mul.h>
31
#include<comparison.h>
31
#include<comparison.h>
-
 
32
#include<common.h>
32
 
33
 
33
/** Multiply two 32 bit float numbers
34
/** Multiply two 32 bit float numbers
34
 *
35
 *
35
 */
36
 */
36
float32 mulFloat32(float32 a, float32 b)
37
float32 mulFloat32(float32 a, float32 b)
37
{
38
{
38
    float32 result;
39
    float32 result;
39
    __u64 frac1, frac2;
40
    __u64 frac1, frac2;
40
    __s32 exp;
41
    __s32 exp;
41
 
42
 
42
    result.parts.sign = a.parts.sign ^ b.parts.sign;
43
    result.parts.sign = a.parts.sign ^ b.parts.sign;
43
   
44
   
44
    if (isFloat32NaN(a) || isFloat32NaN(b) ) {
45
    if (isFloat32NaN(a) || isFloat32NaN(b) ) {
45
        /* TODO: fix SigNaNs */
46
        /* TODO: fix SigNaNs */
46
        if (isFloat32SigNaN(a)) {
47
        if (isFloat32SigNaN(a)) {
47
            result.parts.fraction = a.parts.fraction;
48
            result.parts.fraction = a.parts.fraction;
48
            result.parts.exp = a.parts.exp;
49
            result.parts.exp = a.parts.exp;
49
            return result;
50
            return result;
50
        };
51
        };
51
        if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */
52
        if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */
52
            result.parts.fraction = b.parts.fraction;
53
            result.parts.fraction = b.parts.fraction;
53
            result.parts.exp = b.parts.exp;
54
            result.parts.exp = b.parts.exp;
54
            return result;
55
            return result;
55
        };
56
        };
56
        /* set NaN as result */
57
        /* set NaN as result */
57
        result.binary = FLOAT32_NAN;
58
        result.binary = FLOAT32_NAN;
58
        return result;
59
        return result;
59
    };
60
    };
60
       
61
       
61
    if (isFloat32Infinity(a)) {
62
    if (isFloat32Infinity(a)) {
62
        if (isFloat32Zero(b)) {
63
        if (isFloat32Zero(b)) {
63
            /* FIXME: zero * infinity */
64
            /* FIXME: zero * infinity */
64
            result.binary = FLOAT32_NAN;
65
            result.binary = FLOAT32_NAN;
65
            return result;
66
            return result;
66
        }
67
        }
67
        result.parts.fraction = a.parts.fraction;
68
        result.parts.fraction = a.parts.fraction;
68
        result.parts.exp = a.parts.exp;
69
        result.parts.exp = a.parts.exp;
69
        return result;
70
        return result;
70
    }
71
    }
71
 
72
 
72
    if (isFloat32Infinity(b)) {
73
    if (isFloat32Infinity(b)) {
73
        if (isFloat32Zero(a)) {
74
        if (isFloat32Zero(a)) {
74
            /* FIXME: zero * infinity */
75
            /* FIXME: zero * infinity */
75
            result.binary = FLOAT32_NAN;
76
            result.binary = FLOAT32_NAN;
76
            return result;
77
            return result;
77
        }
78
        }
78
        result.parts.fraction = b.parts.fraction;
79
        result.parts.fraction = b.parts.fraction;
79
        result.parts.exp = b.parts.exp;
80
        result.parts.exp = b.parts.exp;
80
        return result;
81
        return result;
81
    }
82
    }
82
 
83
 
83
    /* exp is signed so we can easy detect underflow */
84
    /* exp is signed so we can easy detect underflow */
84
    exp = a.parts.exp + b.parts.exp;
85
    exp = a.parts.exp + b.parts.exp;
85
    exp -= FLOAT32_BIAS;
86
    exp -= FLOAT32_BIAS;
86
   
87
   
87
    if (exp >= FLOAT32_MAX_EXPONENT) {
88
    if (exp >= FLOAT32_MAX_EXPONENT) {
88
        /* FIXME: overflow */
89
        /* FIXME: overflow */
89
        /* set infinity as result */
90
        /* set infinity as result */
90
        result.binary = FLOAT32_INF;
91
        result.binary = FLOAT32_INF;
91
        result.parts.sign = a.parts.sign ^ b.parts.sign;
92
        result.parts.sign = a.parts.sign ^ b.parts.sign;
92
        return result;
93
        return result;
93
    };
94
    };
94
   
95
   
95
    if (exp < 0) {
96
    if (exp < 0) {
96
        /* FIXME: underflow */
97
        /* FIXME: underflow */
97
        /* return signed zero */
98
        /* return signed zero */
98
        result.parts.fraction = 0x0;
99
        result.parts.fraction = 0x0;
99
        result.parts.exp = 0x0;
100
        result.parts.exp = 0x0;
100
        return result;
101
        return result;
101
    };
102
    };
102
   
103
   
103
    frac1 = a.parts.fraction;
104
    frac1 = a.parts.fraction;
104
    if (a.parts.exp > 0) {
105
    if (a.parts.exp > 0) {
105
        frac1 |= FLOAT32_HIDDEN_BIT_MASK;
106
        frac1 |= FLOAT32_HIDDEN_BIT_MASK;
106
    } else {
107
    } else {
107
        ++exp;
108
        ++exp;
108
    };
109
    };
109
   
110
   
110
    frac2 = b.parts.fraction;
111
    frac2 = b.parts.fraction;
111
 
112
 
112
    if (b.parts.exp > 0) {
113
    if (b.parts.exp > 0) {
113
        frac2 |= FLOAT32_HIDDEN_BIT_MASK;
114
        frac2 |= FLOAT32_HIDDEN_BIT_MASK;
114
    } else {
115
    } else {
115
        ++exp;
116
        ++exp;
116
    };
117
    };
117
 
118
 
118
    frac1 <<= 1; /* one bit space for rounding */
119
    frac1 <<= 1; /* one bit space for rounding */
119
 
120
 
120
    frac1 = frac1 * frac2;
121
    frac1 = frac1 * frac2;
121
/* round and return */
122
/* round and return */
122
   
123
   
123
    while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) {
124
    while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) {
124
        /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left)*/
125
        /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left)*/
125
        ++exp;
126
        ++exp;
126
        frac1 >>= 1;
127
        frac1 >>= 1;
127
    };
128
    };
128
 
129
 
129
    /* rounding */
130
    /* rounding */
130
    /* ++frac1; FIXME: not works - without it is ok */
131
    /* ++frac1; FIXME: not works - without it is ok */
131
    frac1 >>= 1; /* shift off rounding space */
132
    frac1 >>= 1; /* shift off rounding space */
132
   
133
   
133
    if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) {
134
    if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) {
134
        ++exp;
135
        ++exp;
135
        frac1 >>= 1;
136
        frac1 >>= 1;
136
    };
137
    };
137
 
138
 
138
    if (exp >= FLOAT32_MAX_EXPONENT ) {
139
    if (exp >= FLOAT32_MAX_EXPONENT ) {
139
        /* TODO: fix overflow */
140
        /* TODO: fix overflow */
140
        /* return infinity*/
141
        /* return infinity*/
141
        result.parts.exp = FLOAT32_MAX_EXPONENT;
142
        result.parts.exp = FLOAT32_MAX_EXPONENT;
142
        result.parts.fraction = 0x0;
143
        result.parts.fraction = 0x0;
143
        return result;
144
        return result;
144
    }
145
    }
145
   
146
   
146
    exp -= FLOAT32_FRACTION_SIZE;
147
    exp -= FLOAT32_FRACTION_SIZE;
147
 
148
 
148
    if (exp <= FLOAT32_FRACTION_SIZE) {
149
    if (exp <= FLOAT32_FRACTION_SIZE) {
149
        /* denormalized number */
150
        /* denormalized number */
150
        frac1 >>= 1; /* denormalize */
151
        frac1 >>= 1; /* denormalize */
151
        while ((frac1 > 0) && (exp < 0)) {
152
        while ((frac1 > 0) && (exp < 0)) {
152
            frac1 >>= 1;
153
            frac1 >>= 1;
153
            ++exp;
154
            ++exp;
154
        };
155
        };
155
        if (frac1 == 0) {
156
        if (frac1 == 0) {
156
            /* FIXME : underflow */
157
            /* FIXME : underflow */
157
        result.parts.exp = 0;
158
        result.parts.exp = 0;
158
        result.parts.fraction = 0;
159
        result.parts.fraction = 0;
159
        return result;
160
        return result;
160
        };
161
        };
161
    };
162
    };
162
    result.parts.exp = exp;
163
    result.parts.exp = exp;
163
    result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1);
164
    result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1);
164
   
165
   
165
    return result; 
166
    return result; 
166
   
167
   
167
}
168
}
168
 
169
 
169
/** Multiply two 64 bit float numbers
170
/** Multiply two 64 bit float numbers
170
 *
171
 *
171
 */
172
 */
172
float64 mulFloat64(float64 a, float64 b)
173
float64 mulFloat64(float64 a, float64 b)
173
{
174
{
174
    float64 result;
175
    float64 result;
175
    __u64 frac1, frac2;
176
    __u64 frac1, frac2;
176
    __s32 exp;
177
    __s32 exp;
177
 
178
 
178
    result.parts.sign = a.parts.sign ^ b.parts.sign;
179
    result.parts.sign = a.parts.sign ^ b.parts.sign;
179
   
180
   
180
    if (isFloat64NaN(a) || isFloat64NaN(b) ) {
181
    if (isFloat64NaN(a) || isFloat64NaN(b) ) {
181
        /* TODO: fix SigNaNs */
182
        /* TODO: fix SigNaNs */
182
        if (isFloat64SigNaN(a)) {
183
        if (isFloat64SigNaN(a)) {
183
            result.parts.fraction = a.parts.fraction;
184
            result.parts.fraction = a.parts.fraction;
184
            result.parts.exp = a.parts.exp;
185
            result.parts.exp = a.parts.exp;
185
            return result;
186
            return result;
186
        };
187
        };
187
        if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */
188
        if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */
188
            result.parts.fraction = b.parts.fraction;
189
            result.parts.fraction = b.parts.fraction;
189
            result.parts.exp = b.parts.exp;
190
            result.parts.exp = b.parts.exp;
190
            return result;
191
            return result;
191
        };
192
        };
192
        /* set NaN as result */
193
        /* set NaN as result */
193
        result.binary = FLOAT64_NAN;
194
        result.binary = FLOAT64_NAN;
194
        return result;
195
        return result;
195
    };
196
    };
196
       
197
       
197
    if (isFloat64Infinity(a)) {
198
    if (isFloat64Infinity(a)) {
198
        if (isFloat64Zero(b)) {
199
        if (isFloat64Zero(b)) {
199
            /* FIXME: zero * infinity */
200
            /* FIXME: zero * infinity */
200
            result.binary = FLOAT64_NAN;
201
            result.binary = FLOAT64_NAN;
201
            return result;
202
            return result;
202
        }
203
        }
203
        result.parts.fraction = a.parts.fraction;
204
        result.parts.fraction = a.parts.fraction;
204
        result.parts.exp = a.parts.exp;
205
        result.parts.exp = a.parts.exp;
205
        return result;
206
        return result;
206
    }
207
    }
207
 
208
 
208
    if (isFloat64Infinity(b)) {
209
    if (isFloat64Infinity(b)) {
209
        if (isFloat64Zero(a)) {
210
        if (isFloat64Zero(a)) {
210
            /* FIXME: zero * infinity */
211
            /* FIXME: zero * infinity */
211
            result.binary = FLOAT64_NAN;
212
            result.binary = FLOAT64_NAN;
212
            return result;
213
            return result;
213
        }
214
        }
214
        result.parts.fraction = b.parts.fraction;
215
        result.parts.fraction = b.parts.fraction;
215
        result.parts.exp = b.parts.exp;
216
        result.parts.exp = b.parts.exp;
216
        return result;
217
        return result;
217
    }
218
    }
218
 
219
 
219
    /* exp is signed so we can easy detect underflow */
220
    /* exp is signed so we can easy detect underflow */
220
    exp = a.parts.exp + b.parts.exp;
221
    exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS;
221
    exp -= FLOAT64_BIAS;
-
 
222
   
-
 
223
    if (exp >= FLOAT64_MAX_EXPONENT) {
-
 
224
        /* FIXME: overflow */
-
 
225
        /* set infinity as result */
-
 
226
        result.binary = FLOAT64_INF;
-
 
227
        result.parts.sign = a.parts.sign ^ b.parts.sign;
-
 
228
        return result;
-
 
229
    };
-
 
230
   
-
 
231
    if (exp < 0) {
-
 
232
        /* FIXME: underflow */
-
 
233
        /* return signed zero */
-
 
234
        result.parts.fraction = 0x0;
-
 
235
        result.parts.exp = 0x0;
-
 
236
        return result;
-
 
237
    };
-
 
238
   
222
   
239
    frac1 = a.parts.fraction;
223
    frac1 = a.parts.fraction;
-
 
224
 
240
    if (a.parts.exp > 0) {
225
    if (a.parts.exp > 0) {
241
        frac1 |= FLOAT64_HIDDEN_BIT_MASK;
226
        frac1 |= FLOAT64_HIDDEN_BIT_MASK;
242
    } else {
227
    } else {
243
        ++exp;
228
        ++exp;
244
    };
229
    };
245
   
230
   
246
    frac2 = b.parts.fraction;
231
    frac2 = b.parts.fraction;
247
 
232
 
248
    if (b.parts.exp > 0) {
233
    if (b.parts.exp > 0) {
249
        frac2 |= FLOAT64_HIDDEN_BIT_MASK;
234
        frac2 |= FLOAT64_HIDDEN_BIT_MASK;
250
    } else {
235
    } else {
251
        ++exp;
236
        ++exp;
252
    };
237
    };
253
 
238
 
254
    frac1 <<= 1; /* one bit space for rounding */
239
    frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1);
-
 
240
    frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2);
255
 
241
 
256
    mul64integers(frac1, frac2, &frac1, &frac2);
242
    mul64integers(frac1, frac2, &frac1, &frac2);
257
 
243
 
258
/* round and return */
-
 
259
    /* FIXME: ugly soulution is to shift whole frac2 >> as in 32bit version
-
 
260
     * Here is is more slower because we have to shift two numbers with carry
-
 
261
     * Better is find first nonzero bit and make only one shift
-
 
262
     * Third version is to shift both numbers a bit to right and result will be then
-
 
263
     * placed in higher part of result. Then lower part will be good only for rounding.
-
 
264
     */
-
 
265
   
-
 
266
    while ((exp < FLOAT64_MAX_EXPONENT) && (frac2 > 0 )) {
-
 
267
        frac1 >>= 1;
244
    frac2 |= (frac1 != 0);
268
        frac1 &= ((frac2 & 0x1) << 63);
245
    if (frac2 & (0x1ll << 62)) {
269
        frac2 >>= 1;
246
        frac2 <<= 1;
270
        ++exp;
-
 
271
    }
-
 
272
   
-
 
273
    while ((exp < FLOAT64_MAX_EXPONENT) && (frac1 >= ( (__u64)1 << (FLOAT64_FRACTION_SIZE + 2)))) {
-
 
274
        ++exp;
-
 
275
        frac1 >>= 1;
-
 
276
    };
-
 
277
 
-
 
278
    /* rounding */
-
 
279
    /* ++frac1;  FIXME: not works - without it is ok */
-
 
280
    frac1 >>= 1; /* shift off rounding space */
-
 
281
   
-
 
282
    if ((exp < FLOAT64_MAX_EXPONENT) && (frac1 >= ((__u64)1 << (FLOAT64_FRACTION_SIZE + 1)))) {
-
 
283
        ++exp;
247
        exp--;
284
        frac1 >>= 1;
-
 
285
    };
-
 
286
 
-
 
287
    if (exp >= FLOAT64_MAX_EXPONENT ) {
-
 
288
        /* TODO: fix overflow */
-
 
289
        /* return infinity*/
-
 
290
        result.parts.exp = FLOAT64_MAX_EXPONENT;
-
 
291
        result.parts.fraction = 0x0;
-
 
292
        return result;
-
 
293
    }
248
    }
294
   
-
 
295
    exp -= FLOAT64_FRACTION_SIZE;
-
 
296
 
249
 
297
    if (exp <= FLOAT64_FRACTION_SIZE) {
-
 
298
        /* denormalized number */
-
 
299
        frac1 >>= 1; /* denormalize */
-
 
300
        while ((frac1 > 0) && (exp < 0)) {
-
 
301
            frac1 >>= 1;
-
 
302
            ++exp;
-
 
303
        };
-
 
304
        if (frac1 == 0) {
-
 
305
            /* FIXME : underflow */
-
 
306
        result.parts.exp = 0;
-
 
307
        result.parts.fraction = 0;
-
 
308
        return result;
-
 
309
        };
-
 
310
    };
-
 
311
    result.parts.exp = exp;
-
 
312
    result.parts.fraction = frac1 & ( ((__u64)1 << FLOAT64_FRACTION_SIZE) - 1);
250
    result = finishFloat64(exp, frac2, result.parts.sign);
313
   
-
 
314
    return result; 
251
    return result;
315
   
-
 
316
}
252
}
317
 
253
 
318
/** Multiply two 64 bit numbers and return result in two parts
254
/** Multiply two 64 bit numbers and return result in two parts
319
 * @param a first operand
255
 * @param a first operand
320
 * @param b second operand
256
 * @param b second operand
321
 * @param lo lower part from result
257
 * @param lo lower part from result
322
 * @param hi higher part of result
258
 * @param hi higher part of result
323
 */
259
 */
324
void mul64integers(__u64 a,__u64 b, __u64 *lo, __u64 *hi)
260
void mul64integers(__u64 a,__u64 b, __u64 *lo, __u64 *hi)
325
{
261
{
326
    __u64 low, high, middle1, middle2;
262
    __u64 low, high, middle1, middle2;
327
    __u32 alow, blow;
263
    __u32 alow, blow;
328
   
264
 
329
    alow = a & 0xFFFFFFFF;
265
    alow = a & 0xFFFFFFFF;
330
    blow = b & 0xFFFFFFFF;
266
    blow = b & 0xFFFFFFFF;
331
   
267
   
332
    a >>= 32;
268
    a >>= 32;
333
    b >>= 32;
269
    b >>= 32;
334
   
270
   
335
    low = ((__u64)alow) * blow;
271
    low = ((__u64)alow) * blow;
336
    middle1 = a * blow;
272
    middle1 = a * blow;
337
    middle2 = alow * b;
273
    middle2 = alow * b;
338
    high = a * b;
274
    high = a * b;
339
 
275
 
340
    middle1 += middle2;
276
    middle1 += middle2;
341
    high += (((__u64)(middle1 < middle2)) << 32) + (middle1 >> 32);
277
    high += (((__u64)(middle1 < middle2)) << 32) + (middle1 >> 32);
342
    middle1 <<= 32;
278
    middle1 <<= 32;
343
    low += middle1;
279
    low += middle1;
344
    high += (low < middle1);
280
    high += (low < middle1);
345
    *lo = low;
281
    *lo = low;
346
    *hi = high;
282
    *hi = high;
347
   
283
   
348
    return;
284
    return;
349
}
285
}
350
 
286
 
351
 
287
 
352
 
288