Subversion Repositories HelenOS-historic

Rev

Rev 829 | Rev 1657 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
731 cejka 1
/*
2
 * Copyright (C) 2005 Josef Cejka
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 *
9
 * - Redistributions of source code must retain the above copyright
10
 *   notice, this list of conditions and the following disclaimer.
11
 * - Redistributions in binary form must reproduce the above copyright
12
 *   notice, this list of conditions and the following disclaimer in the
13
 *   documentation and/or other materials provided with the distribution.
14
 * - The name of the author may not be used to endorse or promote products
15
 *   derived from this software without specific prior written permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
 */
28
 
29
#include<sftypes.h>
30
#include<mul.h>
31
#include<comparison.h>
829 cejka 32
#include<common.h>
731 cejka 33
 
34
/** Multiply two 32 bit float numbers
35
 *
36
 */
37
float32 mulFloat32(float32 a, float32 b)
38
{
39
    float32 result;
1031 cejka 40
    uint64_t frac1, frac2;
41
    int32_t exp;
731 cejka 42
 
43
    result.parts.sign = a.parts.sign ^ b.parts.sign;
44
 
737 cejka 45
    if (isFloat32NaN(a) || isFloat32NaN(b) ) {
731 cejka 46
        /* TODO: fix SigNaNs */
47
        if (isFloat32SigNaN(a)) {
804 cejka 48
            result.parts.fraction = a.parts.fraction;
731 cejka 49
            result.parts.exp = a.parts.exp;
50
            return result;
51
        };
52
        if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */
804 cejka 53
            result.parts.fraction = b.parts.fraction;
731 cejka 54
            result.parts.exp = b.parts.exp;
55
            return result;
56
        };
57
        /* set NaN as result */
737 cejka 58
        result.binary = FLOAT32_NAN;
731 cejka 59
        return result;
60
    };
61
 
62
    if (isFloat32Infinity(a)) {
63
        if (isFloat32Zero(b)) {
64
            /* FIXME: zero * infinity */
737 cejka 65
            result.binary = FLOAT32_NAN;
731 cejka 66
            return result;
67
        }
804 cejka 68
        result.parts.fraction = a.parts.fraction;
731 cejka 69
        result.parts.exp = a.parts.exp;
70
        return result;
71
    }
72
 
73
    if (isFloat32Infinity(b)) {
74
        if (isFloat32Zero(a)) {
75
            /* FIXME: zero * infinity */
737 cejka 76
            result.binary = FLOAT32_NAN;
731 cejka 77
            return result;
78
        }
804 cejka 79
        result.parts.fraction = b.parts.fraction;
731 cejka 80
        result.parts.exp = b.parts.exp;
81
        return result;
82
    }
83
 
84
    /* exp is signed so we can easy detect underflow */
85
    exp = a.parts.exp + b.parts.exp;
86
    exp -= FLOAT32_BIAS;
87
 
737 cejka 88
    if (exp >= FLOAT32_MAX_EXPONENT) {
731 cejka 89
        /* FIXME: overflow */
90
        /* set infinity as result */
737 cejka 91
        result.binary = FLOAT32_INF;
92
        result.parts.sign = a.parts.sign ^ b.parts.sign;
731 cejka 93
        return result;
94
    };
95
 
96
    if (exp < 0) {
97
        /* FIXME: underflow */
98
        /* return signed zero */
804 cejka 99
        result.parts.fraction = 0x0;
731 cejka 100
        result.parts.exp = 0x0;
101
        return result;
102
    };
103
 
804 cejka 104
    frac1 = a.parts.fraction;
737 cejka 105
    if (a.parts.exp > 0) {
804 cejka 106
        frac1 |= FLOAT32_HIDDEN_BIT_MASK;
731 cejka 107
    } else {
108
        ++exp;
109
    };
110
 
804 cejka 111
    frac2 = b.parts.fraction;
737 cejka 112
 
113
    if (b.parts.exp > 0) {
804 cejka 114
        frac2 |= FLOAT32_HIDDEN_BIT_MASK;
731 cejka 115
    } else {
116
        ++exp;
117
    };
118
 
804 cejka 119
    frac1 <<= 1; /* one bit space for rounding */
731 cejka 120
 
804 cejka 121
    frac1 = frac1 * frac2;
731 cejka 122
/* round and return */
123
 
804 cejka 124
    while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) {
125
        /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left)*/
731 cejka 126
        ++exp;
804 cejka 127
        frac1 >>= 1;
731 cejka 128
    };
129
 
130
    /* rounding */
804 cejka 131
    /* ++frac1; FIXME: not works - without it is ok */
132
    frac1 >>= 1; /* shift off rounding space */
731 cejka 133
 
804 cejka 134
    if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) {
731 cejka 135
        ++exp;
804 cejka 136
        frac1 >>= 1;
731 cejka 137
    };
138
 
737 cejka 139
    if (exp >= FLOAT32_MAX_EXPONENT ) {
731 cejka 140
        /* TODO: fix overflow */
141
        /* return infinity*/
737 cejka 142
        result.parts.exp = FLOAT32_MAX_EXPONENT;
804 cejka 143
        result.parts.fraction = 0x0;
731 cejka 144
        return result;
145
    }
146
 
804 cejka 147
    exp -= FLOAT32_FRACTION_SIZE;
731 cejka 148
 
804 cejka 149
    if (exp <= FLOAT32_FRACTION_SIZE) {
731 cejka 150
        /* denormalized number */
804 cejka 151
        frac1 >>= 1; /* denormalize */
152
        while ((frac1 > 0) && (exp < 0)) {
153
            frac1 >>= 1;
731 cejka 154
            ++exp;
155
        };
804 cejka 156
        if (frac1 == 0) {
731 cejka 157
            /* FIXME : underflow */
158
        result.parts.exp = 0;
804 cejka 159
        result.parts.fraction = 0;
731 cejka 160
        return result;
161
        };
162
    };
163
    result.parts.exp = exp;
804 cejka 164
    result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1);
731 cejka 165
 
166
    return result; 
167
 
168
}
169
 
737 cejka 170
/** Multiply two 64 bit float numbers
171
 *
172
 */
173
float64 mulFloat64(float64 a, float64 b)
174
{
175
    float64 result;
1031 cejka 176
    uint64_t frac1, frac2;
177
    int32_t exp;
731 cejka 178
 
737 cejka 179
    result.parts.sign = a.parts.sign ^ b.parts.sign;
180
 
181
    if (isFloat64NaN(a) || isFloat64NaN(b) ) {
182
        /* TODO: fix SigNaNs */
183
        if (isFloat64SigNaN(a)) {
804 cejka 184
            result.parts.fraction = a.parts.fraction;
737 cejka 185
            result.parts.exp = a.parts.exp;
186
            return result;
187
        };
188
        if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */
804 cejka 189
            result.parts.fraction = b.parts.fraction;
737 cejka 190
            result.parts.exp = b.parts.exp;
191
            return result;
192
        };
193
        /* set NaN as result */
194
        result.binary = FLOAT64_NAN;
195
        return result;
196
    };
197
 
198
    if (isFloat64Infinity(a)) {
199
        if (isFloat64Zero(b)) {
200
            /* FIXME: zero * infinity */
201
            result.binary = FLOAT64_NAN;
202
            return result;
203
        }
804 cejka 204
        result.parts.fraction = a.parts.fraction;
737 cejka 205
        result.parts.exp = a.parts.exp;
206
        return result;
207
    }
731 cejka 208
 
737 cejka 209
    if (isFloat64Infinity(b)) {
210
        if (isFloat64Zero(a)) {
211
            /* FIXME: zero * infinity */
212
            result.binary = FLOAT64_NAN;
213
            return result;
214
        }
804 cejka 215
        result.parts.fraction = b.parts.fraction;
737 cejka 216
        result.parts.exp = b.parts.exp;
217
        return result;
218
    }
219
 
220
    /* exp is signed so we can easy detect underflow */
829 cejka 221
    exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS;
737 cejka 222
 
804 cejka 223
    frac1 = a.parts.fraction;
829 cejka 224
 
737 cejka 225
    if (a.parts.exp > 0) {
804 cejka 226
        frac1 |= FLOAT64_HIDDEN_BIT_MASK;
737 cejka 227
    } else {
228
        ++exp;
229
    };
230
 
804 cejka 231
    frac2 = b.parts.fraction;
737 cejka 232
 
233
    if (b.parts.exp > 0) {
804 cejka 234
        frac2 |= FLOAT64_HIDDEN_BIT_MASK;
737 cejka 235
    } else {
236
        ++exp;
237
    };
238
 
829 cejka 239
    frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1);
240
    frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2);
737 cejka 241
 
804 cejka 242
    mul64integers(frac1, frac2, &frac1, &frac2);
737 cejka 243
 
829 cejka 244
    frac2 |= (frac1 != 0);
245
    if (frac2 & (0x1ll << 62)) {
246
        frac2 <<= 1;
247
        exp--;
737 cejka 248
    }
249
 
829 cejka 250
    result = finishFloat64(exp, frac2, result.parts.sign);
251
    return result;
737 cejka 252
}
253
 
254
/** Multiply two 64 bit numbers and return result in two parts
255
 * @param a first operand
256
 * @param b second operand
257
 * @param lo lower part from result
258
 * @param hi higher part of result
259
 */
1031 cejka 260
void mul64integers(uint64_t a,uint64_t b, uint64_t *lo, uint64_t *hi)
737 cejka 261
{
1031 cejka 262
    uint64_t low, high, middle1, middle2;
263
    uint32_t alow, blow;
829 cejka 264
 
737 cejka 265
    alow = a & 0xFFFFFFFF;
266
    blow = b & 0xFFFFFFFF;
267
 
828 cejka 268
    a >>= 32;
269
    b >>= 32;
737 cejka 270
 
1031 cejka 271
    low = ((uint64_t)alow) * blow;
737 cejka 272
    middle1 = a * blow;
273
    middle2 = alow * b;
274
    high = a * b;
275
 
276
    middle1 += middle2;
1031 cejka 277
    high += (((uint64_t)(middle1 < middle2)) << 32) + (middle1 >> 32);
804 cejka 278
    middle1 <<= 32;
737 cejka 279
    low += middle1;
280
    high += (low < middle1);
281
    *lo = low;
282
    *hi = high;
828 cejka 283
 
737 cejka 284
    return;
285
}
286
 
287