arch/m68k/math-emu/fp_util.S

   1 /*
   2  * fp_util.S
   3  *
   4  * Copyright Roman Zippel, 1997.  All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, and the entire permission notice in its entirety,
  11  *    including the disclaimer of warranties.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. The name of the author may not be used to endorse or promote
  16  *    products derived from this software without specific prior
  17  *    written permission.
  18  *
  19  * ALTERNATIVELY, this product may be distributed under the terms of
  20  * the GNU General Public License, in which case the provisions of the GPL are
  21  * required INSTEAD OF the above restrictions.  (This clause is
  22  * necessary due to a potential bad interaction between the GPL and
  23  * the restrictions contained in a BSD-style copyright.)
  24  *
  25  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  26  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  27  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  28  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  29  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  30  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  31  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  35  * OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 #include <linux/config.h>
  39 #include "fp_emu.h"
  40
  41 /*
  42  * Here are lots of conversion and normalization functions mainly
  43  * used by fp_scan.S
  44  * Note that these functions are optimized for "normal" numbers,
  45  * these are handled first and exit as fast as possible, this is
  46  * especially important for fp_normalize_ext/fp_conv_ext2ext, as
  47  * it's called very often.
   48  * The register usage is tuned for fp_scan.S and for whichever
   49  * registers are unused there at the time, so be careful if you want
   50  * to change something here. %d0 and %d1 are always usable, sometimes
   51  * %d2 (or only its lower half). Most functions have to return %a0
   52  * unmodified, so that the caller can immediately reuse it.
  53  */
  54
  55         .globl  fp_ill, fp_end
  56
   57         | exits from fp_scan:
   58         | illegal instruction
      /*
       * fp_ill: exit taken for an illegal instruction.  It only emits the
       * "fp_illegal" message through the printf macro and returns; no result
       * register is loaded here.
       * NOTE(review): printf is a macro defined outside this file; the empty
       * first argument presumably means "no debug category" -- confirm.
       */
   59 fp_ill:
   60         printf  ,"fp_illegal\n"
   61         rts
   62         | completed instruction
      /*
       * fp_end: exit taken once an instruction has been emulated.
       * The three long words at TASK_MM-8, TASK_MM-4 and TASK_MM relative to
       * %a2 are tested; if any of them is negative an "oops" message showing
       * all three values is printed.  In every case %d0 is cleared before
       * returning.
       * NOTE(review): this looks like a sanity check on the structure %a2
       * points to -- confirm against the callers in fp_scan.S.
       */
   63 fp_end:
   64         tst.l   (TASK_MM-8,%a2)
   65         jmi     1f
   66         tst.l   (TASK_MM-4,%a2)
   67         jmi     1f
   68         tst.l   (TASK_MM,%a2)
   69         jpl     2f
   70 1:      printf  ,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
   71 2:      clr.l   %d0
   72         rts
  73
  74         .globl  fp_conv_long2ext, fp_conv_single2ext
  75         .globl  fp_conv_double2ext, fp_conv_ext2ext
  76         .globl  fp_normalize_ext, fp_normalize_double
  77         .globl  fp_normalize_single, fp_normalize_single_fast
  78         .globl  fp_conv_ext2double, fp_conv_ext2single
  79         .globl  fp_conv_ext2long, fp_conv_ext2short
  80         .globl  fp_conv_ext2byte
  81         .globl  fp_finalrounding_single, fp_finalrounding_single_fast
  82         .globl  fp_finalrounding_double
  83         .globl  fp_finalrounding, fp_finaltest, fp_final
  84
  85 /*
  86  * First several conversion functions from a source operand
  87  * into the extended format. Note, that only fp_conv_ext2ext
  88  * normalizes the number and is always called after the other
  89  * conversion functions, which only move the information into
  90  * fp_ext structure.
  91  */
  92
   93         | fp_conv_long2ext:
   94         |
   95         | args: %d0 = source (32-bit long)
   96         |       %a0 = destination (ptr to struct fp_ext)
      /*
       * Stores the 32-bit integer in %d0 as an unnormalized fp_ext:
       *   lword 0: upper word = sign (1 if the source was negative, else 0),
       *            lower word = exponent 0x3fff+31
       *   lword 1: magnitude of the source (high mantissa)
       *   lword 2: zero (low mantissa)
       * A zero source is stored as three zero lwords.
       * %a0 is stepped through the structure and moved back by 8 before
       * returning; %d0 and %d1 are overwritten.
       */
   97
   98 fp_conv_long2ext:
   99         printf  PCONV,"l2e: %p -> %p(",2,%d0,%a0
  100         clr.l   %d1                     | sign defaults to zero
  101         tst.l   %d0
  102         jeq     fp_l2e_zero             | is source zero?
  103         jpl     1f                      | positive?
  104         moveq   #1,%d1
  105         neg.l   %d0
  106 1:      swap    %d1
  107         move.w  #0x3fff+31,%d1
  108         move.l  %d1,(%a0)+              | set sign / exp
  109         move.l  %d0,(%a0)+              | set mantissa
  110         clr.l   (%a0)
  111         subq.l  #8,%a0                  | restore %a0
  112         printx  PCONV,%a0@
  113         printf  PCONV,")\n"
  114         rts
  115         | source is zero
  116 fp_l2e_zero:
  117         clr.l   (%a0)+
  118         clr.l   (%a0)+
  119         clr.l   (%a0)
  120         subq.l  #8,%a0
  121         printx  PCONV,%a0@
  122         printf  PCONV,")\n"
  123         rts
 124
  125         | fp_conv_single2ext
  126         | args: %d0 = source (single-precision fp value)
  127         |       %a0 = dest (struct fp_ext *)
      /*
       * Unpacks a single-precision value into fp_ext without normalizing it.
       * After the shifts %d1 holds the sign in its upper word and the 8-bit
       * exponent in its lower word (the final lsr.w also sets Z for a zero
       * exponent), while %d0 holds the fraction left-aligned below bit 31.
       *   normal:    bit 31 of the mantissa is set and the exponent is
       *              re-biased by 0x3fff-0x7f
       *   exp == 0:  a zero fraction keeps exponent 0, a non-zero fraction
       *              (denormal) gets exponent 0x4000-0x7f
       *   exp == 0xff (Inf / NaN): bit 31 is cleared and the exponent is
       *              forced to 0x7fff
       * The low mantissa lword is cleared.  %a0 is moved back by 8 before
       * returning; %d0 and %d1 are overwritten.
       */
  128
  129 fp_conv_single2ext:
  130         printf  PCONV,"s2e: %p -> %p(",2,%d0,%a0
  131         move.l  %d0,%d1
  132         lsl.l   #8,%d0                  | shift mantissa
  133         lsr.l   #8,%d1                  | exponent / sign
  134         lsr.l   #7,%d1
  135         lsr.w   #8,%d1
  136         jeq     fp_s2e_small            | zero / denormal?
  137         cmp.w   #0xff,%d1               | NaN / Inf?
  138         jeq     fp_s2e_large
  139         bset    #31,%d0                 | set explizit bit
  140         add.w   #0x3fff-0x7f,%d1        | re-bias the exponent.
  141 9:      move.l  %d1,(%a0)+              | fp_ext.sign, fp_ext.exp
  142         move.l  %d0,(%a0)+              | high lword of fp_ext.mant
  143         clr.l   (%a0)                   | low lword = 0
  144         subq.l  #8,%a0
  145         printx  PCONV,%a0@
  146         printf  PCONV,")\n"
  147         rts
  148         | zeros and denormalized
  149 fp_s2e_small:
  150         | exponent is zero, so explizit bit is already zero too
  151         tst.l   %d0
  152         jeq     9b
  153         move.w  #0x4000-0x7f,%d1
  154         jra     9b
  155         | infinities and NAN
  156 fp_s2e_large:
  157         bclr    #31,%d0                 | clear explizit bit
  158         move.w  #0x7fff,%d1
  159         jra     9b
 160
  161 fp_conv_double2ext:
      /*
       * Unpacks a double-precision value into fp_ext without normalizing it.
       * args: %a1 = source address, read one lword at a time with getuser.l
       *             and advanced by 8 in total
       *       %a0 = dest (struct fp_ext *), returned unchanged
       * NOTE(review): getuser.l is a macro defined outside this file; it
       * presumably performs a user-space load and branches to fp_err_ua2 on
       * a fault -- confirm.
       *
       * First lword: %d1 ends up with the sign in its upper word and the
       * 11-bit exponent in its lower word, %d0 with the upper fraction bits
       * shifted left by 11.
       *   normal:     bit 31 set, exponent re-biased by 0x3fff-0x3ff
       *   exp == 0:   zero upper fraction keeps exponent 0, otherwise the
       *               exponent becomes 0x4000-0x3ff
       *   exp == 0x7ff (Inf / NaN): bit 31 cleared, exponent 0x7fff
       * Second lword: shifted left by 11 it becomes the low mantissa lword,
       * and its top 11 bits are or-ed into the high mantissa lword.
       * With FPU_EMU_DEBUG both source lwords are first read without
       * advancing %a1, only to be printed.
       */
  162 #ifdef FPU_EMU_DEBUG
  163         getuser.l %a1@(0),%d0,fp_err_ua2,%a1
  164         getuser.l %a1@(4),%d1,fp_err_ua2,%a1
  165         printf  PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
  166 #endif
  167         getuser.l (%a1)+,%d0,fp_err_ua2,%a1
  168         move.l  %d0,%d1
  169         lsl.l   #8,%d0                  | shift high mantissa
  170         lsl.l   #3,%d0
  171         lsr.l   #8,%d1                  | exponent / sign
  172         lsr.l   #7,%d1
  173         lsr.w   #5,%d1
  174         jeq     fp_d2e_small            | zero / denormal?
  175         cmp.w   #0x7ff,%d1              | NaN / Inf?
  176         jeq     fp_d2e_large
  177         bset    #31,%d0                 | set explizit bit
  178         add.w   #0x3fff-0x3ff,%d1       | re-bias the exponent.
  179 9:      move.l  %d1,(%a0)+              | fp_ext.sign, fp_ext.exp
  180         move.l  %d0,(%a0)+
  181         getuser.l (%a1)+,%d0,fp_err_ua2,%a1
  182         move.l  %d0,%d1
  183         lsl.l   #8,%d0
  184         lsl.l   #3,%d0
  185         move.l  %d0,(%a0)
  186         moveq   #21,%d0
  187         lsr.l   %d0,%d1
  188         or.l    %d1,-(%a0)
  189         subq.l  #4,%a0
  190         printx  PCONV,%a0@
  191         printf  PCONV,")\n"
  192         rts
  193         | zeros and denormalized
  194 fp_d2e_small:
  195         | exponent is zero, so explizit bit is already zero too
  196         tst.l   %d0
  197         jeq     9b
  198         move.w  #0x4000-0x3ff,%d1
  199         jra     9b
  200         | infinities and NAN
  201 fp_d2e_large:
  202         bclr    #31,%d0                 | clear explizit bit
  203         move.w  #0x7fff,%d1
  204         jra     9b
 205
  206         | fp_conv_ext2ext:
  207         | originally used to get longdouble from userspace, now it's
  208         | called before arithmetic operations to make sure the number
  209         | is normalized [maybe rename it?].
  210         | args: %a0 = dest (struct fp_ext *)
  211         | returns 0 in %d0 for a NaN, otherwise 1
      /*
       * While the value is examined %a0 is advanced to the high mantissa
       * lword (offset 4); the exits in this block move it back to the start
       * of the structure.
       *   exponent == 0x7fff        -> fp_e2e_large (Inf / NaN)
       *   high mantissa bit clear   -> fp_e2e_small (zero / denormal)
       *   otherwise                 -> already normalized
       * With CONFIG_M68KFPU_EMU_EXTRAPREC the byte at offset 0 of the struct
       * is treated as extra low-order mantissa bits.  If it is non-zero,
       * INEX2 is raised, the byte is cleared and the 64-bit mantissa is
       * rounded according to FPD_RND (0 nearest, 1 zero, 2 -inf, 3 +inf);
       * the byte at offset 1 is tested as the sign.  A carry out of the
       * mantissa sets the high mantissa word to 0x8000 and increments the
       * exponent.
       * NOTE(review): the rounding exit at local label 9 returns without
       * loading %d0 with 1, unlike the other non-NaN exits -- confirm that
       * callers do not depend on %d0 there.
       */
  212
  213 fp_conv_ext2ext:
  214         printf  PCONV,"e2e: %p(",1,%a0
  215         printx  PCONV,%a0@
  216         printf  PCONV,"), "
  217         move.l  (%a0)+,%d0
  218         cmp.w   #0x7fff,%d0             | Inf / NaN?
  219         jeq     fp_e2e_large
  220         move.l  (%a0),%d0
  221         jpl     fp_e2e_small            | zero / denorm?
  222         | The high bit is set, so normalization is irrelevant.
  223 fp_e2e_checkround:
  224         subq.l  #4,%a0
  225 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  226         move.b  (%a0),%d0
  227         jne     fp_e2e_round
  228 #endif
  229         printf  PCONV,"%p(",1,%a0
  230         printx  PCONV,%a0@
  231         printf  PCONV,")\n"
  232         moveq   #1,%d0
  233         rts
  234 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  235 fp_e2e_round:
  236         fp_set_sr FPSR_EXC_INEX2
  237         clr.b   (%a0)
  238         move.w  (FPD_RND,FPDATA),%d2
  239         jne     fp_e2e_roundother       | %d2 == 0, round to nearest
  240         tst.b   %d0                     | test guard bit
  241         jpl     9f                      | zero is closer
  242         btst    #0,(11,%a0)             | test lsb bit
  243         jne     fp_e2e_doroundup        | round to infinity
  244         lsl.b   #1,%d0                  | check low bits
  245         jeq     9f                      | round to zero
  246 fp_e2e_doroundup:
  247         addq.l  #1,(8,%a0)
  248         jcc     9f
  249         addq.l  #1,(4,%a0)
  250         jcc     9f
  251         move.w  #0x8000,(4,%a0)
  252         addq.w  #1,(2,%a0)
  253 9:      printf  PNORM,"%p(",1,%a0
  254         printx  PNORM,%a0@
  255         printf  PNORM,")\n"
  256         rts
  257 fp_e2e_roundother:
  258         subq.w  #2,%d2
  259         jcs     9b                      | %d2 < 2, round to zero
  260         jhi     1f                      | %d2 > 2, round to +infinity
  261         tst.b   (1,%a0)                 | to -inf
  262         jne     fp_e2e_doroundup        | negative, round to infinity
  263         jra     9b                      | positive, round to zero
  264 1:      tst.b   (1,%a0)                 | to +inf
  265         jeq     fp_e2e_doroundup        | positive, round to infinity
  266         jra     9b                      | negative, round to zero
  267 #endif
  268         | zeros and subnormals:
  269         | try to normalize these anyway.
      /*
       * Entered from fp_conv_ext2ext with %a0 at the high mantissa lword,
       * %d0 = that lword and the condition codes still reflecting it.
       *   high lword non-zero              -> fp_e2e_small1
       *   only the low lword non-zero      -> fp_e2e_small2
       *   only the extra byte non-zero     -> fp_e2e_small3 (EXTRAPREC)
       *   otherwise a true zero: the exponent is cleared, %a0 is moved back
       *   to the start of the struct and 1 is returned in %d0.
       *
       * fp_e2e_small1: bfffo leaves the number of leading zero bits of the
       * high lword in %d1.  The exponent is reduced by that amount; if it
       * would drop below zero the shift is limited to the old exponent and
       * the exponent becomes 0, so the number stays denormalized.  The
       * 64-bit mantissa is then shifted left by the count in %d2 and %d2 is
       * left holding (32 - count) for fp_e2e_extra1.
       *
       * fp_e2e_extra1 (EXTRAPREC): moves the bits of the extra byte (offset
       * 0 of the struct, addressed as -4 from %a0) that the shift pulls in
       * into the low mantissa lword.  For a shift of 8 or more the whole
       * byte is consumed and cleared, otherwise the remainder is written
       * back to the extra byte.
       */
  270 fp_e2e_small:
  271         jne     fp_e2e_small1           | high lword zero?
  272         move.l  (4,%a0),%d0
  273         jne     fp_e2e_small2
  274 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  275         clr.l   %d0
  276         move.b  (-4,%a0),%d0
  277         jne     fp_e2e_small3
  278 #endif
  279         | Genuine zero.
  280         clr.w   -(%a0)
  281         subq.l  #2,%a0
  282         printf  PNORM,"%p(",1,%a0
  283         printx  PNORM,%a0@
  284         printf  PNORM,")\n"
  285         moveq   #1,%d0
  286         rts
  287         | definitely subnormal, need to shift all 64 bits
  288 fp_e2e_small1:
  289         bfffo   %d0{#0,#32},%d1
  290         move.w  -(%a0),%d2
  291         sub.w   %d1,%d2
  292         jcc     1f
  293         | Pathologically small, denormalize.
  294         add.w   %d2,%d1
  295         clr.w   %d2
  296 1:      move.w  %d2,(%a0)+
  297         move.w  %d1,%d2
  298         jeq     fp_e2e_checkround
  299         | fancy 64-bit double-shift begins here
  300         lsl.l   %d2,%d0
  301         move.l  %d0,(%a0)+
  302         move.l  (%a0),%d0
  303         move.l  %d0,%d1
  304         lsl.l   %d2,%d0
  305         move.l  %d0,(%a0)
  306         neg.w   %d2
  307         and.w   #0x1f,%d2
  308         lsr.l   %d2,%d1
  309         or.l    %d1,-(%a0)
  310 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  311 fp_e2e_extra1:
  312         clr.l   %d0
  313         move.b  (-4,%a0),%d0
  314         neg.w   %d2
  315         add.w   #24,%d2
  316         jcc     1f
  317         clr.b   (-4,%a0)
  318         lsl.l   %d2,%d0
  319         or.l    %d0,(4,%a0)
  320         jra     fp_e2e_checkround
  321 1:      addq.w  #8,%d2
  322         lsl.l   %d2,%d0
  323         move.b  %d0,(-4,%a0)
  324         lsr.l   #8,%d0
  325         or.l    %d0,(4,%a0)
  326 #endif
  327         jra     fp_e2e_checkround
 328         | pathologically small subnormal
 329 fp_e2e_small2:
 330         bfffo   %d0{#0,#32},%d1
 331         add.w   #32,%d1
 332         move.w  -(%a0),%d2
 333         sub.w   %d1,%d2
 334         jcc     1f
 335         | Beyond pathologically small, denormalize.
 336         add.w   %d2,%d1
 337         clr.w   %d2
 338 1:      move.w  %d2,(%a0)+
 339         ext.l   %d1
 340         jeq     fp_e2e_checkround
 341         clr.l   (4,%a0)
 342         sub.w   #32,%d2
 343         jcs     1f
 344         lsl.l   %d1,%d0                 | lower lword needs only to be shifted
 345         move.l  %d0,(%a0)               | into the higher lword
 346 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
 347         clr.l   %d0
 348         move.b  (-4,%a0),%d0
 349         clr.b   (-4,%a0)
 350         neg.w   %d1
 351         add.w   #32,%d1
 352         bfins   %d0,(%a0){%d1,#8}
 353 #endif
 354         jra     fp_e2e_checkround
 355 1:      neg.w   %d1                     | lower lword is splitted between
 356         bfins   %d0,(%a0){%d1,#32}      | higher and lower lword
 357 #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
 358         jra     fp_e2e_checkround
 359 #else
 360         move.w  %d1,%d2
 361         jra     fp_e2e_extra1
 362         | These are extremely small numbers, that will mostly end up as zero
 363         | anyway, so this is only important for correct rounding.
 364 fp_e2e_small3:
 365         bfffo   %d0{#24,#8},%d1
 366         add.w   #40,%d1
 367         move.w  -(%a0),%d2
 368         sub.w   %d1,%d2
 369         jcc     1f
 370         | Pathologically small, denormalize.
 371         add.w   %d2,%d1
 372         clr.w   %d2
 373 1:      move.w  %d2,(%a0)+
 374         ext.l   %d1
 375         jeq     fp_e2e_checkround
 376         cmp.w   #8,%d1
 377         jcs     2f
 378 1:      clr.b   (-4,%a0)
 379         sub.w   #64,%d1
 380         jcs     1f
 381         add.w   #24,%d1
 382         lsl.l   %d1,%d0
 383         move.l  %d0,(%a0)
 384         jra     fp_e2e_checkround
 385 1:      neg.w   %d1
 386         bfins   %d0,(%a0){%d1,#8}
 387         jra     fp_e2e_checkround
 388 2:      lsl.l   %d1,%d0
 389         move.b  %d0,(-4,%a0)
 390         lsr.l   #8,%d0
 391         move.b  %d0,(7,%a0)
 392         jra     fp_e2e_checkround
 393 #endif
 394 1:      move.l  %d0,%d1                 | lower lword is splitted between
 395         lsl.l   %d2,%d0                 | higher and lower lword
 396         move.l  %d0,(%a0)
 397         move.l  %d1,%d0
 398         neg.w   %d2
 399         add.w   #32,%d2
 400         lsr.l   %d2,%d0
 401         move.l  %d0,-(%a0)
 402         jra     fp_e2e_checkround
  403         | Infinities and NaNs
      /*
       * Entered with %a0 at the high mantissa lword (exponent is 0x7fff).
       * The value is an infinity when the mantissa is zero, ignoring bit 31
       * of the high lword; anything else is a NaN.
       * Returns %d0 = 1 for an infinity and %d0 = 0 for a NaN, with %a0
       * moved back to the start of the struct.  The value itself is not
       * modified.
       */
  404 fp_e2e_large:
  405         move.l  (%a0)+,%d0
  406         jne     3f
  407 1:      tst.l   (%a0)
  408         jne     4f
  409         moveq   #1,%d0
  410 2:      subq.l  #8,%a0
  411         printf  PCONV,"%p(",1,%a0
  412         printx  PCONV,%a0@
  413         printf  PCONV,")\n"
  414         rts
  415         | we have maybe a NaN, shift off the highest bit
  416 3:      lsl.l   #1,%d0
  417         jeq     1b
  418         | we have a NaN, clear the return value
  419 4:      clrl    %d0
  420         jra     2b
 421
 422
 423 /*
 424  * Normalization functions.  Call these on the output of general
 425  * FP operators, and before any conversion into the destination
 426  * formats. fp_normalize_ext has always to be called first, the
 427  * following conversion functions expect an already normalized
 428  * number.
 429  */
 430
  431         | fp_normalize_ext:
  432         | normalize an extended in extended (unpacked) format, basically
  433         | it does the same as fp_conv_ext2ext, additionally it also does
  434         | the necessary postprocessing checks.
  435         | args: %a0 (struct fp_ext *)
  436         | NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
      /*
       * While the value is examined %a0 is advanced to the high mantissa
       * lword (offset 4); the exits in this block move it back to the start
       * of the structure.  No result is loaded into %d0.
       *   exponent == 0x7fff        -> fp_ne_large (Inf / NaN)
       *   high mantissa bit clear   -> fp_ne_small (zero / denormal)
       *   otherwise                 -> already normalized
       * With CONFIG_M68KFPU_EMU_EXTRAPREC a non-zero byte at offset 0 of the
       * struct raises INEX2, is cleared, and the 64-bit mantissa is rounded
       * according to FPD_RND (0 nearest, 1 zero, 2 -inf, 3 +inf); the byte
       * at offset 1 is tested as the sign.  A carry out of the mantissa
       * increments the exponent and sets the high mantissa word to 0x8000.
       */
  437
  438 fp_normalize_ext:
  439         printf  PNORM,"ne: %p(",1,%a0
  440         printx  PNORM,%a0@
  441         printf  PNORM,"), "
  442         move.l  (%a0)+,%d0
  443         cmp.w   #0x7fff,%d0             | Inf / NaN?
  444         jeq     fp_ne_large
  445         move.l  (%a0),%d0
  446         jpl     fp_ne_small             | zero / denorm?
  447         | The high bit is set, so normalization is irrelevant.
  448 fp_ne_checkround:
  449         subq.l  #4,%a0
  450 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  451         move.b  (%a0),%d0
  452         jne     fp_ne_round
  453 #endif
  454         printf  PNORM,"%p(",1,%a0
  455         printx  PNORM,%a0@
  456         printf  PNORM,")\n"
  457         rts
  458 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  459 fp_ne_round:
  460         fp_set_sr FPSR_EXC_INEX2
  461         clr.b   (%a0)
  462         move.w  (FPD_RND,FPDATA),%d2
  463         jne     fp_ne_roundother        | %d2 == 0, round to nearest
  464         tst.b   %d0                     | test guard bit
  465         jpl     9f                      | zero is closer
  466         btst    #0,(11,%a0)             | test lsb bit
  467         jne     fp_ne_doroundup         | round to infinity
  468         lsl.b   #1,%d0                  | check low bits
  469         jeq     9f                      | round to zero
  470 fp_ne_doroundup:
  471         addq.l  #1,(8,%a0)
  472         jcc     9f
  473         addq.l  #1,(4,%a0)
  474         jcc     9f
  475         addq.w  #1,(2,%a0)
  476         move.w  #0x8000,(4,%a0)
  477 9:      printf  PNORM,"%p(",1,%a0
  478         printx  PNORM,%a0@
  479         printf  PNORM,")\n"
  480         rts
  481 fp_ne_roundother:
  482         subq.w  #2,%d2
  483         jcs     9b                      | %d2 < 2, round to zero
  484         jhi     1f                      | %d2 > 2, round to +infinity
  485         tst.b   (1,%a0)                 | to -inf
  486         jne     fp_ne_doroundup         | negative, round to infinity
  487         jra     9b                      | positive, round to zero
  488 1:      tst.b   (1,%a0)                 | to +inf
  489         jeq     fp_ne_doroundup         | positive, round to infinity
  490         jra     9b                      | negative, round to zero
  491 #endif
  492         | Zeros and subnormal numbers
  493         | These are probably merely subnormal, rather than "denormalized"
  494         |  numbers, so we will try to make them normal again.
      /*
       * Entered from fp_normalize_ext with %a0 at the high mantissa lword,
       * %d0 = that lword and the condition codes still reflecting it.
       *   high lword non-zero              -> fp_ne_small1
       *   only the low lword non-zero      -> fp_ne_small2
       *   only the extra byte non-zero     -> fp_ne_small3 (EXTRAPREC)
       *   otherwise a true zero: the exponent is cleared and %a0 is moved
       *   back to the start of the struct.
       *
       * fp_ne_small1: bfffo leaves the number of leading zero bits of the
       * high lword in %d1.  The exponent is reduced by that amount; if it
       * would drop below zero the shift is limited to the old exponent, the
       * exponent becomes 0 and UNFL is raised.  The 64-bit mantissa is then
       * shifted left by the count in %d2, and %d2 is left holding
       * (32 - count) for fp_ne_extra1.
       *
       * fp_ne_extra1 (EXTRAPREC): moves the bits of the extra byte (offset
       * 0 of the struct, addressed as -4 from %a0) that the shift pulls in
       * into the low mantissa lword.  For a shift of 8 or more the whole
       * byte is consumed and cleared, otherwise the remainder is written
       * back to the extra byte.
       */
  495 fp_ne_small:
  496         jne     fp_ne_small1            | high lword zero?
  497         move.l  (4,%a0),%d0
  498         jne     fp_ne_small2
  499 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  500         clr.l   %d0
  501         move.b  (-4,%a0),%d0
  502         jne     fp_ne_small3
  503 #endif
  504         | Genuine zero.
  505         clr.w   -(%a0)
  506         subq.l  #2,%a0
  507         printf  PNORM,"%p(",1,%a0
  508         printx  PNORM,%a0@
  509         printf  PNORM,")\n"
  510         rts
  511         | Subnormal.
  512 fp_ne_small1:
  513         bfffo   %d0{#0,#32},%d1
  514         move.w  -(%a0),%d2
  515         sub.w   %d1,%d2
  516         jcc     1f
  517         | Pathologically small, denormalize.
  518         add.w   %d2,%d1
  519         clr.w   %d2
  520         fp_set_sr FPSR_EXC_UNFL
  521 1:      move.w  %d2,(%a0)+
  522         move.w  %d1,%d2
  523         jeq     fp_ne_checkround
  524         | This is exactly the same 64-bit double shift as seen above.
  525         lsl.l   %d2,%d0
  526         move.l  %d0,(%a0)+
  527         move.l  (%a0),%d0
  528         move.l  %d0,%d1
  529         lsl.l   %d2,%d0
  530         move.l  %d0,(%a0)
  531         neg.w   %d2
  532         and.w   #0x1f,%d2
  533         lsr.l   %d2,%d1
  534         or.l    %d1,-(%a0)
  535 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  536 fp_ne_extra1:
  537         clr.l   %d0
  538         move.b  (-4,%a0),%d0
  539         neg.w   %d2
  540         add.w   #24,%d2
  541         jcc     1f
  542         clr.b   (-4,%a0)
  543         lsl.l   %d2,%d0
  544         or.l    %d0,(4,%a0)
  545         jra     fp_ne_checkround
  546 1:      addq.w  #8,%d2
  547         lsl.l   %d2,%d0
  548         move.b  %d0,(-4,%a0)
  549         lsr.l   #8,%d0
  550         or.l    %d0,(4,%a0)
  551 #endif
  552         jra     fp_ne_checkround
  553         | May or may not be subnormal, if so, only 32 bits to shift.
      /*
       * fp_ne_small2: the high mantissa lword is zero, %d0 holds the
       * non-zero low lword and %a0 points at the high lword.
       *   %d1 = 32 + leading zero bits of %d0 = shift needed to normalize
       *   %d2 = exponent - shift; on a borrow the shift is limited to the
       *         old exponent, the exponent becomes 0 and UNFL is raised
       * After the exponent is stored, %d1 is reduced by 32.  A shift of 32
       * or more moves the low lword, shifted by (shift - 32), into the high
       * lword; a smaller shift splits it across both lwords with bfins at
       * bit offset (32 - shift).
       *
       * fp_ne_small3 (EXTRAPREC only): both mantissa lwords are zero and
       * %d0 holds the non-zero extra byte.  bfffo over the low 8 bits plus
       * 40 gives a shift of 64 + the leading zero bits of that byte.  The
       * exponent is adjusted the same way (without raising UNFL here) and
       * the byte is placed according to the remaining shift:
       *   < 8   : shifted within the extra byte, overflow bits go to the
       *           lowest mantissa byte
       *   8..63 : inserted with bfins at bit offset (64 - shift)
       *   >= 64 : shifted to the top of the high mantissa lword
       */
  554 fp_ne_small2:
  555         bfffo   %d0{#0,#32},%d1
  556         add.w   #32,%d1
  557         move.w  -(%a0),%d2
  558         sub.w   %d1,%d2
  559         jcc     1f
  560         | Beyond pathologically small, denormalize.
  561         add.w   %d2,%d1
  562         clr.w   %d2
  563         fp_set_sr FPSR_EXC_UNFL
  564 1:      move.w  %d2,(%a0)+
  565         ext.l   %d1
  566         jeq     fp_ne_checkround
  567         clr.l   (4,%a0)
  568         sub.w   #32,%d1
  569         jcs     1f
  570         lsl.l   %d1,%d0                 | lower lword needs only to be shifted
  571         move.l  %d0,(%a0)               | into the higher lword
  572 #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
  573         clr.l   %d0
  574         move.b  (-4,%a0),%d0
  575         clr.b   (-4,%a0)
  576         neg.w   %d1
  577         add.w   #32,%d1
  578         bfins   %d0,(%a0){%d1,#8}
  579 #endif
  580         jra     fp_ne_checkround
  581 1:      neg.w   %d1                     | lower lword is splitted between
  582         bfins   %d0,(%a0){%d1,#32}      | higher and lower lword
  583 #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
  584         jra     fp_ne_checkround
  585 #else
  586         move.w  %d1,%d2
  587         jra     fp_ne_extra1
  588         | These are extremely small numbers, that will mostly end up as zero
  589         | anyway, so this is only important for correct rounding.
  590 fp_ne_small3:
  591         bfffo   %d0{#24,#8},%d1
  592         add.w   #40,%d1
  593         move.w  -(%a0),%d2
  594         sub.w   %d1,%d2
  595         jcc     1f
  596         | Pathologically small, denormalize.
  597         add.w   %d2,%d1
  598         clr.w   %d2
  599 1:      move.w  %d2,(%a0)+
  600         ext.l   %d1
  601         jeq     fp_ne_checkround
  602         cmp.w   #8,%d1
  603         jcs     2f
  604 1:      clr.b   (-4,%a0)
  605         sub.w   #64,%d1
  606         jcs     1f
  607         add.w   #24,%d1
  608         lsl.l   %d1,%d0
  609         move.l  %d0,(%a0)
  610         jra     fp_ne_checkround
  611 1:      neg.w   %d1
  612         bfins   %d0,(%a0){%d1,#8}
  613         jra     fp_ne_checkround
  614 2:      lsl.l   %d1,%d0
  615         move.b  %d0,(-4,%a0)
  616         lsr.l   #8,%d0
  617         move.b  %d0,(7,%a0)
  618         jra     fp_ne_checkround
  619 #endif
  620         | Infinities and NaNs, again, same as above.
      /*
       * Entered with %a0 at the high mantissa lword (exponent is 0x7fff).
       *   high lword zero, low lword zero     -> infinity, left unchanged
       *   high lword has only bit 31 set      -> the high lword is cleared,
       *                                          then the low lword decides
       *   anything else                       -> NaN
       * For a NaN, bit 30 of the high lword is tested and set in %d0.  If
       * it was already set nothing more is done; otherwise SNAN is raised
       * and the high lword with bit 30 set is written back.
       * All exits move %a0 back to the start of the struct.
       */
  621 fp_ne_large:
  622         move.l  (%a0)+,%d0
  623         jne     3f
  624 1:      tst.l   (%a0)
  625         jne     4f
  626 2:      subq.l  #8,%a0
  627         printf  PNORM,"%p(",1,%a0
  628         printx  PNORM,%a0@
  629         printf  PNORM,")\n"
  630         rts
  631         | we have maybe a NaN, shift off the highest bit
  632 3:      move.l  %d0,%d1
  633         lsl.l   #1,%d1
  634         jne     4f
  635         clr.l   (-4,%a0)
  636         jra     1b
  637         | we have a NaN, test if it is signaling
  638 4:      bset    #30,%d0
  639         jne     2b
  640         fp_set_sr FPSR_EXC_SNAN
  641         move.l  %d0,(-4,%a0)
  642         jra     2b
 643
 644         | these next two do rounding as per the IEEE standard.
 645         | values for the rounding modes appear to be:
 646         | 0:    Round to nearest
 647         | 1:    Round to zero
 648         | 2:    Round to -Infinity
 649         | 3:    Round to +Infinity
  650         | both functions expect that fp_normalize_ext was already
  651         | called (so the extended argument is already normalized
  652         | as far as possible); they are used when a different
  653         | rounding precision is selected and before converting
  654         | into single/double
 655
  656         | fp_normalize_double:
  657         | normalize an extended with double (52-bit) precision
  658         | args:  %a0 (struct fp_ext *)
      /*
       * %d2 is loaded with the first lword of the struct: sign in the upper
       * word, exponent in the lower word.  Dispatch on the exponent:
       *   0                 -> fp_nd_zero
       *   0x7fff            -> fp_nd_huge
       *   below 0x3c01      -> fp_nd_small (underflow)
       *   0x43ff or above   -> fp_nd_large (overflow)
       * Otherwise the low 11 bits of the low mantissa lword are rounded
       * away according to FPD_RND (0 nearest, 1 zero, 2 -inf, 3 +inf).
       *
       * fp_nd_round expects %a0 at the low mantissa lword and the bits to
       * be rounded in the low 11 bits of %d0; it is also entered from
       * fp_nd_small and fp_nd_zero.  If any of those bits is set, INEX2 is
       * raised and the bits are cleared in memory before the rounding
       * decision.  When rounding up carries out of the mantissa the high
       * mantissa word is set to 0x8000 and the exponent is incremented; an
       * exponent of 0x43ff is then handed to fp_nd_large.
       * The exits in this block return with %a0 back at the start of the
       * struct.
       */
  659
  660 fp_normalize_double:
  661         printf  PNORM,"nd: %p(",1,%a0
  662         printx  PNORM,%a0@
  663         printf  PNORM,"), "
  664         move.l  (%a0)+,%d2
  665         tst.w   %d2
  666         jeq     fp_nd_zero              | zero / denormalized
  667         cmp.w   #0x7fff,%d2
  668         jeq     fp_nd_huge              | NaN / infinitive.
  669         sub.w   #0x4000-0x3ff,%d2       | will the exponent fit?
  670         jcs     fp_nd_small             | too small.
  671         cmp.w   #0x7fe,%d2
  672         jcc     fp_nd_large             | too big.
  673         addq.l  #4,%a0
  674         move.l  (%a0),%d0               | low lword of mantissa
  675         | now, round off the low 11 bits.
  676 fp_nd_round:
  677         moveq   #21,%d1
  678         lsl.l   %d1,%d0                 | keep 11 low bits.
  679         jne     fp_nd_checkround        | Are they non-zero?
  680         | nothing to do here
  681 9:      subq.l  #8,%a0
  682         printf  PNORM,"%p(",1,%a0
  683         printx  PNORM,%a0@
  684         printf  PNORM,")\n"
  685         rts
  686         | Be careful with the X bit! It contains the lsb
  687         | from the shift above, it is needed for round to nearest.
  688 fp_nd_checkround:
  689         fp_set_sr FPSR_EXC_INEX2        | INEX2 bit
  690         and.w   #0xf800,(2,%a0)         | clear bits 0-10
  691         move.w  (FPD_RND,FPDATA),%d2    | rounding mode
  692         jne     2f                      | %d2 == 0, round to nearest
  693         tst.l   %d0                     | test guard bit
  694         jpl     9b                      | zero is closer
  695         | here we test the X bit by adding it to %d2
  696         clr.w   %d2                     | first set z bit, addx only clears it
  697         addx.w  %d2,%d2                 | test lsb bit
  698         | IEEE754-specified "round to even" behaviour.  If the guard
  699         | bit is set, then the number is odd, so rounding works like
  700         | in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
  701         | Otherwise, an equal distance rounds towards zero, so as not
  702         | to produce an odd number.  This is strange, but it is what
  703         | the standard says.
  704         jne     fp_nd_doroundup         | round to infinity
  705         lsl.l   #1,%d0                  | check low bits
  706         jeq     9b                      | round to zero
  707 fp_nd_doroundup:
  708         | round (the mantissa, that is) towards infinity
  709         add.l   #0x800,(%a0)
  710         jcc     9b                      | no overflow, good.
  711         addq.l  #1,-(%a0)               | extend to high lword
  712         jcc     1f                      | no overflow, good.
  713         | Yow! we have managed to overflow the mantissa.  Since this
  714         | only happens when %d1 was 0xfffff800, it is now zero, so
  715         | reset the high bit, and increment the exponent.
  716         move.w  #0x8000,(%a0)
  717         addq.w  #1,-(%a0)
  718         cmp.w   #0x43ff,(%a0)+          | exponent now overflown?
  719         jeq     fp_nd_large             | yes, so make it infinity.
  720 1:      subq.l  #4,%a0
  721         printf  PNORM,"%p(",1,%a0
  722         printx  PNORM,%a0@
  723         printf  PNORM,")\n"
  724         rts
  725 2:      subq.w  #2,%d2
  726         jcs     9b                      | %d2 < 2, round to zero
  727         jhi     3f                      | %d2 > 2, round to +infinity
  728         | Round to +Inf or -Inf.  High word of %d2 contains the
  729         | sign of the number, by the way.
  730         swap    %d2                     | to -inf
  731         tst.b   %d2
  732         jne     fp_nd_doroundup         | negative, round to infinity
  733         jra     9b                      | positive, round to zero
  734 3:      swap    %d2                     | to +inf
  735         tst.b   %d2
  736         jeq     fp_nd_doroundup         | positive, round to infinity
  737         jra     9b                      | negative, round to zero
  738         | Exponent underflow.  Try to make a denormal, and set it to
  739         | the smallest possible fraction if this fails.
      /*
       * Entered with %a0 at the high mantissa lword and the lower word of
       * %d2 = exponent - 0x3c01 (negative).  UNFL is raised, the exponent is
       * set to 0x3c01 and the mantissa is shifted right by the size of the
       * underflow:
       *   < 32   : 64-bit shift across both lwords
       *   32..63 : the high lword is cleared and its shifted value becomes
       *            the low lword
       *   >= 64  : the mantissa is cleared and %d0 is set to 1
       * When non-zero bits are shifted out, bit 0 of %d0 is set as a sticky
       * bit.  Every path continues at fp_nd_round with %a0 at the low
       * mantissa lword.
       */
  740 fp_nd_small:
  741         fp_set_sr FPSR_EXC_UNFL         | set UNFL bit
  742         move.w  #0x3c01,(-2,%a0)        | 2**-1022
  743         neg.w   %d2                     | degree of underflow
  744         cmp.w   #32,%d2                 | single or double shift?
  745         jcc     1f
  746         | Again, another 64-bit double shift.
  747         move.l  (%a0),%d0
  748         move.l  %d0,%d1
  749         lsr.l   %d2,%d0
  750         move.l  %d0,(%a0)+
  751         move.l  (%a0),%d0
  752         lsr.l   %d2,%d0
  753         neg.w   %d2
  754         add.w   #32,%d2
  755         lsl.l   %d2,%d1
  756         or.l    %d1,%d0
  757         move.l  (%a0),%d1
  758         move.l  %d0,(%a0)
  759         | Check to see if we shifted off any significant bits
  760         lsl.l   %d2,%d1
  761         jeq     fp_nd_round             | Nope, round.
  762         bset    #0,%d0                  | Yes, so set the "sticky bit".
  763         jra     fp_nd_round             | Now, round.
  764         | Another 64-bit single shift and store
  765 1:      sub.w   #32,%d2
  766         cmp.w   #32,%d2                 | Do we really need to shift?
  767         jcc     2f                      | No, the number is too small.
  768         move.l  (%a0),%d0
  769         clr.l   (%a0)+
  770         move.l  %d0,%d1
  771         lsr.l   %d2,%d0
  772         neg.w   %d2
  773         add.w   #32,%d2
  774         | Again, check to see if we shifted off any significant bits.
  775         tst.l   (%a0)
  776         jeq     1f
  777         bset    #0,%d0                  | Sticky bit.
  778 1:      move.l  %d0,(%a0)
  779         lsl.l   %d2,%d1
  780         jeq     fp_nd_round
  781         bset    #0,%d0
  782         jra     fp_nd_round
  783         | Sorry, the number is just too small.
  784 2:      clr.l   (%a0)+
  785         clr.l   (%a0)
  786         moveq   #1,%d0                  | Smallest possible fraction,
  787         jra     fp_nd_round             | round as desired.
  788         | zero and denormalized
      /*
       * Entered with %a0 at the high mantissa lword; the exponent is zero.
       * If both mantissa lwords are zero the value is returned unchanged
       * with %a0 back at the start of the struct.  Otherwise UNFL is raised,
       * the mantissa is cleared, the exponent is set to 0x3c01 and control
       * passes to fp_nd_round with %a0 at the low mantissa lword and
       * %d0 = 1, so the value is rounded as a non-zero residue.
       */
  789 fp_nd_zero:
  790         tst.l   (%a0)+
  791         jne     1f
  792         tst.l   (%a0)
  793         jne     1f
  794         subq.l  #8,%a0
  795         printf  PNORM,"%p(",1,%a0
  796         printx  PNORM,%a0@
  797         printf  PNORM,")\n"
  798         rts                             | zero.  nothing to do.
  799         | These are not merely subnormal numbers, but true denormals,
  800         | i.e. pathologically small (exponent is 2**-16383) numbers.
  801         | It is clearly impossible for even a normal extended number
  802         | with that exponent to fit into double precision, so just
  803         | write these ones off as "too darn small".
  804 1:      fp_set_sr FPSR_EXC_UNFL         | Set UNFL bit
  805         clr.l   (%a0)
  806         clr.l   -(%a0)
  807         move.w  #0x3c01,-(%a0)          | i.e. 2**-1022
  808         addq.l  #6,%a0
  809         moveq   #1,%d0
  810         jra     fp_nd_round             | round.
 811         | Exponent overflow.  Just call it infinity.
             | Raises OVFL (and INEX2 if any of the 11 bits below double
             | precision are set), then stores either infinity or the
             | largest finite double, depending on rounding mode and sign.
             | %a0 points at the high mantissa lword on entry.
 812 fp_nd_large:
 813         move.w  #0x7ff,%d0
 814         and.w   (6,%a0),%d0             | low 11 bits of the low lword
 815         jeq     1f
 816         fp_set_sr FPSR_EXC_INEX2
 817 1:      fp_set_sr FPSR_EXC_OVFL
 818         move.w  (FPD_RND,FPDATA),%d2
 819         jne     3f                      | %d2 = 0 round to nearest
 820 1:      move.w  #0x7fff,(-2,%a0)        | infinity: maximum exponent,
 821         clr.l   (%a0)+                  | zero mantissa
 822         clr.l   (%a0)
 823 2:      subq.l  #8,%a0                  | %a0 back to the struct start
 824         printf  PNORM,"%p(",1,%a0
 825         printx  PNORM,%a0@
 826         printf  PNORM,")\n"
 827         rts
 828 3:      subq.w  #2,%d2
 829         jcs     5f                      | %d2 < 2, round to zero
 830         jhi     4f                      | %d2 > 2, round to +infinity
 831         tst.b   (-3,%a0)                | to -inf
 832         jne     1b                      | negative: infinity
 833         jra     5f                      | positive: largest finite
 834 4:      tst.b   (-3,%a0)                | to +inf
 835         jeq     1b                      | positive: infinity
             | Largest finite double: exponent 0x43fe, mantissa
             | 0xffffffff 0xfffff800.
 836 5:      move.w  #0x43fe,(-2,%a0)
 837         moveq   #-1,%d0
 838         move.l  %d0,(%a0)+              | high lword = 0xffffffff
 839         move.w  #0xf800,%d0             | %d0 = 0xfffff800
 840         move.l  %d0,(%a0)
 841         jra     2b
 842         | Infinities or NaNs
             | Nothing is rounded here: %a0 is stepped back 4 bytes, the
             | value is dumped when debugging and the routine returns.
 843 fp_nd_huge:
 844         subq.l  #4,%a0
 845         printf  PNORM,"%p(",1,%a0
 846         printx  PNORM,%a0@
 847         printf  PNORM,")\n"
 848         rts
 849
 850         | fp_normalize_single:
 851         | normalize an extended with single (23-bit) precision
 852         | args:  %a0 (struct fp_ext *)
             | The number is rounded in place using the rounding mode in
             | FPD_RND; INEX2, UNFL and OVFL are raised through fp_set_sr
             | as needed.  %d0-%d2 are used as scratch.  Every exit path
             | returns with %a0 back at the start of the struct.
 853
 854 fp_normalize_single:
 855         printf  PNORM,"ns: %p(",1,%a0
 856         printx  PNORM,%a0@
 857         printf  PNORM,") "
 858         addq.l  #2,%a0                  | skip to the exponent word
 859         move.w  (%a0)+,%d2              | %a0 -> high mantissa lword
 860         jeq     fp_ns_zero              | zero / denormalized
 861         cmp.w   #0x7fff,%d2
 862         jeq     fp_ns_huge              | NaN / infinity.
 863         sub.w   #0x4000-0x7f,%d2        | will the exponent fit?
 864         jcs     fp_ns_small             | too small.
 865         cmp.w   #0xfe,%d2
 866         jcc     fp_ns_large             | too big.
 867         move.l  (%a0)+,%d0              | get high lword of mantissa
             | execution continues in fp_ns_round
 868 fp_ns_round:
             | In: %d0 = high mantissa lword (bit 0 may already carry a
             | sticky bit), %a0 -> low mantissa lword.
             | The low lword is folded into the sticky bit and cleared.
             | If the 8 bits below single precision are all zero the
             | value is exact and we return, else fp_ns_checkround.
 869         tst.l   (%a0)                   | check the low lword
 870         jeq     1f
 871         | Set a sticky bit if it is non-zero.  This should only
 872         | affect the rounding in what would otherwise be equal-
 873         | distance situations, which is what we want it to do.
 874         bset    #0,%d0
 875 1:      clr.l   (%a0)                   | zap it from memory.
 876         | now, round off the low 8 bits of the hi lword.
 877         tst.b   %d0                     | 8 low bits.
 878         jne     fp_ns_checkround        | Are they non-zero?
 879         | nothing to do here
 880         subq.l  #8,%a0                  | %a0 back to the struct start
 881         printf  PNORM,"%p(",1,%a0
 882         printx  PNORM,%a0@
 883         printf  PNORM,")\n"
 884         rts
 885 fp_ns_checkround:
             | The result is inexact.  %d0 still holds the unrounded high
             | lword: bit 8 is the last bit that is kept, bit 7 the guard
             | bit, bits 6-0 the remaining low bits including the sticky
             | bit.  The copy in memory has its low byte cleared and is
             | then incremented by 0x100 (one unit in the last place) if
             | the rounding mode and sign require it.
             | FPD_RND: 0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity.
 886         fp_set_sr FPSR_EXC_INEX2        | INEX2 bit
 887         clr.b   -(%a0)                  | clear low byte of high lword
 888         subq.l  #3,%a0                  | %a0 -> high lword
 889         move.w  (FPD_RND,FPDATA),%d2    | rounding mode
 890         jne     2f                      | %d2 == 0, round to nearest
 891         tst.b   %d0                     | test guard bit
 892         jpl     9f                      | zero is closer
 893         btst    #8,%d0                  | test lsb bit
 894         | round to even behaviour: on an exact tie the mantissa is
             | only incremented if its last kept bit is odd.
 895         jne     fp_ns_doroundup         | round to infinity
 896         lsl.b   #1,%d0                  | check low bits
 897         jeq     9f                      | round to zero
 898 fp_ns_doroundup:
 899         | round (the mantissa, that is) towards infinity
 900         add.l   #0x100,(%a0)
 901         jcc     9f                      | no overflow, good.
 902         | Overflow.  This means that the high lword was 0xffffff00,
 903         | so it is now zero.  We will set the mantissa to reflect
 904         | this, and increment the exponent (checking for overflow
             | there too)
 905         move.w  #0x8000,(%a0)           | top mantissa bit back on
 906         addq.w  #1,-(%a0)               | exponent + 1
 907         cmp.w   #0x407f,(%a0)+          | exponent now overflown?
 908         jeq     fp_ns_large             | yes, so make it infinity.
 909 9:      subq.l  #4,%a0                  | %a0 back to the struct start
 910         printf  PNORM,"%p(",1,%a0
 911         printx  PNORM,%a0@
 912         printf  PNORM,")\n"
 913         rts
 914         | check nondefault rounding modes
 915 2:      subq.w  #2,%d2
 916         jcs     9b                      | %d2 < 2, round to zero
 917         jhi     3f                      | %d2 > 2, round to +infinity
 918         tst.b   (-3,%a0)                | to -inf
 919         jne     fp_ns_doroundup         | negative, round to infinity
 920         jra     9b                      | positive, round to zero
 921 3:      tst.b   (-3,%a0)                | to +inf
 922         jeq     fp_ns_doroundup         | positive, round to infinity
 923         jra     9b                      | negative, round to zero
 924         | Exponent underflow.  Try to make a denormal, and set it to
 925         | the smallest possible fraction if this fails.
             | In:  %d2 = exponent - 0x3f81 (negative here),
             |      %a0 -> high mantissa lword.
             | Out: continues in fp_ns_round with %d0 = high lword shifted
             |      right by the degree of underflow (bit 0 also serving
             |      as sticky bit) and %a0 -> low mantissa lword.
             | %d1 is used as scratch.
 926 fp_ns_small:
 927         fp_set_sr FPSR_EXC_UNFL         | set UNFL bit
 928         move.w  #0x3f81,(-2,%a0)        | 2**-126
 929         neg.w   %d2                     | degree of underflow
 930         cmp.w   #32,%d2                 | single or double shift?
 931         jcc     2f
 932         | a 32-bit shift.
 933         move.l  (%a0),%d0
 934         move.l  %d0,%d1                 | keep a copy for the sticky test
 935         lsr.l   %d2,%d0
 936         move.l  %d0,(%a0)+              | %a0 -> low lword
 937         | Check to see if we shifted off any significant bits.
 938         neg.w   %d2
 939         add.w   #32,%d2                 | %d2 = 32 - shift count
 940         lsl.l   %d2,%d1                 | only the shifted-out bits remain
 941         jeq     1f
 942         bset    #0,%d0                  | Sticky bit.
 943         | Check the lower lword
 944 1:      tst.l   (%a0)
 945         jeq     fp_ns_round
             | The whole low lword lies below single precision.  Clear
             | all of it; the size is spelled out because a bare clr
             | assembles as a word operation and would leave the lower
             | half for fp_ns_round to clean up.
 946         clr.l   (%a0)
 947         bset    #0,%d0                  | Sticky bit.
 948         jra     fp_ns_round
 949         | Sorry, the number is just too small.
 950 2:      clr.l   (%a0)+
 951         clr.l   (%a0)
 952         moveq   #1,%d0                  | Smallest possible fraction,
 953         jra     fp_ns_round             | round as desired.
 954         | Exponent overflow.  Just call it infinity.
             | Raises OVFL (and INEX2 if the low byte of the high lword
             | is non-zero; the low lword is not examined here), then
             | stores either infinity or the largest finite single,
             | depending on rounding mode and sign.
             | %a0 points at the high mantissa lword on entry.
 955 fp_ns_large:
 956         tst.b   (3,%a0)                 | low byte of the high lword
 957         jeq     1f
 958         fp_set_sr FPSR_EXC_INEX2
 959 1:      fp_set_sr FPSR_EXC_OVFL
 960         move.w  (FPD_RND,FPDATA),%d2
 961         jne     3f                      | %d2 = 0 round to nearest
 962 1:      move.w  #0x7fff,(-2,%a0)        | infinity: maximum exponent,
 963         clr.l   (%a0)+                  | zero mantissa
 964         clr.l   (%a0)
 965 2:      subq.l  #8,%a0                  | %a0 back to the struct start
 966         printf  PNORM,"%p(",1,%a0
 967         printx  PNORM,%a0@
 968         printf  PNORM,")\n"
 969         rts
 970 3:      subq.w  #2,%d2
 971         jcs     5f                      | %d2 < 2, round to zero
 972         jhi     4f                      | %d2 > 2, round to +infinity
 973         tst.b   (-3,%a0)                | to -inf
 974         jne     1b                      | negative: infinity
 975         jra     5f                      | positive: largest finite
 976 4:      tst.b   (-3,%a0)                | to +inf
 977         jeq     1b                      | positive: infinity
             | Largest finite single: exponent 0x407e, mantissa
             | 0xffffff00 0x00000000.
 978 5:      move.w  #0x407e,(-2,%a0)
 979         move.l  #0xffffff00,(%a0)+
 980         clr.l   (%a0)
 981         jra     2b
 982         | zero and denormalized
             | Entered with a zero exponent word and %a0 pointing at the
             | high mantissa lword.  A true zero is returned untouched;
             | anything else is replaced as described further down.
 983 fp_ns_zero:
 984         tst.l   (%a0)+                  | high lword (%a0 -> low lword)
 985         jne     1f
 986         tst.l   (%a0)                   | low lword
 987         jne     1f
 988         subq.l  #8,%a0                  | %a0 back to the struct start
 989         printf  PNORM,"%p(",1,%a0
 990         printx  PNORM,%a0@
 991         printf  PNORM,")\n"
 992         rts                             | zero.  nothing to do.
 993         | These are not merely subnormal numbers, but true denormals,
 994         | i.e. pathologically small (exponent is 2**-16383) numbers.
 995         | It is clearly impossible for even a normal extended number
 996         | with that exponent to fit into single precision, so just
 997         | write these ones off as "too darn small".
             | The mantissa is cleared, the exponent becomes 0x3f81 and
             | fp_ns_round gets %d0 = 1, i.e. nothing but a sticky bit.
 998 1:      fp_set_sr FPSR_EXC_UNFL         | Set UNFL bit
 999         clr.l   (%a0)                   | low lword
1000         clr.l   -(%a0)                  | high lword
1001         move.w  #0x3f81,-(%a0)          | i.e. 2**-126
1002         addq.l  #6,%a0                  | %a0 -> low lword again
1003         moveq   #1,%d0
1004         jra     fp_ns_round             | round.
1005         | Infinities or NaNs
             | Nothing is rounded here: %a0 is stepped back from the high
             | mantissa lword to the start of the struct, the value is
             | dumped when debugging and the routine returns.
1006 fp_ns_huge:
1007         subq.l  #4,%a0
1008         printf  PNORM,"%p(",1,%a0
1009         printx  PNORM,%a0@
1010         printf  PNORM,")\n"
1011         rts
1012
1013         | fp_normalize_single_fast:
1014         | normalize an extended with single (23-bit) precision
1015         | this is only used by fsgldiv/fsglmul, where the
1016         | operand is not completely normalized.
1017         | args:  %a0 (struct fp_ext *)
             | Unlike fp_normalize_single there is no check for a zero
             | exponent and no exponent range check on entry; only the
             | mantissa is rounded, and the exponent is compared against
             | the single limit solely after a rounding carry.
             | %d0 and %d2 are used as scratch.  Every exit path returns
             | with %a0 back at the start of the struct.
1018
1019 fp_normalize_single_fast:
1020         printf  PNORM,"nsf: %p(",1,%a0
1021         printx  PNORM,%a0@
1022         printf  PNORM,") "
1023         addq.l  #2,%a0                  | skip to the exponent word
1024         move.w  (%a0)+,%d2              | %a0 -> high mantissa lword
1025         cmp.w   #0x7fff,%d2
1026         jeq     fp_nsf_huge             | NaN / infinity.
1027         move.l  (%a0)+,%d0              | get high lword of mantissa
             | execution continues in fp_nsf_round
1028 fp_nsf_round:
             | In: %d0 = high mantissa lword, %a0 -> low mantissa lword.
             | The low lword is folded into a sticky bit and cleared.
             | If the 8 bits below single precision are all zero the
             | value is exact and we return, else fp_nsf_checkround.
1029         tst.l   (%a0)                   | check the low lword
1030         jeq     1f
1031         | Set a sticky bit if it is non-zero.  This should only
1032         | affect the rounding in what would otherwise be equal-
1033         | distance situations, which is what we want it to do.
1034         bset    #0,%d0
1035 1:      clr.l   (%a0)                   | zap it from memory.
1036         | now, round off the low 8 bits of the hi lword.
1037         tst.b   %d0                     | 8 low bits.
1038         jne     fp_nsf_checkround       | Are they non-zero?
1039         | nothing to do here
1040         subq.l  #8,%a0                  | %a0 back to the struct start
1041         printf  PNORM,"%p(",1,%a0
1042         printx  PNORM,%a0@
1043         printf  PNORM,")\n"
1044         rts
1045 fp_nsf_checkround:
             | The result is inexact.  %d0 still holds the unrounded high
             | lword: bit 8 is the last bit that is kept, bit 7 the guard
             | bit, bits 6-0 the remaining low bits including the sticky
             | bit.  The copy in memory has its low byte cleared and is
             | then incremented by 0x100 (one unit in the last place) if
             | the rounding mode and sign require it.
             | FPD_RND: 0 = nearest, 1 = zero, 2 = -infinity, 3 = +infinity.
1046         fp_set_sr FPSR_EXC_INEX2        | INEX2 bit
1047         clr.b   -(%a0)                  | clear low byte of high lword
1048         subq.l  #3,%a0                  | %a0 -> high lword
1049         move.w  (FPD_RND,FPDATA),%d2    | rounding mode
1050         jne     2f                      | %d2 == 0, round to nearest
1051         tst.b   %d0                     | test guard bit
1052         jpl     9f                      | zero is closer
1053         btst    #8,%d0                  | test lsb bit
1054         | round to even behaviour: on an exact tie the mantissa is
             | only incremented if its last kept bit is odd.
1055         jne     fp_nsf_doroundup                | round to infinity
1056         lsl.b   #1,%d0                  | check low bits
1057         jeq     9f                      | round to zero
1058 fp_nsf_doroundup:
1059         | round (the mantissa, that is) towards infinity
1060         add.l   #0x100,(%a0)
1061         jcc     9f                      | no overflow, good.
1062         | Overflow.  This means that the high lword was 0xffffff00,
1063         | so it is now zero.  We will set the mantissa to reflect
1064         | this, and increment the exponent (checking for overflow
             | there too)
1065         move.w  #0x8000,(%a0)           | top mantissa bit back on
1066         addq.w  #1,-(%a0)               | exponent + 1
1067         cmp.w   #0x407f,(%a0)+          | exponent now overflown?
1068         jeq     fp_nsf_large            | yes, so make it infinity.
1069 9:      subq.l  #4,%a0                  | %a0 back to the struct start
1070         printf  PNORM,"%p(",1,%a0
1071         printx  PNORM,%a0@
1072         printf  PNORM,")\n"
1073         rts
1074         | check nondefault rounding modes
1075 2:      subq.w  #2,%d2
1076         jcs     9b                      | %d2 < 2, round to zero
1077         jhi     3f                      | %d2 > 2, round to +infinity
1078         tst.b   (-3,%a0)                | to -inf
1079         jne     fp_nsf_doroundup        | negative, round to infinity
1080         jra     9b                      | positive, round to zero
1081 3:      tst.b   (-3,%a0)                | to +inf
1082         jeq     fp_nsf_doroundup                | positive, round to infinity
1083         jra     9b                      | negative, round to zero
1084         | Exponent overflow.  Just call it infinity.
             | Raises OVFL (and INEX2 if the low byte of the high lword
             | is non-zero), then stores either infinity or the largest
             | finite single, depending on rounding mode and sign.
             | %a0 points at the high mantissa lword on entry.
1085 fp_nsf_large:
1086         tst.b   (3,%a0)                 | low byte of the high lword
1087         jeq     1f
1088         fp_set_sr FPSR_EXC_INEX2
1089 1:      fp_set_sr FPSR_EXC_OVFL
1090         move.w  (FPD_RND,FPDATA),%d2
1091         jne     3f                      | %d2 = 0 round to nearest
1092 1:      move.w  #0x7fff,(-2,%a0)        | infinity: maximum exponent,
1093         clr.l   (%a0)+                  | zero mantissa
1094         clr.l   (%a0)
1095 2:      subq.l  #8,%a0                  | %a0 back to the struct start
1096         printf  PNORM,"%p(",1,%a0
1097         printx  PNORM,%a0@
1098         printf  PNORM,")\n"
1099         rts
1100 3:      subq.w  #2,%d2
1101         jcs     5f                      | %d2 < 2, round to zero
1102         jhi     4f                      | %d2 > 2, round to +infinity
1103         tst.b   (-3,%a0)                | to -inf
1104         jne     1b                      | negative: infinity
1105         jra     5f                      | positive: largest finite
1106 4:      tst.b   (-3,%a0)                | to +inf
1107         jeq     1b                      | positive: infinity
             | Largest finite single: exponent 0x407e, mantissa
             | 0xffffff00 0x00000000.
1108 5:      move.w  #0x407e,(-2,%a0)
1109         move.l  #0xffffff00,(%a0)+
1110         clr.l   (%a0)
1111         jra     2b
1112         | Infinities or NaNs
             | Nothing is rounded here: %a0 is stepped back from the high
             | mantissa lword to the start of the struct, the value is
             | dumped when debugging and the routine returns.
1113 fp_nsf_huge:
1114         subq.l  #4,%a0
1115         printf  PNORM,"%p(",1,%a0
1116         printx  PNORM,%a0@
1117         printf  PNORM,")\n"
1118         rts
1119
1120         | conv_ext2int (macro):
1121         | Generates a subroutine that converts an extended value to an
1122         | integer of a given size, again, with the appropriate type of
1123         | rounding.
1124
1125         | Macro arguments:
1126         | s:    size, as given in an assembly instruction.
1127         | b:    number of bits in that size.
1128
1129         | Subroutine arguments:
1130         | %a0:  source (struct fp_ext *)
1131
1132         | Returns the integer in %d0 (like it should)
             | INEX2 is raised when the value is not an integer, OPERR
             | when it does not fit or is a NaN that has not already
             | raised SNAN.  %d1 and %d2 are scratch and %a0 is left
             | pointing into the mantissa.
1133
1134 .macro conv_ext2int s,b
1135         .set    inf,(1<<(\b-1))-1       | i.e. MAXINT
1136         printf  PCONV,"e2i%d: %p(",2,#\b,%a0
1137         printx  PCONV,%a0@
1138         printf  PCONV,") "
1139         addq.l  #2,%a0
1140         move.w  (%a0)+,%d2              | exponent
1141         jeq     fp_e2i_zero\b           | zero / denorm (== 0, here)
1142         cmp.w   #0x7fff,%d2
1143         jeq     fp_e2i_huge\b           | Inf / NaN
1144         sub.w   #0x3ffe,%d2             | %d2 = number of integer bits
1145         jcs     fp_e2i_small\b          | magnitude below 1/2
1146         cmp.w   #\b,%d2
1147         jhi     fp_e2i_large\b          | too many integer bits
1148         move.l  (%a0),%d0               | high mantissa lword
1149         move.l  %d0,%d1
1150         lsl.l   %d2,%d1                 | %d1 = its fraction bits
1151         jne     fp_e2i_round\b
1152         tst.l   (4,%a0)                 | low lword is all fraction
1153         jne     fp_e2i_round\b
1154         neg.w   %d2
1155         add.w   #32,%d2                 | %d2 = 32 - integer bits
1156         lsr.l   %d2,%d0                 | %d0 = integer magnitude
             | Common exit: apply the sign to the magnitude in %d0 and
             | check that the result fits.
             | NOTE(review): the checks below only look at the low bits
             | selected by the size suffix.  A round-up that carries out
             | of them (e.g. 0xffff -> 0x10000 in the word variant) looks
             | like zero here and is returned without OPERR -- confirm.
1157 9:      tst.w   (-4,%a0)                | sign (first word of struct)
1158         jne     1f
1159         tst.\s  %d0
1160         jmi     fp_e2i_large\b          | positive, but top bit set
1161         printf  PCONV,"-> %p\n",1,%d0
1162         rts
1163 1:      neg.\s  %d0
1164         jeq     1f
1165         jpl     fp_e2i_large\b          | negated, but not negative
1166 1:      printf  PCONV,"-> %p\n",1,%d0
1167         rts
             | The value has fraction bits.  %d1 holds the fraction bits
             | taken from the high lword (guard bit on top); the low
             | lword is still in memory at (4,%a0).
1168 fp_e2i_round\b:
1169         fp_set_sr FPSR_EXC_INEX2        | INEX2 bit
1170         neg.w   %d2
1171         add.w   #32,%d2                 | %d2 = 32 - integer bits
1172         .if     \b>16
1173         jeq     5f                      | high lword is all integer
1174         .endif
1175         lsr.l   %d2,%d0                 | %d0 = truncated magnitude
1176         move.w  (FPD_RND,FPDATA),%d2    | rounding mode
1177         jne     2f                      | %d2 == 0, round to nearest
1178         tst.l   %d1                     | test guard bit
1179         jpl     9b                      | zero is closer
1180         btst    %d2,%d0                 | test lsb bit (%d2 still 0)
1181         jne     fp_e2i_doroundup\b
1182         lsl.l   #1,%d1                  | check low bits
1183         jne     fp_e2i_doroundup\b
1184         tst.l   (4,%a0)
1185         jeq     9b
1186 fp_e2i_doroundup\b:
1187         addq.l  #1,%d0                  | magnitude + 1
1188         jra     9b
1189         | check nondefault rounding modes
1190 2:      subq.w  #2,%d2
1191         jcs     9b                      | %d2 < 2, round to zero
1192         jhi     3f                      | %d2 > 2, round to +infinity
1193         tst.w   (-4,%a0)                | to -inf
1194         jne     fp_e2i_doroundup\b      | negative, round to infinity
1195         jra     9b                      | positive, round to zero
1196 3:      tst.w   (-4,%a0)                | to +inf
1197         jeq     fp_e2i_doroundup\b      | positive, round to infinity
1198         jra     9b      | negative, round to zero
1199         | All 32 bits of the high lword are integer bits here, so
1200         | the guard bit is in the lower lword.  Only a value that
1201         | rounds to -2**31 still fits; everything else ends up
             | as overflow anyway.
1202         .if     \b>16
1203 5:      move.w  (FPD_RND,FPDATA),%d2    | rounding mode
1204         jne     2b                      | %d2 == 0, round to nearest
1205         move.l  (4,%a0),%d1             | test guard bit
1206         jpl     9b                      | zero is closer
1207         lsl.l   #1,%d1                  | check low bits
1208         jne     fp_e2i_doroundup\b
1209         jra     9b
1210         .endif
             | Zero exponent: a zero mantissa gives 0, a denormal is
             | treated like any other tiny value.
1211 fp_e2i_zero\b:
1212         clr.l   %d0
1213         tst.l   (%a0)+
1214         jne     1f
1215         tst.l   (%a0)
1216         jeq     3f                      | true zero: return 0
1217 1:      subq.l  #4,%a0                  | %a0 -> high lword again
1218         fp_clr_sr FPSR_EXC_UNFL         | fp_normalize_ext has set this bit
             | Magnitude below 1/2: the result is 0, or +-1 when rounding
             | towards the infinity on the side of the sign.
1219 fp_e2i_small\b:
1220         fp_set_sr FPSR_EXC_INEX2
1221         clr.l   %d0
1222         move.w  (FPD_RND,FPDATA),%d2    | rounding mode
1223         subq.w  #2,%d2
1224         jcs     3f                      | %d2 < 2, round to nearest/zero
1225         jhi     2f                      | %d2 > 2, round to +infinity
1226         tst.w   (-4,%a0)                | to -inf
1227         jeq     3f
1228         subq.\s #1,%d0                  | negative: -1
1229         jra     3f
1230 2:      tst.w   (-4,%a0)                | to +inf
1231         jne     3f
1232         addq.\s #1,%d0                  | positive: +1
1233 3:      printf  PCONV,"-> %p\n",1,%d0
1234         rts
             | Out of range: raise OPERR and return MAXINT for positive
             | or MAXINT + 1 (which wraps to the minimum) for negative.
1235 fp_e2i_large\b:
1236         fp_set_sr FPSR_EXC_OPERR
1237         move.\s #inf,%d0
1238         tst.w   (-4,%a0)
1239         jeq     1f
1240         addq.\s #1,%d0
1241 1:      printf  PCONV,"-> %p\n",1,%d0
1242         rts
             | Maximum exponent.  A zero mantissa is an infinity and is
             | handled as overflow; anything else is a NaN, for which the
             | top of the mantissa is returned.
1243 fp_e2i_huge\b:
1244         move.\s (%a0),%d0
1245         tst.l   (%a0)                   | high mantissa lword
1246         jne     1f
             | The low lword has to be checked as well: testing the high
             | lword a second time would turn a NaN whose only set bits
             | are in the low lword into an infinity.
1247         tst.l   (4,%a0)                 | low mantissa lword
1248         jeq     fp_e2i_large\b
1249         | fp_normalize_ext has set this bit already
1250         | and made the number nonsignaling
1251 1:      fp_tst_sr FPSR_EXC_SNAN
1252         jne     1f
1253         fp_set_sr FPSR_EXC_OPERR
1254 1:      printf  PCONV,"-> %p\n",1,%d0
1255         rts
1256 .endm
1257
1258 fp_conv_ext2long:
             | %a0 (struct fp_ext *) -> 32-bit integer in %d0; the body
             | is the conv_ext2int macro expanded for size l.
1259         conv_ext2int l,32
1260
1261 fp_conv_ext2short:
             | %a0 (struct fp_ext *) -> 16-bit integer in %d0; the body
             | is the conv_ext2int macro expanded for size w.
1262         conv_ext2int w,16
1263
1264 fp_conv_ext2byte:
             | %a0 (struct fp_ext *) -> 8-bit integer in %d0; the body
             | is the conv_ext2int macro expanded for size b.
1265         conv_ext2int b,8
1266
1267 fp_conv_ext2double:
             | Rounds the extended value at %a0 to double precision (in
             | place, via fp_normalize_double), packs it into the 64-bit
             | double layout and writes the two lwords with putuser to
             | the address in %a1.
             | %d0-%d2 are clobbered, %a0 ends up at the low mantissa
             | lword and %a1 is advanced by 4.  fp_err_ua2 is handed to
             | putuser, presumably as the fault handler -- see its
             | definition.
1268         jsr     fp_normalize_double
1269         printf  PCONV,"e2d: %p(",1,%a0
1270         printx  PCONV,%a0@
1271         printf  PCONV,"), "
1272         move.l  (%a0)+,%d2              | sign byte and exponent word
1273         cmp.w   #0x7fff,%d2
1274         jne     1f
1275         move.w  #0x7ff,%d2              | Inf / NaN: all-ones exponent
1276         move.l  (%a0)+,%d0
1277         jra     2f
1278 1:      sub.w   #0x3fff-0x3ff,%d2       | rebias the exponent
1279         move.l  (%a0)+,%d0              | high mantissa lword
1280         jmi     2f                      | top mantissa bit is set
1281         clr.w   %d2                     | else exponent field = 0
             | Shift the low word left by 5 and then the whole lword by
             | 15: the 11 exponent bits land in bits 30-20 and bit 16
             | (low bit of the sign byte) lands in bit 31.
1282 2:      lsl.w   #5,%d2
1283         lsl.l   #7,%d2
1284         lsl.l   #8,%d2
1285         move.l  %d0,%d1
1286         lsl.l   #1,%d0                  | drop the top mantissa bit
1287         lsr.l   #4,%d0
1288         lsr.l   #8,%d0                  | next 20 bits -> bits 19-0
1289         or.l    %d2,%d0
1290         putuser.l %d0,(%a1)+,fp_err_ua2,%a1
1291         moveq   #21,%d0
1292         lsl.l   %d0,%d1                 | low 11 bits of high lword
1293         move.l  (%a0),%d0
1294         lsr.l   #4,%d0
1295         lsr.l   #7,%d0                  | top 21 bits of low lword
1296         or.l    %d1,%d0
1297         putuser.l %d0,(%a1),fp_err_ua2,%a1
1298 #ifdef FPU_EMU_DEBUG
             | read the result back for the debug output
1299         getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
1300         getuser.l %a1@(0),%d1,fp_err_ua2,%a1
1301         printf  PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
1302 #endif
1303         rts
1304
1305 fp_conv_ext2single:
             | Rounds the extended value at %a0 to single precision (in
             | place, via fp_normalize_single) and returns it packed in
             | the 32-bit single layout in %d0.
             | %d1 and %d2 are clobbered and %a0 ends up at the low
             | mantissa lword.
1306         jsr     fp_normalize_single
1307         printf  PCONV,"e2s: %p(",1,%a0
1308         printx  PCONV,%a0@
1309         printf  PCONV,"), "
1310         move.l  (%a0)+,%d1              | sign byte and exponent word
1311         cmp.w   #0x7fff,%d1
1312         jne     1f
1313         move.w  #0xff,%d1               | Inf / NaN: all-ones exponent
1314         move.l  (%a0)+,%d0
1315         jra     2f
1316 1:      sub.w   #0x3fff-0x7f,%d1        | rebias the exponent
1317         move.l  (%a0)+,%d0              | high mantissa lword
1318         jmi     2f                      | top mantissa bit is set
1319         clr.w   %d1                     | else exponent field = 0
             | Shift the low word left by 8 and then the whole lword by
             | 15: the 8 exponent bits land in bits 30-23 and bit 16
             | (low bit of the sign byte) lands in bit 31.
1320 2:      lsl.w   #8,%d1
1321         lsl.l   #7,%d1
1322         lsl.l   #8,%d1
1323         bclr    #31,%d0                 | drop the top mantissa bit
1324         lsr.l   #8,%d0                  | next 23 bits -> bits 22-0
1325         or.l    %d1,%d0
1326         printf  PCONV,"%08x\n",1,%d0
1327         rts
1328
1329         | special return addresses for instr that
1330         | encode the rounding precision in the opcode
1331         | (e.g. fsmove,fdmove)
             | Each variant drops 8 bytes from the stack, normalizes the
             | result at %a0 and rounds it to a fixed precision without
             | looking at FPD_PREC, then continues in fp_finaltest.
             | NOTE(review): what the 8 discarded stack bytes hold is not
             | visible here -- check the code that branches to these.
1332
1333 fp_finalrounding_single:
1334         addq.l  #8,%sp
1335         jsr     fp_normalize_ext
1336         jsr     fp_normalize_single     | always single precision
1337         jra     fp_finaltest
1338
1339 fp_finalrounding_single_fast:
             | Same as fp_finalrounding_single, but rounds with
             | fp_normalize_single_fast.
1340         addq.l  #8,%sp
1341         jsr     fp_normalize_ext
1342         jsr     fp_normalize_single_fast
1343         jra     fp_finaltest
1344
1345 fp_finalrounding_double:
             | Drops 8 bytes from the stack, normalizes the result at
             | %a0, always rounds it to double precision and continues
             | in fp_finaltest.
1346         addq.l  #8,%sp
1347         jsr     fp_normalize_ext
1348         jsr     fp_normalize_double     | always double precision
1349         jra     fp_finaltest
1350
1351         | fp_finalrounding / fp_finaltest:
1352         | set the emulated status register based on the outcome of an
1353         | emulated instruction.
             | fp_finalrounding first drops 8 bytes from the stack,
             | normalizes the result at %a0 and rounds it to the precision
             | selected by FPD_PREC (0: leave as is, 1: single, anything
             | else: double).  It then runs straight into fp_finaltest.
1354
1355 fp_finalrounding:
1356         addq.l  #8,%sp
1357 |       printf  ,"f: %p\n",1,%a0
1358         jsr     fp_normalize_ext
1359         move.w  (FPD_PREC,FPDATA),%d0
1360         subq.w  #1,%d0
1361         jcs     fp_finaltest            | was 0: no extra rounding
1362         jne     1f                      | not 1: double
1363         jsr     fp_normalize_single
1364         jra     2f
1365 1:      jsr     fp_normalize_double
1366 2:|     printf  ,"f: %p\n",1,%a0
1367 fp_finaltest:
1368         | First, we do some of the obvious tests for the exception
1369         | status byte and condition code bytes of fp_sr here, so that
1370         | they do not have to be handled individually by every
1371         | emulated instruction.
             | In: %a0 = result (struct fp_ext *).  The condition code
             | byte is built in %d0 and stored in byte 0 of FPD_FPSR,
             | which is why the FPSR_CC_* bit numbers are used minus 24.
             | %d1 and %d2 are scratch; %a0 is advanced along the way.
             | Execution continues in fp_final.
1372         clr.l   %d0
1373         addq.l  #1,%a0
1374         tst.b   (%a0)+                  | sign
1375         jeq     1f
1376         bset    #FPSR_CC_NEG-24,%d0     | N bit
1377 1:      cmp.w   #0x7fff,(%a0)+          | exponent
1378         jeq     2f
1379         | test for zero
1380         moveq   #FPSR_CC_Z-24,%d1
1381         tst.l   (%a0)+
1382         jne     9f                      | non-zero: no further bit
1383         tst.l   (%a0)
1384         jne     9f
1385         jra     8f                      | mantissa is zero: set Z
1386         | infinity and NaN
1387 2:      moveq   #FPSR_CC_NAN-24,%d1
1388         move.l  (%a0)+,%d2
1389         lsl.l   #1,%d2                  | ignore high bit
1390         jne     8f                      | mantissa bits set: NaN
1391         tst.l   (%a0)
1392         jne     8f
1393         moveq   #FPSR_CC_INF-24,%d1     | mantissa clear: infinity
1394 8:      bset    %d1,%d0
1395 9:      move.b  %d0,(FPD_FPSR+0,FPDATA) | set condition test result
1396         | move instructions enter here
1397         | Here, we test things in the exception status byte, and set
1398         | other things in the accrued exception byte accordingly.
1399         | Emulated instructions can set various things in the former,
1400         | as defined in fp_emu.h.
             | Afterwards the exception status byte is masked with byte 2
             | of FPD_FPCR; a non-zero result only prints a message here.
             | Control then passes to fp_end.  %d0-%d2 are clobbered.
1401 fp_final:
1402         move.l  (FPD_FPSR,FPDATA),%d0
1403 #if 0
             | Reference version, one bit test at a time:
             |   IOP  is set on SNAN or OPERR
             |   OVFL is set on OVFL
             |   UNFL is set on UNFL together with INEX2
             |   DZ   is set on DZ
             |   INEX is set on OVFL, INEX2 or INEX1
1404         btst    #FPSR_EXC_SNAN,%d0      | EXC_SNAN
1405         jne     1f
1406         btst    #FPSR_EXC_OPERR,%d0     | EXC_OPERR
1407         jeq     2f
1408 1:      bset    #FPSR_AEXC_IOP,%d0      | set IOP bit
1409 2:      btst    #FPSR_EXC_OVFL,%d0      | EXC_OVFL
1410         jeq     1f
1411         bset    #FPSR_AEXC_OVFL,%d0     | set OVFL bit
1412 1:      btst    #FPSR_EXC_UNFL,%d0      | EXC_UNFL
1413         jeq     1f
1414         btst    #FPSR_EXC_INEX2,%d0     | EXC_INEX2
1415         jeq     1f
1416         bset    #FPSR_AEXC_UNFL,%d0     | set UNFL bit
1417 1:      btst    #FPSR_EXC_DZ,%d0        | EXC_DZ
1418         jeq     1f
1419         bset    #FPSR_AEXC_DZ,%d0       | set DZ bit
1420 1:      btst    #FPSR_EXC_OVFL,%d0      | EXC_OVFL
1421         jne     1f
1422         btst    #FPSR_EXC_INEX2,%d0     | EXC_INEX2
1423         jne     1f
1424         btst    #FPSR_EXC_INEX1,%d0     | EXC_INEX1
1425         jeq     2f
1426 1:      bset    #FPSR_AEXC_INEX,%d0     | set INEX bit
1427 2:      move.l  %d0,(FPD_FPSR,FPDATA)
1428 #else
1429         | same as above, greatly optimized, but untested (yet)
             | Bit numbers in the comments below refer to the FPSR lword
             | in %d2.  The shifts only agree with the reference version
             | if the FPSR_EXC_* and FPSR_AEXC_* constants (defined
             | outside this file) are laid out to match -- TODO confirm.
1430         move.l  %d0,%d2                 | %d2 = FPSR, kept intact
1431         lsr.l   #5,%d0
1432         move.l  %d0,%d1
1433         lsr.l   #4,%d1
1434         or.l    %d0,%d1                 | (FPSR >> 5) | (FPSR >> 9)
1435         and.b   #0x08,%d1               | bit 3 = bit 8 or bit 12
1436         move.l  %d2,%d0
1437         lsr.l   #6,%d0                  | bits 13-9 -> bits 7-3
1438         or.l    %d1,%d0
1439         move.l  %d2,%d1
1440         lsr.l   #4,%d1                  | bit 9 -> bit 5
1441         or.b    #0xdf,%d1               | all ones except bit 5
1442         and.b   %d1,%d0                 | bit 5 only with bit 9 set
1443         move.l  %d2,%d1
1444         lsr.l   #7,%d1
1445         and.b   #0x80,%d1               | bit 14 -> bit 7
1446         or.b    %d1,%d0
1447         and.b   #0xf8,%d0               | keep bits 7-3 only
1448         or.b    %d0,%d2                 | accumulate in the low byte
1449         move.l  %d2,(FPD_FPSR,FPDATA)
1450 #endif
1451         move.b  (FPD_FPSR+2,FPDATA),%d0
1452         and.b   (FPD_FPCR+2,FPDATA),%d0
1453         jeq     1f
1454         printf  ,"send signal!!!\n"
1455 1:      jra     fp_end