;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm)

;;;;
;;;; Number-related functions and constants
;;;;

(defining-consts
  (+half-pi+    (/ pi 2))
  (+quarter-pi+ (/ pi 4))
  (+two-pi+     (* pi 2))
  (+four-pi+    (* pi 4))

  (+positive-inf/sfloat+ #+sbcl sb-ext:single-float-positive-infinity
                         #+ccl  1f++0
                         #-(or sbcl ccl) most-positive-single-float)

  (+positive-inf/dfloat+ #+sbcl sb-ext:double-float-positive-infinity
                         #+ccl  1d++0
                         #-(or sbcl ccl) most-positive-double-float)

  (+negative-inf/sfloat+ #+sbcl sb-ext:single-float-negative-infinity
                         #+ccl  -1f++0
                         #-(or sbcl ccl) most-negative-single-float)

  (+negative-inf/dfloat+ #+sbcl sb-ext:double-float-negative-infinity
                         #+ccl  -1d++0
                         #-(or sbcl ccl) most-negative-double-float))


(define-typed-fn num-8bit-bytes-needed ((integer number))
    ((integer 0 *) t)
  "Determines the number of 8-bit bytes that NUMBER needs in order to be stored
somewhere."
  (declare (optimize (speed 3) (debug 1))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)) ;; For INTEGER-LENGTH
  (if (= number 0)
       1
       (ash (logand (+ 7 (integer-length number))
                    (lognot 7))
            -3)))

(define-typed-fn int->uint/2c ((integer num) ((integer 1 *) width-in-bits))
    ((integer 0 *) t)
  "Converts NUM to an unsigned integer of WIDTH bits."
  (declare (type integer num)
           (type (integer 1 *) width-in-bits)
           (optimize speed (debug 1) (safety 0) (compilation-speed 0)))
  (case width-in-bits
    (8  (the t/uint8  (ldb (byte 8  0) (the t/int8  num))))
    (16 (the t/uint16 (ldb (byte 16 0) (the t/int16 num))))
    (24 (the t/uint24 (ldb (byte 24 0) (the t/int24 num))))
    (32 (the t/uint32 (ldb (byte 32 0) (the t/int32 num))))
    (64 (muffling (the t/uint64 (ldb (byte 64 0) num))))
    (128 (muffling (the t/uint128 (ldb (byte 128 0) num))))
    (otherwise (muffling (ldb (byte width-in-bits 0) num)))))

(define-compiler-macro int->uint/2c (&whole form num width-in-bits)
  (cond
    ((and (constantp width-in-bits)
          (integerp width-in-bits)
          (typep width-in-bits '(member 8 16 24 32 64 128)))
     ;; Emit code using some pre-defined bit widths that include more type information.
     (ecase width-in-bits
       (8  `(the t/uint8  (ldb (byte 8  0) (the t/int8  ,num))))
       (16 `(the t/uint16 (ldb (byte 16 0) (the t/int16 ,num))))
       (24 `(the t/uint24 (ldb (byte 24 0) (the t/int24 ,num))))
       (32 `(the t/uint32 (ldb (byte 32 0) (the t/int32 ,num))))
       (64 `(muffling (the t/uint64 (ldb (byte 64 0) ,num))))
       (128 `(muffling (the t/uint128 (ldb (byte 128 0) ,num))))))

    ((and (constantp width-in-bits)
          (integerp width-in-bits))
     ;; We still have an integer WIDTH-IN-BITS value, so emit a slightly
     ;; slower, but still efficient, form.
     `(muffling (ldb (byte ,width-in-bits 0) ,num)))

    (t
     ;; Emit the full function call
     form)))

(define-typed-fn uint->int/2c (((integer 0 *) num) ((integer 1 *) width-in-bits))
    (integer t)
  "Converts NUM to an signed integer of WIDTH bits."
  (declare (optimize speed (debug 1) (safety 0) (compilation-speed 0)))
  (if (muffling (logbitp (1- width-in-bits) num))
      (case width-in-bits
        (8  (the t/int8  (dpb (the t/uint8  num) (byte 8  0) -1)))
        (16 (the t/int16 (dpb (the t/uint16 num) (byte 16 0) -1)))
        (24 (the t/int24 (dpb (the t/uint24 num) (byte 24 0) -1)))
        (32 (the t/int32 (dpb (the t/uint32 num) (byte 32 0) -1)))
        (64 (the t/int64 (dpb (the t/uint64 num) (byte 64 0) -1)))
        (128 (muffling (the t/int128 (dpb (the t/uint128 num) (byte 128 0) -1))))
        (otherwise
         (muffling
           (dpb (the t/uint128 num) (byte width-in-bits 0) -1))))

      num))

(define-compiler-macro uint->int/2c (&whole form num width-in-bits)
  (if (typep width-in-bits '(integer 1 *))
      (if (typep width-in-bits '(member 8 16 24 32 64 128))
          ;; Emit code using some pre-defined bit widths that include more type information.
          (ecase width-in-bits
            (8
             `(let ((val ,num))
                (if (muffling (logbitp 7 val))
                    (the t/int8 (dpb (the t/uint8 val) (byte 8 0) -1))
                    val)))

             (16
              `(let ((val ,num))
                 (if (muffling (logbitp 15 val))
                     (the t/int16 (dpb (the t/uint16 val) (byte 16 0) -1))
                     val)))

             (24
              `(let ((val ,num))
                 (if (muffling (logbitp 23 val))
                     (the t/int24 (dpb (the t/uint24 val) (byte 24 0) -1))
                     val)))

             (32
              `(let ((val ,num))
                 (if (muffling (logbitp 31 val))
                     (the t/int32 (dpb (the t/uint32 val) (byte 32 0) -1))
                     val)))

             (64
              `(let ((val ,num))
                 (if (muffling (logbitp 63 val))
                     (the t/int64 (dpb (the t/uint64 val) (byte 64 0) -1))
                     val)))

             (128
              `(let ((val ,num))
                 (if (muffling (logbitp 127 val))
                     (muffling (the t/int128 (dpb (the t/uint128 val) (byte 128 0) -1)))
                     val))))

          ;; We still have an integer WIDTH-IN-BITS value, so emit a slightly
          ;; slower, but still efficient, form.
          `(let ((val ,num))
             (if (muffling (logbitp (1- ,width-in-bits) val))
                 (muffling (dpb val (byte ,width-in-bits 0) -1))
                 val)))
      ;; Emit the full function call
      form))

(define-typed-fn %mask-signed ((integer x) (t/uint8 size))
    (integer t)
  "Internal utility function used to convert X to a signed integer for the given
SIZE in bits.  This does not check that X fits in SIZE bits."
  (declare (optimize (speed 3) (safety 0) (debug 1) (space 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (logior x (- (mask-field (byte 1 (1- size)) x))))

(define-compiler-macro %mask-signed (&whole form x size)
  (if (integerp size)
      ;; Do the size stuff now since it's a literal integer.
      `(let ((val ,x))
         (logior val (- (mask-field (byte 1 ,(1- size)) val))))
      form))

(define-typed-fn signed->unsigned ((integer num) ((integer 1 *) size-in-bits))
    (integer t)
  "Converts NUM from an signed value to an unsigned value.  If NUM is already
unsigned, this effectively does nothing.  You must provide the size in bits, and
NUM is not checked to see if it fits in SIZE-IN-BITS bits."
  (declare (optimize (speed 3) (space 0) (debug 1) (safety 1))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (deposit-field num (byte size-in-bits 0) 0))

(define-compiler-macro signed->unsigned (&whole form num size-in-bits)
  (if (typep size-in-bits '(integer 1 *))
      ;; Emit with the constant placed into the form
      `(deposit-field ,num (byte ,size-in-bits 0) 0)
      form))

(defmacro define-signed-coercion-fn (name bit-width doc)
  (let ((res-type (ecase bit-width
                    (8  't/int8)
                    (16 't/int16)
                    (24 't/int24)
                    (32 't/int32)
                    (64 't/int64)))

        (full-type (list 'signed-byte bit-width))
        (mask (1- (expt 2 bit-width))))
    (declare (ignorable full-type mask))

    `(progn
       (define-typed-fn ,name ((integer num))
           (,res-type t)
         ,doc
         (declare (optimize (speed 3) (space 0) (debug 1) (safety 0))
                  #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
         (the ,full-type (%mask-signed (logand ,mask num) ,bit-width)))

       (define-compiler-macro ,name (&whole form num &environment env)
         (declare (ignorable env num))
         (let ((num-type
                 #+sbcl (when (symbolp num) (nth-value 2 (sb-cltl2:variable-information num env)))
                 #-sbcl nil))
           (declare (ignorable num-type))
           (cond
             #+sbcl
             ((and (listp (car num-type))
                   (listp (cdar num-type))
                   (= (length (cdar num-type)) 2)
                   (eq (cadar num-type) 'signed-byte)
                   (numberp (caddar num-type))
                   (<= (caddar num-type) ,bit-width))
              ;; NUM is already in the expected type, so just emit NUM.
              `,num)
             (t
              ;; Emit full call
              form)))))))

(define-signed-coercion-fn coerce-to-int8  8  "Converts NUM into a signed 8-bit integer.")
(define-signed-coercion-fn coerce-to-int16 16 "Converts NUM into a signed 16-bit integer.")
(define-signed-coercion-fn coerce-to-int24 24 "Converts NUM into a signed 24-bit integer.")
(define-signed-coercion-fn coerce-to-int32 32 "Converts NUM into a signed 32-bit integer.")
(define-signed-coercion-fn coerce-to-int64 64 "Converts NUM into a signed 64-bit integer.")

(defmacro define-unsigned-coercion-fn (name bit-width doc)
  (let ((res-type (ecase bit-width
                    (8  't/uint8)
                    (16 't/uint16)
                    (24 't/uint24)
                    (32 't/uint32)
                    (64 't/uint64)))

        (full-type (list 'unsigned-byte bit-width))
        (mask (1- (expt 2 bit-width))))
    (declare (ignorable full-type mask))

    `(progn
       #+(or sbcl ccl clisp)
       (define-typed-fn ,name ((integer num))
           (,res-type t)
         ,doc
         (declare (optimize (speed 3) (space 0) (debug 1) (safety 0))
                  #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
         (the ,full-type (logand ,mask num)))

       #-(or sbcl ccl clisp)
       (define-typed-fn ,name ((integer num))
           (,res-type t)
         ,doc
         (declare (optimize (speed 3) (space 0) (debug 1) (safety 1)))
         (signed->unsigned (%mask-signed num ,bit-width) ,bit-width))

       (define-compiler-macro ,name (&whole form num &environment env)
         (declare (ignorable env num))
         (let ((num-type
                 #+sbcl (when (symbolp num) (nth-value 2 (sb-cltl2:variable-information num env)))
                 #-sbcl nil))
           (declare (ignorable num-type))
           (cond
             #+sbcl
             ((and (listp (car num-type))
                   (listp (cdar num-type))
                   (= (length (cdar num-type)) 2)
                   (eq (cadar num-type) 'unsigned-byte)
                   (numberp (caddar num-type))
                   (<= (caddar num-type) ,bit-width))
              ;; NUM is already in the expected type, so just emit NUM.
              `,num)
             (t
              ;; Emit full call
              form)))))))

(define-unsigned-coercion-fn coerce-to-uint8  8  "Converts NUM into an unsigned 8-bit integer.")
(define-unsigned-coercion-fn coerce-to-uint16 16 "Converts NUM into an unsigned 16-bit integer.")
(define-unsigned-coercion-fn coerce-to-uint24 24 "Converts NUM into an unsigned 24-bit integer.")
(define-unsigned-coercion-fn coerce-to-uint32 32 "Converts NUM into an unsigned 32-bit integer.")
(define-unsigned-coercion-fn coerce-to-uint64 64 "Converts NUM into an unsigned 64-bit integer.")

(define-typed-fn coerce-to-bit-size ((integer num) (fixnum bits) &optional as-unsigned)
    (integer t)
  "Converts NUM to an integer that is BITS bits wide.  If AS-UNSIGNED is truthy,
then the converted value will further be converted into an unsigned integer of
that size.

This is similar to the COERCE-TO-INTx and COERCE-TO-UINTx functions, except it
can take an arbitrary bit size."
  (declare (optimize (speed 3) (debug 1) (safety 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (let ((ret (%mask-signed (logand num (1- (ash 2 (1- bits)))) bits)))
    (if as-unsigned
        (signed->unsigned ret bits)
        ret)))

(define-compiler-macro coerce-to-bit-size (&whole form num bits &optional as-unsigned)
  (if (and (constantp bits)
           (integerp bits))
      ;; Emit a form with a few things pre-computed
      (progn
        `(let* ((val ,num) ;; Get the value now since %MASK-SIGNED is also a
                           ;; compiler macro, and we don't want to expand NUM
                           ;; twice.
                (ret (muffling (%mask-signed (logand val ,(1- (ash 2 (1- bits)))) ,bits))))
           ,(if (constantp as-unsigned)
                (if as-unsigned
                    `(signed->unsigned ret ,bits)
                    `ret)
                `(if ,as-unsigned
                     (signed->unsigned ret ,bits)
                     ret))))

      ;; Emit the whole form.
      (progn
        form)))

(defmacro clamp (value min max)
  "If VALUE is less than MIN, this returns MIN.  If VALUE is greater than MAX,
this returns MAX.  Otherwise this returns VALUE."
  (if (and (numberp min) (numberp max))
      ;; MIN and MAX are both literal numbers, so we can tighten up the form a
      ;; bit.
      (if (numberp value)
          ;; Easiest case, everything is a number literal
          `(cond
             ((< ,value ,min) ,min)
             ((> ,value ,max) ,max)
             (t ,value))

          ;; VALUE isn't a literal number, so expand that out.
          (with-gensyms (val-val)
            `(let ((,val-val ,value))
               (cond
                 ((< ,val-val ,min) ,min)
                 ((> ,val-val ,max) ,max)
                 (t ,val-val)))))

      ;; MIN and/or MAX are not literal numbers, so let's just do a larger form.
      (with-gensyms (min-val max-val val-val)
        ;; Only call MIN, MAX, and VALUE once in case they're forms.
        `(let ((,min-val ,min)
               (,max-val ,max)
               (,val-val ,value))
           (cond
             ((< ,val-val ,min-val)
              ,min-val)
             ((> ,val-val ,max-val)
              ,max-val)
             (t ,val-val))))))

(defmacro clampf (destination min max)
  `(setf ,destination (clamp ,destination ,min ,max)))

(define-typed-fn num-leading-zeros ((integer num))
    (integer)
  "Returns the number of zero bits in the number before a 1 bit is encountered.
NUM is treated as a number requiring X bits, where X is computed using
NUM-8BIT-BYTES-NEEDED, and thus may be padded with extra zero bits."
  (declare (optimize (speed 3) (debug 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (let ((bit-size (* (num-8bit-bytes-needed num) 8)))
    (declare (type integer bit-size))
    (loop for i from (1- bit-size) downto 0
          with total integer = 0
          if (not (ldb-test (byte 1 i) num)) do
            (incf total)
          else do (loop-finish)
          finally (return total))))

(define-typed-fn num-leading-ones ((integer num))
    (integer)
  "Returns the number of 1 bits in the number before a zero bit is encountered.
NUM is treated as a number requiring X bits, where X is computed using
NUM-8BIT-BYTES-NEEDED, and thus may be padded with extra zero bits."
  (declare (optimize (speed 3) (debug 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (let ((bit-size (* (num-8bit-bytes-needed num) 8)))
    (declare (type integer bit-size))
    (loop for i from (1- bit-size) downto 0
          with total integer = 0
          if (ldb-test (byte 1 i) num) do
            (incf total)
          else do (loop-finish)
          finally (return total))))

(defmacro define-endianness-swap-fn (name size &rest types)
  (unless (> size 1)
    (error "SIZE must be at least 2"))

  `(define-typed-fn ,name (((or ,@types) number))
       ((or ,@types) t)
     (declare #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)
              (optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))
     (let* (,@(loop for i from 0 below size
                    for bpos from 0 by 8
                    for sym = (intern (format nil "B~a" (1+ i)) :cl-sdm)
                    collect `(,sym (ldb (byte 8 ,bpos) number))))
       ;; or this: (dpb b1 (byte 8 8) (dpb b2 (byte 8 0) 0)))
       (logior ,@(loop for i from 1
                       for shift from (* (1- size) 8) downto 0 by 8
                       for sym = (intern (format nil "B~a" i) :cl-sdm)
                       if (< i size)
                         collect `(ash ,sym ,shift)
                       else
                         collect `,sym)))))

(define-endianness-swap-fn %swap-endianness/16-byte 16 t/uint128 t/int128)
(define-endianness-swap-fn %swap-endianness/8-byte 8 t/uint64 t/int64)
(define-endianness-swap-fn %swap-endianness/4-byte 4 t/uint32 t/int32)
(define-endianness-swap-fn %swap-endianness/3-byte 3 t/uint24 t/int24)
(define-endianness-swap-fn %swap-endianness/2-byte 2 t/uint16 t/int16)

(defmacro swap-endianness (number size-in-bits &key force-signed safe)
  "Swaps the endianness of the bytes in NUMBER.  SIZE-IN-BITS should be the size
of the number in bits, and can be one of: 8, 16, 32, 64, 128.

If FORCE-SIGNED is truthy, then the resulting number will be converted to a
signed integer.  Otherwise, this results in an unsigned integer.

If SAFE is truthy, then the size of NUMBER is first checked to ensure it will
fit within SIZE-IN-BITS bits.  Otherwise, this is not checked and it's just
assumed that NUMBER will fit."
  (with-gensyms (ret)
    `(locally
         (declare #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note)
                  (optimize (speed 3) (debug 0) (safety 0) (compilation-speed 0)))
       (let ((,ret ,(ecase size-in-bits
                      (8 (if safe
                             `(progn
                                (check-type ,number (or t/uint8 t/int8))
                                ,number)
                             `,number))

                      (16
                       (if safe
                           `(progn
                              (check-type ,number (or t/uint16 t/int16))
                              (%swap-endianness/2-byte ,number))
                           `(%swap-endianness/2-byte ,number)))

                      (24
                       (if safe
                           `(progn
                              (check-type ,number (or t/uint24 t/int24))
                              (%swap-endianness/3-byte ,number))
                           `(%swap-endianness/3-byte ,number)))

                      (32
                       (if safe
                           `(progn
                              (check-type ,number (or t/uint32 t/int32))
                              (%swap-endianness/4-byte ,number))
                           `(%swap-endianness/4-byte ,number)))

                      (64
                       (if safe
                           `(progn
                              (check-type ,number (or t/uint64 t/int64))
                              (%swap-endianness/8-byte ,number))
                           `(%swap-endianness/8-byte ,number)))

                      (128
                       (if safe
                           `(progn
                              (check-type ,number (or t/uint128 t/int128))
                              (%swap-endianness/16-byte ,number))
                           `(%swap-endianness/16-byte ,number))))))
         ,(if force-signed
              `(uint->int/2c ,ret ,size-in-bits)
              `,ret)))))

(defmacro flag? (num check)
  "Checks to see if CHECK is set in NUM.  That is, if (LOGAND NUM CHECK) is not
equal to 0, this returns T, otherwise it returns NIL.

This is equivalent to LOGTEST."
  `(logtest ,check ,num))
;;  `(/= (logand ,num ,check) 0))

(defmacro logxorf (num1 num2)
  "Performs a LOGXOR with NUM1 and NUM2, then stores the results in NUM1.  This
is essentially shorthand for (SETF NUM1 (LOGXOR NUM1 NUM2))."
  `(setf ,num1 (logxor ,num1 ,num2)))

(defmacro logiorf (num1 num2)
  "Performs a LOGIOR with NUM1 and NUM2, then stores the results in NUM1.  This
is essentially shorthand for (SETF NUM1 (LOGIOR NUM1 NUM2))."
  `(setf ,num1 (logior ,num1 ,num2)))

(defmacro logandf (num1 num2)
  "Performs a LOGAND with NUM1 and NUM2, then stores the results in NUM1.  This
is essentially shorthand for (SETF NUM1 (LOGAND NUM1 NUM2))."
  `(setf ,num1 (logand ,num1 ,num2)))

(defmacro logandc2f (num1 num2)
  "Performs a LOGAND with NUM1 and (LOGNOT NUM2), then stores the results in NUM1.
This is essentially shorthand for (SETF NUM1 (LOGANDC2 NUM1 NUM2))."
  `(setf ,num1 (logandc2 ,num1 ,num2)))

(defmacro if-within ((thing min max) true-form &optional false-form)
  "If THING evaluates to a form that is both <= MIN and >= MAX, then TRUE-FORM
is executed.  Otherwise, FALSE-FORM is executed."
  `(if (and (>= ,thing ,min) (<= ,thing ,max))
       ,true-form
       ,false-form))

(defmacro if-within! ((thing min max) true-form &optional false-form)
  "If THING evaluates to a form that is both < MIN and > MAX, then TRUE-FORM is
executed.  Otherwise, FALSE-FORM is executed."
  `(if (and (> ,thing ,min) (< ,thing ,max))
       ,true-form
       ,false-form))

(defmacro when-within ((thing min max) &body forms)
  "If THING evaluates to a form such that (AND (<= THING MIN) (>= THING MAX)) is
truthy, then FORMS are executed."
  `(when (and (>= ,thing ,min) (<= ,thing ,max))
     ,@forms))

(defmacro when-within! ((thing min max) &body forms)
  "If THING evaluates to a form such that (AND (< THING MIN) (> THING MAX)) is
truthy, then FORMS are executed."
  `(when (and (> ,thing ,min) (< ,thing ,max))
     ,@forms))

(defmacro unless-within ((thing min max) &body forms)
  "If THING evaluates to a form such that (AND (<= THING MIN) (>= THING MAX)) is
not truthy, then FORMS are executed."
  `(unless (and (>= ,thing ,min) (<= ,thing ,max))
     ,@forms))

(defmacro unless-within! ((thing min max) &body forms)
  "If THING evaluates to a form such that (AND (< THING MIN) (> THING MAX)) is
not truthy, then FORMS are executed."
  `(unless (and (> ,thing ,min) (< ,thing ,max))
     ,@forms))

;;;
;;; Fast Sin/Cos/Tan/InvSin/InvCos/InvTan approximations
;;; Domains:
;;;   Sin/Cos [0, pi/2]
;;;   Tan [0,pi/4]
;;;   InvSin/Cos [0, 1]
;;;   InvTan [-1, 1]
;;;
;;; We use macros to define separate varieties of each so that we can
;;; use different literals and let the compiler have more
;;; opportunities to produce fast code while still maintaining
;;; accuracy with double floats.  And without any COERCE calls.  It's
;;; a bit overkill and makes this code harder to read, but I figure
;;; these are used mainly when speed is already a concern, so more
;;; opportunities for the compiler to produce fast code is worth it.
;;;
;;; Source: https://www.musicdsp.org/en/latest/Other/115-sin-cos-tan-approximation.html
;;;

(defmacro %define-fast-sin (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float -2.39d-08)
                      (long-float -2.39l-08)
                      (otherwise -2.39e-08))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))
       (setf sqr (* val val))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 2.7526d-06)
                       (long-float 2.7526l-06)
                       (otherwise 2.7526e-06)))
       (setf ret (* ret sqr))

       (decf ret ,(case type
                       (double-float 1.98409d-04)
                       (long-float 1.98409l-04)
                       (otherwise 1.98409e-04)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 8.3333315d-03)
                       (long-float 8.3333315l-03)
                       (otherwise 8.3333315e-03)))
       (setf ret (* ret sqr))

       (decf ret ,(case type
                       (double-float 1.666666664d-01)
                       (long-float 1.666666664l-01)
                       (otherwise 1.666666664e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0)))
       (* ret val))))

(defmacro %define-fast-cos (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float -2.605d-07)
                      (long-float -2.605l-07)
                      (otherwise -2.605e-07))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))
       (setf sqr (* val val))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 2.47609d-05)
                       (long-float 2.47609l-05)
                       (otherwise 2.47609e-05)))
       (setf ret (* ret sqr))

       (decf ret ,(case type
                       (double-float 1.3888397d-03)
                       (long-float 1.3888397l-03)
                       (otherwise 1.3888397e-03)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 4.16666418d-02)
                       (long-float 4.16666418l-02)
                       (otherwise 4.16666418e-02)))
       (setf ret (* ret sqr))

       (decf ret ,(case type
                       (double-float 4.999999963d-01)
                       (long-float 4.999999963l-01)
                       (otherwise 4.999999963e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0))))))

(defmacro %define-fast-tan (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float 9.5168091d-03)
                      (long-float 9.5168091l-03)
                      (otherwise 9.5168091e-03))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))
       (setf sqr (* val val))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 2.900525d-03)
                       (long-float 2.900525l-03)
                       (otherwise 2.900525e-03)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 2.45650893d-02)
                       (long-float 2.45650893l-02)
                       (otherwise 2.45650893e-02)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 5.33740603d-02)
                       (long-float 5.33740603l-02)
                       (otherwise 5.33740603e-02)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.333923995d-01)
                       (long-float 1.333923995l-01)
                       (otherwise 1.333923995e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 3.333314036d-01)
                       (long-float 3.333314036l-01)
                       (otherwise 3.333314036e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0)))
       (* ret val))))

(%define-fast-sin fast-sin double-float)
(%define-fast-sin fast-sin/l long-float)
(%define-fast-sin fast-sin/s single-float)
(%define-fast-sin fast-sin/r real)

(%define-fast-cos fast-cos double-float)
(%define-fast-cos fast-cos/l long-float)
(%define-fast-cos fast-cos/s single-float)
(%define-fast-cos fast-cos/r real)

(%define-fast-tan fast-tan double-float)
(%define-fast-tan fast-tan/l long-float)
(%define-fast-tan fast-tan/s single-float)
(%define-fast-tan fast-tan/r real)

(defmacro %define-faster-sin (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float 7.61d-03)
                      (long-float 7.61l-03)
                      (otherwise 7.61e-03))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))
       (setf sqr (* val val))
       (setf ret (* ret sqr))

       (decf ret ,(case type
                       (double-float 1.6605d-01)
                       (long-float 1.6605l-01)
                       (otherwise 1.6605e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0)))
       (* ret val))))

(defmacro %define-faster-cos (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float 3.705d-02)
                      (long-float 3.705l-02)
                      (otherwise 3.705e-02))))
    (declare (type ,type sqr ret)
             (dynamic-extent sqr))

       (setf sqr (* val val))
       (setf ret (* ret sqr))

       (decf ret ,(case type
                       (double-float 4.967d-01)
                       (long-float 4.967l-01)
                       (otherwise 4.967e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0))))))

(defmacro %define-faster-tan (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float 2.033d-01)
                      (long-float 2.033l-01)
                      (otherwise 2.033e-01))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))

       (setf sqr (* val val))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 3.1755d-01)
                       (long-float 3.1755l-01)
                       (otherwise 3.1755e-01)))
       (setf ret (* ret sqr))

       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0)))
       (* ret val))))

(%define-faster-sin faster-sin double-float)
(%define-faster-sin faster-sin/l long-float)
(%define-faster-sin faster-sin/s single-float)
(%define-faster-sin faster-sin/r real)

(%define-faster-cos faster-cos double-float)
(%define-faster-cos faster-cos/l long-float)
(%define-faster-cos faster-cos/s single-float)
(%define-faster-cos faster-cos/r real)

(%define-faster-tan faster-tan double-float)
(%define-faster-tan faster-tan/l long-float)
(%define-faster-tan faster-tan/s single-float)
(%define-faster-tan faster-tan/r real)

(defmacro %define-fast-inv-sin (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((root ,(case type
                    (double-float 0.0d0)
                    (long-float 0.0l0)
                    (otherwise 0.0)))
           (ret ,(case type
                      (double-float -0.0187293d0)
                      (long-float -0.0187293l0)
                      (otherwise -0.0187293))))
       (declare (type ,type root ret)
                (dynamic-extent root))

       (setf root (sqrt (- ,(case type
                             (double-float 1.0d0)
                             (long-float 1.0l0)
                             (otherwise 1.0))
                          val)))

       (setf ret (* ret val))
       (incf ret ,(case type
                       (double-float 0.0742610d0)
                       (long-float 0.0742610l0)
                       (otherwise 0.0742610)))

       (setf ret (* ret val))
       (decf ret ,(case type
                       (double-float 0.2121144d0)
                       (long-float 0.2121144l0)
                       (otherwise 0.2121144)))

       (setf ret (* ret val))
       (incf ret ,(case type
                       (double-float 1.5707288d0)
                       (long-float 1.5707288l0)
                       (otherwise 1.5707288)))
       (- ,(coerce (/ pi 2) type) (* root ret)))))

(defmacro %define-fast-inv-cos (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((root ,(case type
                    (double-float 0.0d0)
                    (long-float 0.0l0)
                    (otherwise 0.0)))
           (ret ,(case type
                      (double-float -0.0187293d0)
                      (long-float -0.0187293l0)
                      (otherwise -0.0187293))))
       (declare (type ,type root ret)
                (dynamic-extent root))

       (setf root (sqrt (- ,(case type
                             (double-float 1.0d0)
                             (long-float 1.0l0)
                             (otherwise 1.0))
                          val)))

       (setf ret (* ret val))
       (incf ret ,(case type
                       (double-float 0.0742610d0)
                       (long-float 0.0742610l0)
                       (otherwise 0.0742610)))

       (setf ret (* ret val))
       (decf ret ,(case type
                       (double-float 0.2121144d0)
                       (long-float 0.2121144l0)
                       (otherwise 0.2121144)))

       (setf ret (* ret val))
       (incf ret ,(case type
                       (double-float 1.5707288d0)
                       (long-float 1.5707288l0)
                       (otherwise 1.5707288)))
       (* root ret))))

(defmacro %define-fast-inv-tan (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float 0.0028662257d0)
                      (long-float 0.0028662257l0)
                      (otherwise 0.0028662257))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))
       (setf sqr (* val val))

       (setf ret (* ret sqr))
       (decf ret ,(case type
                       (double-float 0.0161657367d0)
                       (long-float 0.0161657367l0)
                       (otherwise 0.0161657367)))

       (setf ret (* ret sqr))
       (incf ret ,(case type
                       (double-float 0.0429096138d0)
                       (long-float 0.0429096138l0)
                       (otherwise 0.0429096138)))

       (setf ret (* ret sqr))
       (decf ret ,(case type
                       (double-float 0.0752896400d0)
                       (long-float 0.0752896400l0)
                       (otherwise 0.0752896400)))

       (setf ret (* ret sqr))
       (incf ret ,(case type
                       (double-float 0.1065626393d0)
                       (long-float 0.1065626393l0)
                       (otherwise 0.1065626393)))

       (setf ret (* ret sqr))
       (decf ret ,(case type
                       (double-float 0.1420889944d0)
                       (long-float 0.1420889944l0)
                       (otherwise 0.1420889944)))

       (setf ret (* ret sqr))
       (incf ret ,(case type
                       (double-float 0.1999355085d0)
                       (long-float 0.1999355085l0)
                       (otherwise 0.1999355085)))

       (setf ret (* ret sqr))
       (decf ret ,(case type
                       (double-float 0.3333314528d0)
                       (long-float 0.3333314528l0)
                       (otherwise 0.3333314528)))

       (setf ret (* ret sqr))
       (incf ret ,(case type
                       (double-float 1.0d0)
                       (long-float 1.0l0)
                       (otherwise 1.0)))
       (* ret val))))

(defmacro %define-faster-inv-tan (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (let ((sqr ,(case type
                   (double-float 0.0d0)
                   (long-float 0.0l0)
                   (otherwise 0.0)))
           (ret ,(case type
                      (double-float 0.0208351d0)
                      (long-float 0.0208351l0)
                      (otherwise 0.0208351))))
       (declare (type ,type sqr ret)
                (dynamic-extent sqr))
       (setf sqr (* val val))

       (setf ret (* ret sqr))
       (decf ret ,(case type
                       (double-float 0.085133d0)
                       (long-float 0.085133l0)
                       (otherwise 0.085133)))

       (setf ret (* ret sqr))
       (incf ret ,(case type
                       (double-float 0.180141d0)
                       (long-float 0.180141l0)
                       (otherwise 0.180141)))

       (setf ret (* ret sqr))
       (decf ret ,(case type
                       (double-float 0.3302995d0)
                       (long-float 0.3302995l0)
                       (otherwise 0.3302995)))

       (setf ret (* ret sqr))
       (incf ret ,(case type
                       (double-float 0.999866d0)
                       (long-float 0.999866l0)
                       (otherwise 0.999866)))

       (* ret val))))

(%define-fast-inv-sin fast-inv-sin double-float)
(%define-fast-inv-sin fast-inv-sin/l long-float)
(%define-fast-inv-sin fast-inv-sin/s short-float)
(%define-fast-inv-sin fast-inv-sin/r real)

(%define-fast-inv-cos fast-inv-cos double-float)
(%define-fast-inv-cos fast-inv-cos/l long-float)
(%define-fast-inv-cos fast-inv-cos/s short-float)
(%define-fast-inv-cos fast-inv-cos/r real)

(%define-fast-inv-tan fast-inv-tan double-float)
(%define-fast-inv-tan fast-inv-tan/l long-float)
(%define-fast-inv-tan fast-inv-tan/s short-float)
(%define-fast-inv-tan fast-inv-tan/r real)

(%define-faster-inv-tan faster-inv-tan double-float)
(%define-faster-inv-tan faster-inv-tan/l long-float)
(%define-faster-inv-tan faster-inv-tan/s short-float)
(%define-faster-inv-tan faster-inv-tan/r real)

(defmacro %define-fast-tanh (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     "A version of inverse hyperbolic tangent that trades accuracy for speed.
The intended domain is [-3, 3].  The minimum error in this range is about 0.0,
and the maximum error is about 0.024.

Note: This enforces its intended domain.  If `x` is less than -3, then this
always returns -1.0.  Likewise, if `x` is greater than 3, this always returns
1.0."
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (cond
       ((< val -3)
        ,(case type
           (double-float -1.0d0)
           (long-float -1.0l0)
           (otherwise -1.0)))

       ((> val 3)
        ,(case type
           (double-float 1.0d0)
           (long-float 1.0l0)
           (otherwise 1.0)))

       (t
        (/ (* val (+ 27 (* val val)))
           (+ 27 (* 9 val val)))))))

(%define-fast-tanh fast-tanh double-float)
(%define-fast-tanh fast-tanh/l long-float)
(%define-fast-tanh fast-tanh/s short-float)
(%define-fast-tanh fast-tanh/r real)

(defmacro %define-fast-atan (name type)
  `(define-typed-fn ,name ((,type val))
       (,type t)
     "A version of arc tangent that trades accuracy for speed.  The intended
domain is [-1, 1].  The maximum error in this range is about 0.0015089 radians."
     (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0))
              #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
     (- (* ,(case type
              (double-float (coerce (/ pi 4) 'double-float))
              (long-float   (coerce (/ pi 4) 'long-float))
              (short-float  (coerce (/ pi 4) 'short-float))
              (otherwise    (/ pi 4)))
           val)
        (* val
           (1- (abs val))
           (+ ,(case type
                 (double-float 0.2447d0)
                 (long-float 0.2447l0)
                 (otherwise 0.2447))
              (* ,(case type
                    (double-float 0.0663d0)
                    (long-float 0.0663l0)
                    (otherwise 0.0663))
                 (abs val)))))))

(%define-fast-atan fast-atan double-float)
(%define-fast-atan fast-atan/l long-float)
(%define-fast-atan fast-atan/s short-float)
(%define-fast-atan fast-atan/r real)

;;;
;;; Multiply-Add
;;;

(defmacro %define-multiply-adds (name-suffix val-type seq-type)
  `(progn
     (define-typed-fn ,(intern (format nil "MULTIPLY-ADD~a" (string-upcase name-suffix)) :cl-sdm)
         ((,val-type a) (,seq-type x dest))
         (null :always)
       (declare (optimize (speed 3) (debug 1) (compilation-speed 0))
                #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
       (loop with len fixnum = (length dest)
             for i fixnum from 0 below len
             do (incf (aref dest i)
                      (* a (aref x i))))
       nil)

     (define-typed-fn ,(intern (format nil "MULTIPLY-ADD-2~a" (string-upcase name-suffix)) :cl-sdm)
         ((,val-type a) (,seq-type x-1 x-2 dest-1 dest-2))
         (null :always)
       (declare (optimize (speed 3) (debug 1) (compilation-speed 0))
                #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
       (loop with len fixnum = (length dest-1)
             for i fixnum from 0 below len
             do (incf (aref dest-1 i)
                      (* a (aref x-1 i)))
                (incf (aref dest-2 i)
                      (* a (aref x-2 i))))
       nil)

     (define-typed-fn ,(intern (format nil "MULTIPLY-ADD-STEP~a" (string-upcase name-suffix)) :cl-sdm)
         ((,val-type a step) (,seq-type x dest))
         (null :always)
       (declare (optimize (speed 3) (debug 1) (compilation-speed 0))
                #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
       (let ((a-val a))
         (declare (type ,val-type a-val))
         (loop with len fixnum = (length dest)
               for i fixnum from 0 below len
               do (incf (aref dest i)
                        (* a-val (aref x i)))
                  (incf a-val step))))))

(%define-multiply-adds "" double-float t/dfloat-array)
(%define-multiply-adds "/l" long-float t/lfloat-array)
(%define-multiply-adds "/s" single-float t/sfloat-array)
(%define-multiply-adds "/r" real (simple-array real *))
(%define-multiply-adds "/f" fixnum (simple-array fixnum *))
