#ifdef PACKAGE package PACKAGE; #else package ral; #endif /** * Java integer implementation of 63-bit precision floating point. *
Version 1.10 * *

Copyright 2003-2006 Roar Lauritzsen * *

* *

This library is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * *

This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * *

The following link provides a copy of the GNU General Public License: *
    http://www.gnu.org/licenses/gpl.txt *
If you are unable to obtain the copy from this address, write to the * Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * *

* *

General notes *

*/ #ifdef DO_INLINE #define ISZERO(a) (a.exponent == 0 && a.mantissa == 0) #define ISNEGATIVE(a) (a.sign!=0) #define ISPOSITIVE(a) (a.sign==0) #define ISINFINITY(a) (a.exponent < 0 && a.mantissa == 0) #define ISNAN(a) (a.exponent < 0 && a.mantissa != 0) #define ISFINITE(a) (a.exponent >= 0) #define ISFINITENONZERO(a) (a.exponent >= 0 && a.mantissa != 0) #define ASSIGN(a,b) { a.mantissa = b.mantissa; \ a.exponent = b.exponent; \ a.sign = b.sign; } #define CONST(a, s,e,m) { a.sign=(byte)s; a.exponent=e; a.mantissa=m; } #else #define ISZERO(a) a.isZero() #define ISNEGATIVE(a) a.isNegative() #define ISPOSITIVE(a) !a.isNegative() #define ISINFINITY(a) a.isInfinity() #define ISNAN(a) a.isNan() #define ISFINITE(a) a.isFinite() #define ISFINITENONZERO(a) a.isFiniteNonZero() #define ASSIGN(a,b) a.assign(b) #define CONST(a, s,e,m) a.assign(s,e,m) #endif public final class Real { /** * The mantissa of a Real. To maintain numbers in a * normalized state and to preserve the integrity of abnormal numbers, it * is discouraged to modify the inner representation of a * Real directly. * *

The number represented by a Real equals:
*      -1sign · mantissa · 2-62 · 2exponent-0x40000000 * *

The normalized mantissa of a finite Real must be * between 0x4000000000000000L and * 0x7fffffffffffffffL. Using a denormalized * Real in any operation other than {@link * #normalize()} may produce undefined results. The mantissa of zero and * of an infinite value is 0x0000000000000000L. * *

The mantissa of a NaN is any nonzero value. However, it is * recommended to use the value 0x4000000000000000L. Any * other values are reserved for future extensions. */ public long mantissa; /** * The exponent of a Real. To maintain numbers in a * normalized state and to preserve the integrity of abnormal numbers, it * is discouraged to modify the inner representation of a * Real directly. * *

The exponent of a finite Real must be between * 0x00000000 and 0x7fffffff. The exponent of * zero 0x00000000. * *

The exponent of an infinite value and of a NaN is any negative * value. However, it is recommended to use the value * 0x80000000. Any other values are reserved for future * extensions. */ public int exponent; /** * The sign of a Real. To maintain numbers in a normalized * state and to preserve the integrity of abnormal numbers, it is * discouraged to modify the inner representation of a Real * directly. * *

The sign of a finite, zero or infinite Real is 0 for * positive values and 1 for negative values. Any other values may produce * undefined results. * *

The sign of a NaN is ignored. However, it is recommended to use the * value 0. Any other values are reserved for future * extensions. */ public byte sign; /** * Set to false during numerical algorithms to favor accuracy * over prettyness. This flag is initially set to true. * *

The flag controls the operation of a subtraction of two * almost-identical numbers that differ only in the last three bits of the * mantissa. With this flag enabled, the result of such a subtraction is * rounded down to zero. Probabilistically, this is the correct course of * action in an overwhelmingly large percentage of calculations. * However, certain numerical algorithms such as differentiation depend * on keeping maximum accuracy during subtraction. * *

Note, that because of magicRounding, * a.sub(b) may produce zero even though * a.equalTo(b) returns false. This must be * considered e.g. when trying to avoid division by zero. */ public static boolean magicRounding = true; /** * A Real constant holding the exact value of 0. Among other * uses, this value is used as a result when a positive underflow occurs. */ public static final Real ZERO = new Real(0,0x00000000,0x0000000000000000L); /** * A Real constant holding the exact value of 1. */ public static final Real ONE = new Real(0,0x40000000,0x4000000000000000L); /** * A Real constant holding the exact value of 2. */ public static final Real TWO = new Real(0,0x40000001,0x4000000000000000L); /** * A Real constant holding the exact value of 3. */ public static final Real THREE= new Real(0,0x40000001,0x6000000000000000L); /** * A Real constant holding the exact value of 5. */ public static final Real FIVE = new Real(0,0x40000002,0x5000000000000000L); /** * A Real constant holding the exact value of 10. */ public static final Real TEN = new Real(0,0x40000003,0x5000000000000000L); /** * A Real constant holding the exact value of 100. */ public static final Real HUNDRED=new Real(0,0x40000006,0x6400000000000000L); /** * A Real constant holding the exact value of 1/2. */ public static final Real HALF = new Real(0,0x3fffffff,0x4000000000000000L); /** * A Real constant that is closer than any other to 1/3. */ public static final Real THIRD= new Real(0,0x3ffffffe,0x5555555555555555L); /** * A Real constant that is closer than any other to 1/10. */ public static final Real TENTH= new Real(0,0x3ffffffc,0x6666666666666666L); /** * A Real constant that is closer than any other to 1/100. */ public static final Real PERCENT=new Real(0,0x3ffffff9,0x51eb851eb851eb85L); /** * A Real constant that is closer than any other to the * square root of 2. */ public static final Real SQRT2= new Real(0,0x40000000,0x5a827999fcef3242L); /** * A Real constant that is closer than any other to the * square root of 1/2. */ public static final Real SQRT1_2=new Real(0,0x3fffffff,0x5a827999fcef3242L); /** * A Real constant that is closer than any other to 2π. */ public static final Real PI2 = new Real(0,0x40000002,0x6487ed5110b4611aL); /** * A Real constant that is closer than any other to π, the * ratio of the circumference of a circle to its diameter. */ public static final Real PI = new Real(0,0x40000001,0x6487ed5110b4611aL); /** * A Real constant that is closer than any other to π/2. */ public static final Real PI_2 = new Real(0,0x40000000,0x6487ed5110b4611aL); /** * A Real constant that is closer than any other to π/4. */ public static final Real PI_4 = new Real(0,0x3fffffff,0x6487ed5110b4611aL); /** * A Real constant that is closer than any other to π/8. */ public static final Real PI_8 = new Real(0,0x3ffffffe,0x6487ed5110b4611aL); /** * A Real constant that is closer than any other to e, * the base of the natural logarithms. */ public static final Real E = new Real(0,0x40000001,0x56fc2a2c515da54dL); /** * A Real constant that is closer than any other to the * natural logarithm of 2. */ public static final Real LN2 = new Real(0,0x3fffffff,0x58b90bfbe8e7bcd6L); /** * A Real constant that is closer than any other to the * natural logarithm of 10. */ public static final Real LN10 = new Real(0,0x40000001,0x49aec6eed554560bL); /** * A Real constant that is closer than any other to the * base-2 logarithm of e. */ public static final Real LOG2E= new Real(0,0x40000000,0x5c551d94ae0bf85eL); /** * A Real constant that is closer than any other to the * base-10 logarithm of e. */ public static final Real LOG10E=new Real(0,0x3ffffffe,0x6f2dec549b9438cbL); /** * A Real constant holding the maximum non-infinite positive * number = 4.197e323228496. */ public static final Real MAX = new Real(0,0x7fffffff,0x7fffffffffffffffL); /** * A Real constant holding the minimum non-zero positive * number = 2.383e-323228497. */ public static final Real MIN = new Real(0,0x00000000,0x4000000000000000L); /** * A Real constant holding the value of NaN (not-a-number). * This value is always used as a result to signal an invalid operation. */ public static final Real NAN = new Real(0,0x80000000,0x4000000000000000L); /** * A Real constant holding the value of positive infinity. * This value is always used as a result to signal a positive overflow. */ public static final Real INF = new Real(0,0x80000000,0x0000000000000000L); /** * A Real constant holding the value of negative infinity. * This value is always used as a result to signal a negative overflow. */ public static final Real INF_N= new Real(1,0x80000000,0x0000000000000000L); /** * A Real constant holding the value of negative zero. This * value is used as a result e.g. when a negative underflow occurs. */ public static final Real ZERO_N=new Real(1,0x00000000,0x0000000000000000L); /** * A Real constant holding the exact value of -1. */ public static final Real ONE_N= new Real(1,0x40000000,0x4000000000000000L); private static final int clz_magic = 0x7c4acdd; private static final byte[] clz_tab = { 31,22,30,21,18,10,29, 2,20,17,15,13, 9, 6,28, 1, 23,19,11, 3,16,14, 7,24,12, 4, 8,25, 5,26,27, 0 }; /** * Creates a new Real with a value of zero. */ public Real() { } /** * Creates a new Real, assigning the value of another * Real. See {@link #assign(Real)}. * * @param a the Real to assign. */ public Real(Real a) { ASSIGN(this,a); } /** * Creates a new Real, assigning the value of an integer. See * {@link #assign(int)}. * * @param a the int to assign. */ public Real(int a) { assign(a); } /** * Creates a new Real, assigning the value of a long * integer. See {@link #assign(long)}. * * @param a the long to assign. */ public Real(long a) { assign(a); } /** * Creates a new Real, assigning the value encoded in a * String using base-10. See {@link #assign(String)}. * * @param a the String to assign. */ public Real(String a) { assign(a,10); } /** * Creates a new Real, assigning the value encoded in a * String using the specified number base. See {@link * #assign(String,int)}. * * @param a the String to assign. * @param base the number base of a. Valid base values are 2, * 8, 10 and 16. */ public Real(String a, int base) { assign(a,base); } /** * Creates a new Real, assigning a value by directly setting * the fields of the internal representation. The arguments must represent * a valid, normalized Real. This is the fastest way of * creating a constant value. See {@link #assign(int,int,long)}. * * @param s {@link #sign} bit, 0 for positive sign, 1 for negative sign * @param e {@link #exponent} * @param m {@link #mantissa} */ public Real(int s, int e, long m) { CONST(this, s,e,m); } /** * Creates a new Real, assigning the value previously encoded * into twelve consecutive bytes in a byte array using {@link * #toBytes(byte[],int) toBytes}. See {@link #assign(byte[],int)}. * * @param data byte array to decode into this Real. * @param offset offset to start encoding from. The bytes * data[offset]...data[offset+11] will be * read. */ public Real(byte [] data, int offset) { assign(data,offset); } /** * Assigns this Real the value of another Real. * *

* Equivalent double code: * this = a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.3 *
* * @param a the Real to assign. */ public void assign(Real a) { if (a == null) { makeZero(); return; } sign = a.sign; exponent = a.exponent; mantissa = a.mantissa; } /** * Assigns this Real the value of an integer. * All integer values can be represented without loss of accuracy. * *

* Equivalent double code: * this = (double)a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.6 *
* * @param a the int to assign. */ public void assign(int a) { if (a==0) { makeZero(); return; } sign = 0; if (a<0) { sign = 1; a = -a; // Also works for 0x80000000 } // Normalize int int t=a; t|=t>>1; t|=t>>2; t|=t>>4; t|=t>>8; t|=t>>16; t = clz_tab[(t*clz_magic)>>>27]-1; exponent = 0x4000001E-t; mantissa = ((long)a)<<(32+t); } /** * Assigns this Real the value of a signed long integer. * All long values can be represented without loss of accuracy. * *

* Equivalent double code: * this = (double)a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.0 *
* * @param a the long to assign. */ public void assign(long a) { sign = 0; if (a<0) { sign = 1; a = -a; // Also works for 0x8000000000000000 } exponent = 0x4000003E; mantissa = a; normalize(); } /** * Assigns this Real a value encoded in a String * using base-10, as specified in {@link #assign(String,int)}. * *

* Equivalent double code: * this = Double.{@link Double#valueOf(String) valueOf}(a); *
Approximate error bound: * ½-1 ULPs *
* Execution time relative to add:   * * 100 *
* * @param a the String to assign. */ public void assign(String a) { assign(a,10); } /** * Assigns this Real a value encoded in a String * using the specified number base. The string is parsed as follows: * *

* *

Valid examples:
*     base-2: "-.110010101e+5"
*     base-8: "+5462E-99"
*     base-10: "  3,1415927"
*     base-16: "/FFF800C.CCCE e64" * *

The number is parsed until the end of the string or an unknown * character is encountered, then silently returns even if the whole * string has not been parsed. Please note that specifying an * excessive number of digits in base-10 may in fact decrease the * accuracy of the result because of the extra multiplications performed. * *

*
* Equivalent double code: * this = Double.{@link Double#valueOf(String) valueOf}(a); * // Works only for base-10 *
* Approximate error bound: * base-10 * ½-1 ULPs *
2/8/16 * ½ ULPs *
* Execution time relative to add:   * base-2 * 54 *
base-8 * 60 *
base-10 * 100 *
base-16   * 60 *
* * @param a the String to assign. * @param base the number base of a. Valid base values are * 2, 8, 10 and 16. */ public void assign(String a, int base) { if (a==null || a.length()==0) { assign(ZERO); return; } atof(a,base); } /** * Assigns this Real a value by directly setting the fields * of the internal representation. The arguments must represent a valid, * normalized Real. This is the fastest way of assigning a * constant value. * *

* Equivalent double code: * this = (1-2*s) * m * * Math.{@link Math#pow(double,double) * pow}(2.0,e-0x400000e3); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.3 *
* * @param s {@link #sign} bit, 0 for positive sign, 1 for negative sign * @param e {@link #exponent} * @param m {@link #mantissa} */ public void assign(int s, int e, long m) { sign = (byte)s; exponent = e; mantissa = m; } /** * Assigns this Real a value previously encoded into into * twelve consecutive bytes in a byte array using {@link * #toBytes(byte[],int) toBytes}. * *

* Error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.2 *
* * @param data byte array to decode into this Real. * @param offset offset to start encoding from. The bytes * data[offset]...data[offset+11] will be * read. */ public void assign(byte [] data, int offset) { sign = (byte)((data[offset+4]>>7)&1); exponent = (((data[offset ]&0xff)<<24)+ ((data[offset +1]&0xff)<<16)+ ((data[offset +2]&0xff)<<8)+ ((data[offset +3]&0xff))); mantissa = (((long)(data[offset+ 4]&0x7f)<<56)+ ((long)(data[offset+ 5]&0xff)<<48)+ ((long)(data[offset+ 6]&0xff)<<40)+ ((long)(data[offset+ 7]&0xff)<<32)+ ((long)(data[offset+ 8]&0xff)<<24)+ ((long)(data[offset+ 9]&0xff)<<16)+ ((long)(data[offset+10]&0xff)<< 8)+ ( (data[offset+11]&0xff))); } /** * Encodes an accurate representation of this Real value into * twelve consecutive bytes in a byte array. Can be decoded using {@link * #assign(byte[],int)}. * *

* Execution time relative to add:   * * 1.2 *
* * @param data byte array to save this Real in. * @param offset offset to start encoding to. The bytes * data[offset]...data[offset+11] will be * written. */ public void toBytes(byte [] data, int offset) { data[offset ] = (byte)(exponent>>24); data[offset+ 1] = (byte)(exponent>>16); data[offset+ 2] = (byte)(exponent>>8); data[offset+ 3] = (byte)(exponent); data[offset+ 4] = (byte)((sign<<7)+(mantissa>>56)); data[offset+ 5] = (byte)(mantissa>>48); data[offset+ 6] = (byte)(mantissa>>40); data[offset+ 7] = (byte)(mantissa>>32); data[offset+ 8] = (byte)(mantissa>>24); data[offset+ 9] = (byte)(mantissa>>16); data[offset+10] = (byte)(mantissa>>8); data[offset+11] = (byte)(mantissa); } /** * Assigns this Real the value corresponding to a given bit * representation. The argument is considered to be a representation of a * floating-point value according to the IEEE 754 floating-point "single * format" bit layout. * *

* Equivalent float code: * this = Float.{@link Float#intBitsToFloat(int) * intBitsToFloat}(bits); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.6 *
* * @param bits a float value encoded in an int. */ public void assignFloatBits(int bits) { sign = (byte)(bits>>>31); exponent = (bits>>23)&0xff; mantissa = (long)(bits&0x007fffff)<<39; if (exponent == 0 && mantissa == 0) return; // Valid zero if (exponent == 0 && mantissa != 0) { // Degenerate small float exponent = 0x40000000-126; normalize(); return; } if (exponent <= 254) { // Normal IEEE 754 float exponent += 0x40000000-127; mantissa |= 1L<<62; return; } if (mantissa == 0) makeInfinity(sign); else makeNan(); } /** * Assigns this Real the value corresponding to a given bit * representation. The argument is considered to be a representation of a * floating-point value according to the IEEE 754 floating-point "double * format" bit layout. * *

* Equivalent double code: * this = Double.{@link Double#longBitsToDouble(long) * longBitsToDouble}(bits); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.6 *
* * @param bits a double value encoded in a long. */ public void assignDoubleBits(long bits) { sign = (byte)((bits>>63)&1); exponent = (int)((bits>>52)&0x7ff); mantissa = (bits&0x000fffffffffffffL)<<10; if (exponent == 0 && mantissa == 0) return; // Valid zero if (exponent == 0 && mantissa != 0) { // Degenerate small float exponent = 0x40000000-1022; normalize(); return; } if (exponent <= 2046) { // Normal IEEE 754 float exponent += 0x40000000-1023; mantissa |= 1L<<62; return; } if (mantissa == 0) makeInfinity(sign); else makeNan(); } /** * Returns a representation of this Real according to the * IEEE 754 floating-point "single format" bit layout. * *

* Equivalent float code: * Float.{@link Float#floatToIntBits(float) * floatToIntBits}(this) *
* Execution time relative to add:   * * 0.7 *
* * @return the bits that represent the floating-point number. */ public int toFloatBits() { if (ISNAN(this)) return 0x7fffffff; // nan int e = exponent-0x40000000+127; long m = mantissa; // Round properly! m += 1L<<38; if (m<0) { m >>>= 1; e++; if (exponent < 0) // Overflow return (sign<<31)|0x7f800000; // inf } if (ISINFINITY(this) || e > 254) return (sign<<31)|0x7f800000; // inf if (ISZERO(this) || e < -22) return (sign<<31); // zero if (e <= 0) // Degenerate small float return (sign<<31)|((int)(m>>>(40-e))&0x007fffff); // Normal IEEE 754 float return (sign<<31)|(e<<23)|((int)(m>>>39)&0x007fffff); } /** * Returns a representation of this Real according to the * IEEE 754 floating-point "double format" bit layout. * *

* Equivalent double code: * Double.{@link Double#doubleToLongBits(double) * doubleToLongBits}(this) *
* Execution time relative to add:   * * 0.7 *
* * @return the bits that represent the floating-point number. */ public long toDoubleBits() { if (ISNAN(this)) return 0x7fffffffffffffffL; // nan int e = exponent-0x40000000+1023; long m = mantissa; // Round properly! m += 1L<<9; if (m<0) { m >>>= 1; e++; if (exponent < 0) return ((long)sign<<63)|0x7ff0000000000000L; // inf } if (ISINFINITY(this) || e > 2046) return ((long)sign<<63)|0x7ff0000000000000L; // inf if (ISZERO(this) || e < -51) return ((long)sign<<63); // zero if (e <= 0) // Degenerate small double return ((long)sign<<63)|((m>>>(11-e))&0x000fffffffffffffL); // Normal IEEE 754 double return ((long)sign<<63)|((long)e<<52)|((m>>>10)&0x000fffffffffffffL); } /** * Makes this Real the value of positive zero. * *

* Equivalent double code: * this = 0; *
* Execution time relative to add:   * * 0.2 *
*/ public void makeZero() { sign = 0; mantissa = 0; exponent = 0; } /** * Makes this Real the value of zero with the specified sign. * *

* Equivalent double code: * this = 0.0 * (1-2*s); *
* Execution time relative to add:   * * 0.2 *
* * @param s sign bit, 0 to make a positive zero, 1 to make a negative zero */ public void makeZero(int s) { sign = (byte)s; mantissa = 0; exponent = 0; } /** * Makes this Real the value of infinity with the specified * sign. * *

* Equivalent double code: * this = Double.{@link Double#POSITIVE_INFINITY POSITIVE_INFINITY} * * (1-2*s); *
* Execution time relative to add:   * * 0.3 *
* * @param s sign bit, 0 to make positive infinity, 1 to make negative * infinity */ public void makeInfinity(int s) { sign = (byte)s; mantissa = 0; exponent = 0x80000000; } /** * Makes this Real the value of Not-a-Number (NaN). * *

* Equivalent double code: * this = Double.{@link Double#NaN NaN}; *
* Execution time relative to add:   * * 0.3 *
*/ public void makeNan() { sign = 0; mantissa = 0x4000000000000000L; exponent = 0x80000000; } /** * Returns true if the value of this Real is * zero, false otherwise. * *

* Equivalent double code: * (this == 0) *
* Execution time relative to add:   * * 0.3 *
* * @return true if the value represented by this object is * zero, false otherwise. */ public boolean isZero() { return (exponent == 0 && mantissa == 0); } /** * Returns true if the value of this Real is * infinite, false otherwise. * *

* Equivalent double code: * Double.{@link Double#isInfinite(double) isInfinite}(this) *
* Execution time relative to add:   * * 0.3 *
* * @return true if the value represented by this object is * infinite, false if it is finite or NaN. */ public boolean isInfinity() { return (exponent < 0 && mantissa == 0); } /** * Returns true if the value of this Real is * Not-a-Number (NaN), false otherwise. * *

* Equivalent double code: * Double.{@link Double#isNaN(double) isNaN}(this) *
* Execution time relative to add:   * * 0.3 *
* * @return true if the value represented by this object is * NaN, false otherwise. */ public boolean isNan() { return (exponent < 0 && mantissa != 0); } /** * Returns true if the value of this Real is * finite, false otherwise. * *

* Equivalent double code: * (!Double.{@link Double#isNaN(double) isNaN}(this) && * !Double.{@link Double#isInfinite(double) * isInfinite}(this)) *
* Execution time relative to add:   * * 0.3 *
* * @return true if the value represented by this object is * finite, false if it is infinite or NaN. */ public boolean isFinite() { // That is, non-infinite and non-nan return (exponent >= 0); } /** * Returns true if the value of this Real is * finite and nonzero, false otherwise. * *

* Equivalent double code: * (!Double.{@link Double#isNaN(double) isNaN}(this) && * !Double.{@link Double#isInfinite(double) isInfinite}(this) && * (this!=0)) *
* Execution time relative to add:   * * 0.3 *
* * @return true if the value represented by this object is * finite and nonzero, false if it is infinite, NaN or * zero. */ public boolean isFiniteNonZero() { // That is, non-infinite and non-nan and non-zero return (exponent >= 0 && mantissa != 0); } /** * Returns true if the value of this Real is * negative, false otherwise. * *

* Equivalent double code: * (this < 0) *
* Execution time relative to add:   * * 0.3 *
* * @return true if the value represented by this object * is negative, false if it is positive or NaN. */ public boolean isNegative() { return sign!=0; } /** * Calculates the absolute value. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#abs(double) abs}(this); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.2 *
*/ public void abs() { sign = 0; } /** * Negates this Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = -this; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.2 *
*/ public void neg() { if (!ISNAN(this)) sign ^= 1; } /** * Copies the sign from a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#abs(double) * abs}(this)*Math.{@link Math#signum(double) signum}(a); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.2 *
* * @param a the Real to copy the sign from. */ public void copysign(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } sign = a.sign; } /** * Readjusts the mantissa of this Real. The exponent is * adjusted accordingly. This is necessary when the mantissa has been * {@link #mantissa modified directly} for some purpose and may be * denormalized. The normalized mantissa of a finite Real * must have bit 63 cleared and bit 62 set. Using a denormalized * Real in any other operation may produce undefined * results. * *

* Error bound: * ½ ULPs *
* Execution time relative to add:   * * 0.7 *
*/ public void normalize() { if (ISFINITE(this)) { if (mantissa > 0) { int clz = 0; int t = (int)(mantissa>>>32); if (t == 0) { clz = 32; t = (int)mantissa; } t|=t>>1; t|=t>>2; t|=t>>4; t|=t>>8; t|=t>>16; clz += clz_tab[(t*clz_magic)>>>27]-1; mantissa <<= clz; exponent -= clz; if (exponent < 0) // Underflow makeZero(sign); } else if (mantissa < 0) { mantissa = (mantissa+1)>>>1; exponent ++; if (mantissa == 0) { // Ooops, it was 0xffffffffffffffffL mantissa = 0x4000000000000000L; exponent ++; } if (exponent < 0) // Overflow makeInfinity(sign); } else // mantissa == 0 { exponent = 0; } } } /** * Readjusts the mantissa of a Real with extended * precision. The exponent is adjusted accordingly. This is necessary when * the mantissa has been {@link #mantissa modified directly} for some * purpose and may be denormalized. The normalized mantissa of a finite * Real must have bit 63 cleared and bit 62 set. Using a * denormalized Real in any other operation may * produce undefined results. * *

* Approximate error bound: * 2-64 ULPs (i.e. of a normal precision Real) *
* Execution time relative to add:   * * 0.7 *
* * @param extra the extra 64 bits of mantissa of this extended precision * Real. * @return the extra 64 bits of mantissa of the resulting extended * precision Real. */ public long normalize128(long extra) { if (!ISFINITE(this)) return 0; if (mantissa == 0) { if (extra == 0) { exponent = 0; return 0; } mantissa = extra; extra = 0; exponent -= 64; if (exponent < 0) { // Underflow makeZero(sign); return 0; } } if (mantissa < 0) { extra = (mantissa<<63)+(extra>>>1); mantissa >>>= 1; exponent ++; if (exponent < 0) { // Overflow makeInfinity(sign); return 0; } return extra; } int clz = 0; int t = (int)(mantissa>>>32); if (t == 0) { clz = 32; t = (int)mantissa; } t|=t>>1; t|=t>>2; t|=t>>4; t|=t>>8; t|=t>>16; clz += clz_tab[(t*clz_magic)>>>27]-1; if (clz == 0) return extra; mantissa = (mantissa<>>(64-clz)); extra <<= clz; exponent -= clz; if (exponent < 0) { // Underflow makeZero(sign); return 0; } return extra; } /** * Rounds an extended precision Real to the nearest * Real of normal precision. Replaces the contents of this * Real with the result. * *

* Error bound: * ½ ULPs *
* Execution time relative to add:   * * 1.0 *
* * @param extra the extra 64 bits of mantissa of this extended precision * Real. */ public void roundFrom128(long extra) { mantissa += (extra>>63)&1; normalize(); } /** * Returns true if this Java object is the same * object as a. Since a Real should be * thought of as a "register holding a number", this method compares the * object references, not the contents of the two objects. * This is very different from {@link #equalTo(Real)}. * * @param a the object to compare to this. * @return true if this object is the same as a. */ public boolean equals(Object a) { return this==a; } private int compare(Real a) { // Compare of normal floats, zeros, but not nan or equal-signed inf if (ISZERO(this) && ISZERO(a)) return 0; if (sign != a.sign) return a.sign-sign; int s = ISPOSITIVE(this) ? 1 : -1; if (ISINFINITY(this)) return s; if (ISINFINITY(a)) return -s; if (exponent != a.exponent) return exponenttrue if this Real is equal to * a. * If the numbers are incomparable, i.e. the values are infinities of * the same sign or any of them is NaN, false is always * returned. This method must not be confused with {@link #equals(Object)}. * *

* Equivalent double code: * (this == a) *
* Execution time relative to add:   * * 1.0 *
* * @param a the Real to compare to this. * @return true if the value represented by this object is * equal to the value represented by a. false * otherwise, or if the numbers are incomparable. */ public boolean equalTo(Real a) { if (invalidCompare(a)) return false; return compare(a) == 0; } /** * Returns true if this Real is equal to * the integer a. * *

* Equivalent double code: * (this == a) *
* Execution time relative to add:   * * 1.7 *
* * @param a the int to compare to this. * @return true if the value represented by this object is * equal to the integer a. false * otherwise. */ public boolean equalTo(int a) { tmp0.assign(a); return equalTo(tmp0); } /** * Returns true if this Real is not equal to * a. * If the numbers are incomparable, i.e. the values are infinities of * the same sign or any of them is NaN, false is always * returned. * This distinguishes notEqualTo(a) from the expression * !equalTo(a). * *

* Equivalent double code: * (this != a) *
* Execution time relative to add:   * * 1.0 *
* * @param a the Real to compare to this. * @return true if the value represented by this object is not * equal to the value represented by a. false * otherwise, or if the numbers are incomparable. */ public boolean notEqualTo(Real a) { if (invalidCompare(a)) return false; return compare(a) != 0; } /** * Returns true if this Real is not equal to * the integer a. * If this Real is NaN, false is always * returned. * This distinguishes notEqualTo(a) from the expression * !equalTo(a). * *

* Equivalent double code: * (this != a) *
* Execution time relative to add:   * * 1.7 *
* * @param a the int to compare to this. * @return true if the value represented by this object is not * equal to the integer a. false * otherwise, or if this Real is NaN. */ public boolean notEqualTo(int a) { tmp0.assign(a); return notEqualTo(tmp0); } /** * Returns true if this Real is less than * a. * If the numbers are incomparable, i.e. the values are infinities of * the same sign or any of them is NaN, false is always * returned. * *

* Equivalent double code: * (this < a) *
* Execution time relative to add:   * * 1.0 *
* * @param a the Real to compare to this. * @return true if the value represented by this object is * less than the value represented by a. * false otherwise, or if the numbers are incomparable. */ public boolean lessThan(Real a) { if (invalidCompare(a)) return false; return compare(a) < 0; } /** * Returns true if this Real is less than * the integer a. * If this Real is NaN, false is always * returned. * *

* Equivalent double code: * (this < a) *
* Execution time relative to add:   * * 1.7 *
* * @param a the int to compare to this. * @return true if the value represented by this object is * less than the integer a. false otherwise, * or if this Real is NaN. */ public boolean lessThan(int a) { tmp0.assign(a); return lessThan(tmp0); } /** * Returns true if this Real is less than or * equal to a. * If the numbers are incomparable, i.e. the values are infinities of * the same sign or any of them is NaN, false is always * returned. * *

* Equivalent double code: * (this <= a) *
* Execution time relative to add:   * * 1.0 *
* * @param a the Real to compare to this. * @return true if the value represented by this object is * less than or equal to the value represented by a. * false otherwise, or if the numbers are incomparable. */ public boolean lessEqual(Real a) { if (invalidCompare(a)) return false; return compare(a) <= 0; } /** * Returns true if this Real is less than or * equal to the integer a. * If this Real is NaN, false is always * returned. * *

* Equivalent double code: * (this <= a) *
* Execution time relative to add:   * * 1.7 *
* * @param a the int to compare to this. * @return true if the value represented by this object is * less than or equal to the integer a. false * otherwise, or if this Real is NaN. */ public boolean lessEqual(int a) { tmp0.assign(a); return lessEqual(tmp0); } /** * Returns true if this Real is greater than * a. * If the numbers are incomparable, i.e. the values are infinities of * the same sign or any of them is NaN, false is always * returned. * *

* Equivalent double code: * (this > a) *
* Execution time relative to add:   * * 1.0 *
* * @param a the Real to compare to this. * @return true if the value represented by this object is * greater than the value represented by a. * false otherwise, or if the numbers are incomparable. */ public boolean greaterThan(Real a) { if (invalidCompare(a)) return false; return compare(a) > 0; } /** * Returns true if this Real is greater than * the integer a. * If this Real is NaN, false is always * returned. * *

* Equivalent double code: * (this > a) *
* Execution time relative to add:   * * 1.7 *
* * @param a the int to compare to this. * @return true if the value represented by this object is * greater than the integer a. * false otherwise, or if this Real is NaN. */ public boolean greaterThan(int a) { tmp0.assign(a); return greaterThan(tmp0); } /** * Returns true if this Real is greater than * or equal to a. * If the numbers are incomparable, i.e. the values are infinities of * the same sign or any of them is NaN, false is always * returned. * *

* Equivalent double code: * (this >= a) *
* Execution time relative to add:   * * 1.0 *
* * @param a the Real to compare to this. * @return true if the value represented by this object is * greater than or equal to the value represented by a. * false otherwise, or if the numbers are incomparable. */ public boolean greaterEqual(Real a) { if (invalidCompare(a)) return false; return compare(a) >= 0; } /** * Returns true if this Real is greater than * or equal to the integer a. * If this Real is NaN, false is always * returned. * *

* Equivalent double code: * (this >= a) *
* Execution time relative to add:   * * 1.7 *
* * @param a the int to compare to this. * @return true if the value represented by this object is * greater than or equal to the integer a. * false otherwise, or if this Real is NaN. */ public boolean greaterEqual(int a) { tmp0.assign(a); return greaterEqual(tmp0); } /** * Returns true if the absolute value of this * Real is less than the absolute value of * a. * If the numbers are incomparable, i.e. the values are both infinite * or any of them is NaN, false is always returned. * *

* Equivalent double code: * (Math.{@link Math#abs(double) abs}(this) < * Math.{@link Math#abs(double) abs}(a)) *
* Execution time relative to add:   * * 0.5 *
* * @param a the Real to compare to this. * @return true if the absolute of the value represented by * this object is less than the absolute of the value represented by * a. * false otherwise, or if the numbers are incomparable. */ public boolean absLessThan(Real a) { if (ISNAN(this) || ISNAN(a) || ISINFINITY(this)) return false; if (ISINFINITY(a)) return true; if (exponent != a.exponent) return exponentReal by 2 to the power of n. * Replaces the contents of this Real with the result. * This operation is faster than normal multiplication since it only * involves adding to the exponent. * *

* Equivalent double code: * this *= Math.{@link Math#pow(double,double) pow}(2.0,n); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.3 *
* * @param n the integer argument. */ public void scalbn(int n) { if (!ISFINITENONZERO(this)) return; exponent += n; if (exponent < 0) { if (n<0) makeZero(sign); // Underflow else makeInfinity(sign); // Overflow } } /** * Calculates the next representable neighbour of this Real * in the direction towards a. * Replaces the contents of this Real with the result. * If the two values are equal, nothing happens. * *

* Equivalent double code: * this += Math.{@link Math#ulp(double) ulp}(this)*Math.{@link * Math#signum(double) signum}(a-this); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.8 *
* * @param a the Real argument. */ public void nextafter(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISINFINITY(this) && ISINFINITY(a) && sign == a.sign) return; int dir = -compare(a); if (dir == 0) return; if (ISZERO(this)) { ASSIGN(this,MIN); sign = (byte)(dir<0 ? 1 : 0); return; } if (ISINFINITY(this)) { ASSIGN(this,MAX); sign = (byte)(dir<0 ? 0 : 1); return; } if (ISPOSITIVE(this) ^ dir<0) { mantissa ++; } else { if (mantissa == 0x4000000000000000L) { mantissa <<= 1; exponent--; } mantissa --; } normalize(); } /** * Calculates the largest (closest to positive infinity) * Real value that is less than or equal to this * Real and is equal to a mathematical integer. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#floor(double) floor}(this); *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.5 *
*/ public void floor() { if (!ISFINITENONZERO(this)) return; if (exponent < 0x40000000) { if (ISPOSITIVE(this)) makeZero(sign); else { exponent = ONE.exponent; mantissa = ONE.mantissa; // sign unchanged! } return; } int shift = 0x4000003e-exponent; if (shift <= 0) return; if (ISNEGATIVE(this)) mantissa += ((1L<Real value that is greater than or equal to this * Real and is equal to a mathematical integer. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#ceil(double) ceil}(this); *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.8 *
*/ public void ceil() { neg(); floor(); neg(); } /** * Rounds this Real value to the closest value that is equal * to a mathematical integer. If two Real values that are * mathematical integers are equally close, the result is the integer * value with the largest magnitude (positive or negative). Replaces the * contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#rint(double) rint}(this); *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.3 *
*/ public void round() { if (!ISFINITENONZERO(this)) return; if (exponent < 0x3fffffff) { makeZero(sign); return; } int shift = 0x4000003e-exponent; if (shift <= 0) return; mantissa += 1L<<(shift-1); // Bla-bla, this works almost mantissa &= ~((1L<Real value to the closest value towards * zero that is equal to a mathematical integer. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = (double)((long)this); *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.2 *
*/ public void trunc() { if (!ISFINITENONZERO(this)) return; if (exponent < 0x40000000) { makeZero(sign); return; } int shift = 0x4000003e-exponent; if (shift <= 0) return; mantissa &= ~((1L<Real by subtracting * the closest value towards zero that is equal to a mathematical integer. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this -= (double)((long)this); *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.2 *
*/ public void frac() { if (!ISFINITENONZERO(this) || exponent < 0x40000000) return; int shift = 0x4000003e-exponent; if (shift <= 0) { makeZero(sign); return; } mantissa &= ((1L<Real value to the closest int * value towards zero. * *

If the value of this Real is too large, {@link * Integer#MAX_VALUE} is returned. However, if the value of this * Real is too small, -Integer.MAX_VALUE is * returned, not {@link Integer#MIN_VALUE}. This is done to ensure that * the sign will be correct if you calculate * -this.toInteger(). A NaN is converted to 0. * *

* Equivalent double code: * (int)this *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.6 *
* * @return an int representation of this Real. */ public int toInteger() { if (ISZERO(this) || ISNAN(this)) return 0; if (ISINFINITY(this)) { return (ISPOSITIVE(this)) ? 0x7fffffff : 0x80000001; // 0x80000001, so that you can take -x.toInteger() } if (exponent < 0x40000000) return 0; int shift = 0x4000003e-exponent; if (shift < 32) { return (ISPOSITIVE(this)) ? 0x7fffffff : 0x80000001; // 0x80000001, so that you can take -x.toInteger() } return ISPOSITIVE(this) ? (int)(mantissa>>>shift) : -(int)(mantissa>>>shift); } /** * Converts this Real value to the closest long * value towards zero. * *

If the value of this Real is too large, {@link * Long#MAX_VALUE} is returned. However, if the value of this * Real is too small, -Long.MAX_VALUE is * returned, not {@link Long#MIN_VALUE}. This is done to ensure that the * sign will be correct if you calculate -this.toLong(). * A NaN is converted to 0. * *

* Equivalent double code: * (long)this *
Approximate error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.5 *
* * @return a long representation of this Real. */ public long toLong() { if (ISZERO(this) || ISNAN(this)) return 0; if (ISINFINITY(this)) { return (ISPOSITIVE(this))? 0x7fffffffffffffffL:0x8000000000000001L; // 0x8000000000000001L, so that you can take -x.toLong() } if (exponent < 0x40000000) return 0; int shift = 0x4000003e-exponent; if (shift < 0) { return (ISPOSITIVE(this))? 0x7fffffffffffffffL:0x8000000000000001L; // 0x8000000000000001L, so that you can take -x.toLong() } return ISPOSITIVE(this) ? (mantissa>>>shift) : -(mantissa>>>shift); } /** * Returns true if the value of this Real * represents a mathematical integer. If the value is too large to * determine if it is an integer, true is returned. * *

* Equivalent double code: * (this == (long)this) *
* Execution time relative to add:   * * 0.6 *
* * @return true if the value represented by this object * represents a mathematical integer, false otherwise. */ public boolean isIntegral() { if (ISNAN(this)) return false; if (ISZERO(this) || ISINFINITY(this)) return true; if (exponent < 0x40000000) return false; int shift = 0x4000003e-exponent; if (shift <= 0) return true; return (mantissa&((1L<true if the mathematical integer represented * by this Real is odd. You must first determine * that the value is actually an integer using {@link * #isIntegral()}. If the value is too large to determine if the * integer is odd, false is returned. * *

* Equivalent double code: * ((((long)this)&1) == 1) *
* Execution time relative to add:   * * 0.6 *
* * @return true if the mathematical integer represented by * this Real is odd, false otherwise. */ public boolean isOdd() { if (!ISFINITENONZERO(this) || exponent < 0x40000000 || exponent > 0x4000003e) return false; int shift = 0x4000003e-exponent; return ((mantissa>>>shift)&1) != 0; } /** * Exchanges the contents of this Real and a. * *

* Equivalent double code: * tmp=this; this=a; a=tmp; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 0.5 *
* * @param a the Real to exchange with this. */ public void swap(Real a) { long tmpMantissa=mantissa; mantissa=a.mantissa; a.mantissa=tmpMantissa; int tmpExponent=exponent; exponent=a.exponent; a.exponent=tmpExponent; byte tmpSign =sign; sign =a.sign; a.sign =tmpSign; } // Temporary values used by functions (to avoid "new" inside functions) private static Real tmp0 = new Real(); // tmp for basic functions private static Real recipTmp = new Real(); private static Real recipTmp2 = new Real(); private static Real sqrtTmp = new Real(); private static Real expTmp = new Real(); private static Real expTmp2 = new Real(); private static Real expTmp3 = new Real(); private static Real tmp1 = new Real(); private static Real tmp2 = new Real(); private static Real tmp3 = new Real(); private static Real tmp4 = new Real(); private static Real tmp5 = new Real(); /** * Calculates the sum of this Real and a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this += a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * «« 1.0 »» *
* * @param a the Real to add to this. */ public void add(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISINFINITY(this) || ISINFINITY(a)) { if (ISINFINITY(this) && ISINFINITY(a) && sign != a.sign) makeNan(); else makeInfinity(ISINFINITY(this) ? sign : a.sign); return; } if (ISZERO(this) || ISZERO(a)) { if (ISZERO(this)) ASSIGN(this,a); if (ISZERO(this)) sign=0; return; } byte s; int e; long m; if (exponent > a.exponent || (exponent == a.exponent && mantissa>=a.mantissa)) { s = a.sign; e = a.exponent; m = a.mantissa; } else { s = sign; e = exponent; m = mantissa; sign = a.sign; exponent = a.exponent; mantissa = a.mantissa; } int shift = exponent-e; if (shift>=64) return; if (sign == s) { mantissa += m>>>shift; if (mantissa >= 0 && shift>0 && ((m>>>(shift-1))&1) != 0) mantissa ++; // We don't need normalization, so round now if (mantissa < 0) { // Simplified normalize() mantissa = (mantissa+1)>>>1; exponent ++; if (exponent < 0) { // Overflow makeInfinity(sign); return; } } } else { if (shift>0) { // Shift mantissa up to increase accuracy mantissa <<= 1; exponent --; shift --; } m = -m; mantissa += m>>shift; if (mantissa >= 0 && shift>0 && ((m>>>(shift-1))&1) != 0) mantissa ++; // We don't need to shift down, so round now if (mantissa < 0) { // Simplified normalize() mantissa = (mantissa+1)>>>1; exponent ++; // Can't overflow } else if (shift==0) { // Operands have equal exponents => many bits may be cancelled // Magic rounding: if result of subtract leaves only a few bits // standing, the result should most likely be 0... if (magicRounding && mantissa > 0 && mantissa <= 7) mantissa = 0; normalize(); } // else... if (shift>=1 && mantissa>=0) it should be a-ok } if (ISZERO(this)) sign=0; } /** * Calculates the sum of this Real and the integer * a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this += a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 1.8 *
* * @param a the int to add to this. */ public void add(int a) { tmp0.assign(a); add(tmp0); } /** * Calculates the sum of this Real and a with * extended precision. Replaces the contents of this Real * with the result. Returns the extra mantissa of the extended precision * result. * *

An extra 64 bits of mantissa is added to both arguments for extended * precision. If any of the arguments are not of extended precision, use * 0 for the extra mantissa. * *

Extended prevision can be useful in many situations. For instance, * when accumulating a lot of very small values it is advantageous for the * accumulator to have extended precision. To convert the extended * precision value back to a normal Real for further * processing, use {@link #roundFrom128(long)}. * *

* Equivalent double code: * this += a; *
Approximate error bound: * 2-62 ULPs (i.e. of a normal precision Real) *
* Execution time relative to add:   * * 2.0 *
* * @param extra the extra 64 bits of mantissa of this extended precision * Real. * @param a the Real to add to this. * @param aExtra the extra 64 bits of mantissa of the extended precision * value a. * @return the extra 64 bits of mantissa of the resulting extended * precision Real. */ public long add128(long extra, Real a, long aExtra) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return 0; } if (ISINFINITY(this) || ISINFINITY(a)) { if (ISINFINITY(this) && ISINFINITY(a) && sign != a.sign) makeNan(); else makeInfinity(ISINFINITY(this) ? sign : a.sign); return 0; } if (ISZERO(this) || ISZERO(a)) { if (ISZERO(this)) { ASSIGN(this,a); extra = aExtra; } if (ISZERO(this)) sign=0; return extra; } byte s; int e; long m; long x; if (exponent > a.exponent || (exponent == a.exponent && mantissa>a.mantissa) || (exponent == a.exponent && mantissa==a.mantissa && (extra>>>1)>=(aExtra>>>1))) { s = a.sign; e = a.exponent; m = a.mantissa; x = aExtra; } else { s = sign; e = exponent; m = mantissa; x = extra; sign = a.sign; exponent = a.exponent; mantissa = a.mantissa; extra = aExtra; } int shift = exponent-e; if (shift>=127) return extra; if (shift>=64) { x = m>>>(shift-64); m = 0; } else if (shift>0) { x = (x>>>shift)+(m<<(64-shift)); m >>>= shift; } extra >>>= 1; x >>>= 1; if (sign == s) { extra += x; mantissa += (extra>>63)&1; mantissa += m; } else { extra -= x; mantissa -= (extra>>63)&1; mantissa -= m; // Magic rounding: if result of subtract leaves only a few bits // standing, the result should most likely be 0... if (mantissa == 0 && extra > 0 && extra <= 0x1f) extra = 0; } extra <<= 1; extra = normalize128(extra); if (ISZERO(this)) sign=0; return extra; } /** * Calculates the difference between this Real and * a. * Replaces the contents of this Real with the result. * *

(To achieve extended precision subtraction, it is enough to call * a.{@link #neg() neg}() before calling {@link * #add128(long,Real,long) add128}(extra,a,aExtra), since only * the sign bit of a need to be changed.) * *

* Equivalent double code: * this -= a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 2.0 *
* * @param a the Real to subtract from this. */ public void sub(Real a) { tmp0.mantissa = a.mantissa; tmp0.exponent = a.exponent; tmp0.sign = (byte)(a.sign^1); add(tmp0); } /** * Calculates the difference between this Real and the * integer a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this -= a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 2.4 *
* * @param a the int to subtract from this. */ public void sub(int a) { tmp0.assign(a); sub(tmp0); } /** * Calculates the product of this Real and a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this *= a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 1.3 *
* * @param a the Real to multiply to this. */ public void mul(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } sign ^= a.sign; if (ISZERO(this) || ISZERO(a)) { if (ISINFINITY(this) || ISINFINITY(a)) makeNan(); else makeZero(sign); return; } if (ISINFINITY(this) || ISINFINITY(a)) { makeInfinity(sign); return; } long a0 = mantissa & 0x7fffffff; long a1 = mantissa >>> 31; long b0 = a.mantissa & 0x7fffffff; long b1 = a.mantissa >>> 31; mantissa = a1*b1; // If we're going to need normalization, we don't want to round twice int round = (mantissa<0) ? 0 : 0x40000000; mantissa += ((a0*b1 + a1*b0 + ((a0*b0)>>>31) + round)>>>31); int aExp = a.exponent; exponent += aExp-0x40000000; if (exponent < 0) { if (exponent == -1 && aExp < 0x40000000 && mantissa < 0) { // Not underflow after all, it will be corrected in the // normalization below } else { if (aExp < 0x40000000) makeZero(sign); // Underflow else makeInfinity(sign); // Overflow return; } } // Simplified normalize() if (mantissa < 0) { mantissa = (mantissa+1)>>>1; exponent ++; if (exponent < 0) // Overflow makeInfinity(sign); } } /** * Calculates the product of this Real and the integer * a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this *= a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 1.3 *
* * @param a the int to multiply to this. */ public void mul(int a) { if (ISNAN(this)) return; if (a<0) { sign ^= 1; a = -a; } if (ISZERO(this) || a==0) { if (ISINFINITY(this)) makeNan(); else makeZero(sign); return; } if (ISINFINITY(this)) return; // Normalize int int t=a; t|=t>>1; t|=t>>2; t|=t>>4; t|=t>>8; t|=t>>16; t = clz_tab[(t*clz_magic)>>>27]; exponent += 0x1F-t; a <<= t; if (exponent < 0) { makeInfinity(sign); // Overflow return; } long a0 = mantissa & 0x7fffffff; long a1 = mantissa >>> 31; long b0 = a & 0xffffffffL; mantissa = a1*b0; // If we're going to need normalization, we don't want to round twice int round = (mantissa<0) ? 0 : 0x40000000; mantissa += ((a0*b0 + round)>>>31); // Simplified normalize() if (mantissa < 0) { mantissa = (mantissa+1)>>>1; exponent ++; if (exponent < 0) // Overflow makeInfinity(sign); } } /** * Calculates the product of this Real and a with * extended precision. * Replaces the contents of this Real with the result. * Returns the extra mantissa of the extended precision result. * *

An extra 64 bits of mantissa is added to both arguments for * extended precision. If any of the arguments are not of extended * precision, use 0 for the extra mantissa. See also {@link * #add128(long,Real,long)}. * *

* Equivalent double code: * this *= a; *
Approximate error bound: * 2-60 ULPs *
* Execution time relative to add:   * * 3.1 *
* * @param extra the extra 64 bits of mantissa of this extended precision * Real. * @param a the Real to multiply to this. * @param aExtra the extra 64 bits of mantissa of the extended precision * value a. * @return the extra 64 bits of mantissa of the resulting extended * precision Real. */ public long mul128(long extra, Real a, long aExtra) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return 0; } sign ^= a.sign; if (ISZERO(this) || ISZERO(a)) { if (ISINFINITY(this) || ISINFINITY(a)) makeNan(); else makeZero(sign); return 0; } if (ISINFINITY(this) || ISINFINITY(a)) { makeInfinity(sign); return 0; } int aExp = a.exponent; exponent += aExp-0x40000000; if (exponent < 0) { if (aExp < 0x40000000) makeZero(sign); // Underflow else makeInfinity(sign); // Overflow return 0; } long ffffffffL = 0xffffffffL; long a0 = extra & ffffffffL; long a1 = extra >>> 32; long a2 = mantissa & ffffffffL; long a3 = mantissa >>> 32; long b0 = aExtra & ffffffffL; long b1 = aExtra >>> 32; long b2 = a.mantissa & ffffffffL; long b3 = a.mantissa >>> 32; a0 = ((a3*b0>>>2)+ (a2*b1>>>2)+ (a1*b2>>>2)+ (a0*b3>>>2)+ 0x60000000)>>>28; //(a2*b0>>>34)+(a1*b1>>>34)+(a0*b2>>>34)+0x08000000)>>>28; a1 *= b3; b0 = a2*b2; b1 *= a3; a0 += ((a1<<2)&ffffffffL) + ((b0<<2)&ffffffffL) + ((b1<<2)&ffffffffL); a1 = (a0>>>32) + (a1>>>30) + (b0>>>30) + (b1>>>30); a0 &= ffffffffL; a2 *= b3; b2 *= a3; a1 += ((a2<<2)&ffffffffL) + ((b2<<2)&ffffffffL); extra = (a1<<32) + a0; mantissa = ((a3*b3)<<2) + (a1>>>32) + (a2>>>30) + (b2>>>30); extra = normalize128(extra); return extra; } private void mul10() { if (!ISFINITENONZERO(this)) return; mantissa += (mantissa+2)>>>2; exponent += 3; if (mantissa < 0) { mantissa = (mantissa+1)>>>1; exponent++; } if (exponent < 0) makeInfinity(sign); // Overflow } /** * Calculates the square of this Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = this*this; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 1.1 *
*/ public void sqr() { sign = 0; if (!ISFINITENONZERO(this)) return; int e = exponent; exponent += exponent-0x40000000; if (exponent < 0) { if (e < 0x40000000) makeZero(sign); // Underflow else makeInfinity(sign); // Overflow return; } long a0 = mantissa&0x7fffffff; long a1 = mantissa>>>31; mantissa = a1*a1; // If we're going to need normalization, we don't want to round twice int round = (mantissa<0) ? 0 : 0x40000000; mantissa += ((((a0*a1)<<1) + ((a0*a0)>>>31) + round)>>>31); // Simplified normalize() if (mantissa < 0) { mantissa = (mantissa+1)>>>1; exponent ++; if (exponent < 0) // Overflow makeInfinity(sign); } } private static long ldiv(long a, long b) { // Calculate (a<<63)/b, where a<2**64, b<2**63, b<=a and a<2*b The // result will always be 63 bits, leading to a 3-stage radix-2**21 // (very high radix) algorithm, as described here: // S.F. Oberman and M.J. Flynn, "Division Algorithms and // Implementations," IEEE Trans. Computers, vol. 46, no. 8, // pp. 833-854, Aug 1997 Section 4: "Very High Radix Algorithms" int bInv24; // Approximate 1/b, never more than 24 bits int aHi24; // High 24 bits of a (sometimes 25 bits) int next21; // The next 21 bits of result, possibly 1 less long q; // Resulting quotient: round((a<<63)/b) // Preparations bInv24 = (int)(0x400000000000L/((b>>>40)+1)); aHi24 = (int)(a>>32)>>>8; a <<= 20; // aHi24 and a overlap by 4 bits // Now perform the division next21 = (int)(((long)aHi24*(long)bInv24)>>>26); a -= next21*b; // Bits above 2**64 will always be cancelled // No need to remove remainder, this will be cared for in next block q = next21; aHi24 = (int)(a>>32)>>>7; a <<= 21; // Two more almost identical blocks... next21 = (int)(((long)aHi24*(long)bInv24)>>>26); a -= next21*b; q = (q<<21)+next21; aHi24 = (int)(a>>32)>>>7; a <<= 21; next21 = (int)(((long)aHi24*(long)bInv24)>>>26); a -= next21*b; q = (q<<21)+next21; // Remove final remainder if (a<0 || a>=b) { q++; a -= b; } a <<= 1; // Round correctly if (a<0 || a>=b) q++; return q; } /** * Calculates the quotient of this Real and a. * Replaces the contents of this Real with the result. * *

(To achieve extended precision division, call * aExtra=a.{@link #recip128(long) recip128}(aExtra) before * calling {@link #mul128(long,Real,long) * mul128}(extra,a,aExtra).) * *

* Equivalent double code: * this /= a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 2.6 *
* * @param a the Real to divide this with. */ public void div(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } sign ^= a.sign; if (ISINFINITY(this)) { if (ISINFINITY(a)) makeNan(); return; } if (ISINFINITY(a)) { makeZero(sign); return; } if (ISZERO(this)) { if (ISZERO(a)) makeNan(); return; } if (ISZERO(a)) { makeInfinity(sign); return; } exponent += 0x40000000-a.exponent; if (mantissa < a.mantissa) { mantissa <<= 1; exponent--; } if (exponent < 0) { if (a.exponent >= 0x40000000) makeZero(sign); // Underflow else makeInfinity(sign); // Overflow return; } if (a.mantissa == 0x4000000000000000L) return; mantissa = ldiv(mantissa,a.mantissa); } /** * Calculates the quotient of this Real and the integer * a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this /= a; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 2.6 *
* * @param a the int to divide this with. */ public void div(int a) { if (ISNAN(this)) return; if (a<0) { sign ^= 1; a = -a; } if (ISINFINITY(this)) return; if (ISZERO(this)) { if (a==0) makeNan(); return; } if (a==0) { makeInfinity(sign); return; } long denom = a & 0xffffffffL; long remainder = mantissa%denom; mantissa /= denom; // Normalizing mantissa and scaling remainder accordingly int clz = 0; int t = (int)(mantissa>>>32); if (t == 0) { clz = 32; t = (int)mantissa; } t|=t>>1; t|=t>>2; t|=t>>4; t|=t>>8; t|=t>>16; clz += clz_tab[(t*clz_magic)>>>27]-1; mantissa <<= clz; remainder <<= clz; exponent -= clz; // Final division, correctly rounded remainder = (remainder+denom/2)/denom; mantissa += remainder; if (exponent < 0) // Underflow makeZero(sign); } /** * Calculates the quotient of a and this Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = a/this; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 3.1 *
* * @param a the Real to be divided by this. */ public void rdiv(Real a) { ASSIGN(recipTmp,a); recipTmp.div(this); ASSIGN(this,recipTmp); } /** * Calculates the quotient of the integer a and this * Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = a/this; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 3.9 *
* * @param a the int to be divided by this. */ public void rdiv(int a) { tmp0.assign(a); rdiv(tmp0); } /** * Calculates the reciprocal of this Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = 1/this; *
Error bound: * ½ ULPs *
* Execution time relative to add:   * * 2.3 *
*/ public void recip() { if (ISNAN(this)) return; if (ISINFINITY(this)) { makeZero(sign); return; } if (ISZERO(this)) { makeInfinity(sign); return; } exponent = 0x80000000-exponent; if (mantissa == 0x4000000000000000L) { if (exponent < 0) makeInfinity(sign); // Overflow return; } exponent--; mantissa = ldiv(0x8000000000000000L,mantissa); } /** * Calculates the reciprocal of this Real with * extended precision. * Replaces the contents of this Real with the result. * Returns the extra mantissa of the extended precision result. * *

An extra 64 bits of mantissa is added for extended precision. * If the argument is not of extended precision, use 0 * for the extra mantissa. See also {@link #add128(long,Real,long)}. * *

* Equivalent double code: * this = 1/this; *
Approximate error bound: * 2-60 ULPs *
* Execution time relative to add:   * * 17 *
* * @param extra the extra 64 bits of mantissa of this extended precision * Real. * @return the extra 64 bits of mantissa of the resulting extended * precision Real. */ public long recip128(long extra) { if (ISNAN(this)) return 0; if (ISINFINITY(this)) { makeZero(sign); return 0; } if (ISZERO(this)) { makeInfinity(sign); return 0; } byte s = sign; sign = 0; // Special case, simple power of 2 if (mantissa == 0x4000000000000000L && extra == 0) { exponent = 0x80000000-exponent; if (exponent<0) // Overflow makeInfinity(s); return 0; } // Normalize exponent int exp = 0x40000000-exponent; exponent = 0x40000000; // Save -A ASSIGN(recipTmp,this); long recipTmpExtra = extra; recipTmp.neg(); // First establish approximate result (actually 63 bit accurate) recip(); // Perform one Newton-Raphson iteration // Xn+1 = Xn + Xn*(1-A*Xn) ASSIGN(recipTmp2,this); extra = mul128(0,recipTmp,recipTmpExtra); extra = add128(extra,ONE,0); extra = mul128(extra,recipTmp2,0); extra = add128(extra,recipTmp2,0); // Fix exponent scalbn(exp); // Fix sign if (!isNan()) sign = s; return extra; } /** * Calculates the mathematical integer that is less than or equal to * this Real divided by a. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#floor(double) floor}(this/a); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 22 *
* * @param a the Real argument. */ public void divf(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISINFINITY(this)) { if (ISINFINITY(a)) makeNan(); return; } if (ISINFINITY(a)) { makeZero(sign); return; } if (ISZERO(this)) { if (ISZERO(a)) makeNan(); return; } if (ISZERO(a)) { makeInfinity(sign); return; } ASSIGN(tmp0,a); // tmp0 should be free // Perform same division as with mod, and don't round up long extra = tmp0.recip128(0); extra = mul128(0,tmp0,extra); if ((ISNEGATIVE(tmp0) && (extra < 0 || extra > 0x1f)) || (!ISNEGATIVE(tmp0) && extra < 0 && extra > 0xffffffe0)) { // For accurate floor() mantissa++; normalize(); } floor(); } private void modInternal(/*long thisExtra,*/ Real a, long aExtra) { ASSIGN(tmp0,a); // tmp0 should be free long extra = tmp0.recip128(aExtra); extra = tmp0.mul128(extra,this,0/*thisExtra*/); // tmp0 == this/a if (tmp0.exponent > 0x4000003e) { // floor() will be inaccurate makeZero(a.sign); // What else can be done? makeNan? return; } if ((ISNEGATIVE(tmp0) && (extra < 0 || extra > 0x1f)) || (!ISNEGATIVE(tmp0) && extra < 0 && extra > 0xffffffe0)) { // For accurate floor() with a bit of "magical rounding" tmp0.mantissa++; tmp0.normalize(); } tmp0.floor(); tmp0.neg(); // tmp0 == -floor(this/a) extra = tmp0.mul128(0,a,aExtra); extra = add128(0/*thisExtra*/,tmp0,extra); roundFrom128(extra); } /** * Calculates the value of this Real modulo a. * Replaces the contents of this Real with the result. * The modulo in this case is defined as the remainder after subtracting * a multiplied by the mathematical integer that is less than * or equal to this Real divided by a. * *

* Equivalent double code: * this = this - * a*Math.{@link Math#floor(double) floor}(this/a); *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 27 *
* * @param a the Real argument. */ public void mod(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISINFINITY(this)) { makeNan(); return; } if (ISZERO(this)) { if (ISZERO(a)) makeNan(); else sign = a.sign; return; } if (ISINFINITY(a)) { if (sign != a.sign) makeInfinity(a.sign); return; } if (ISZERO(a)) { makeZero(a.sign); return; } modInternal(a,0); } /** * Calculates the logical AND of this Real and * a. * Replaces the contents of this Real with the result. * *

Semantics of bitwise logical operations exactly mimic those of * Java's bitwise integer operators. In these operations, the * internal binary representation of the numbers are used. If the * values represented by the operands are not mathematical * integers, the fractional bits are also included in the operation. * *

Negative numbers are interpreted as two's-complement, * generalized to real numbers: Negating the number inverts all * bits, including an infinite number of 1-bits before the radix * point and an infinite number of 1-bits after the radix point. The * infinite number of 1-bits after the radix is rounded upwards * producing an infinite number of 0-bits, until the first 0-bit is * encountered which will be switched to a 1 (rounded or not, these * two forms are mathematically equivalent). For example, the number * "1" negated, becomes (in binary form) * ...1111110.111111.... Rounding of the infinite * number of 1's after the radix gives the number * ...1111111.000000..., which is exactly the way we * usually see "-1" as two's-complement. * *

This method calculates a negative value if and only * if this and a are both negative. * *

* Equivalent int code: * this &= a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.5 *
* * @param a the Real argument */ public void and(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISZERO(this) || ISZERO(a)) { makeZero(); return; } if (ISINFINITY(this) || ISINFINITY(a)) { if (!ISINFINITY(this) && ISNEGATIVE(this)) { ASSIGN(this,a); } else if (!ISINFINITY(a) && ISNEGATIVE(a)) ; // ASSIGN(this,this) else if (ISINFINITY(this) && ISINFINITY(a) && ISNEGATIVE(this) && ISNEGATIVE(a)) ; // makeInfinity(1) else makeZero(); return; } byte s; int e; long m; if (exponent >= a.exponent) { s = a.sign; e = a.exponent; m = a.mantissa; } else { s = sign; e = exponent; m = mantissa; sign = a.sign; exponent = a.exponent; mantissa = a.mantissa; } int shift = exponent-e; if (shift>=64) { if (s == 0) makeZero(sign); return; } if (s != 0) m = -m; if (ISNEGATIVE(this)) mantissa = -mantissa; mantissa &= m>>shift; sign = 0; if (mantissa < 0) { mantissa = -mantissa; sign = 1; } normalize(); } /** * Calculates the logical OR of this Real and * a. * Replaces the contents of this Real with the result. * *

See {@link #and(Real)} for an explanation of the * interpretation of a Real in bitwise operations. * This method calculates a negative value if and only * if either this or a is negative. * *

* Equivalent int code: * this |= a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.6 *
* * @param a the Real argument */ public void or(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISZERO(this) || ISZERO(a)) { if (ISZERO(this)) ASSIGN(this,a); return; } if (ISINFINITY(this) || ISINFINITY(a)) { if (!ISINFINITY(this) && ISNEGATIVE(this)) ; // ASSIGN(this,this); else if (!ISINFINITY(a) && ISNEGATIVE(a)) { ASSIGN(this,a); } else makeInfinity(sign | a.sign); return; } byte s; int e; long m; if ((ISNEGATIVE(this) && exponent <= a.exponent) || (ISPOSITIVE(a) && exponent >= a.exponent)) { s = a.sign; e = a.exponent; m = a.mantissa; } else { s = sign; e = exponent; m = mantissa; sign = a.sign; exponent = a.exponent; mantissa = a.mantissa; } int shift = exponent-e; if (shift>=64 || shift<=-64) return; if (s != 0) m = -m; if (ISNEGATIVE(this)) mantissa = -mantissa; if (shift>=0) mantissa |= m>>shift; else mantissa |= m<<(-shift); sign = 0; if (mantissa < 0) { mantissa = -mantissa; sign = 1; } normalize(); } /** * Calculates the logical XOR of this Real and * a. * Replaces the contents of this Real with the result. * *

See {@link #and(Real)} for an explanation of the * interpretation of a Real in bitwise operations. * This method calculates a negative value if and only * if exactly one of this and a is negative. * *

The operation NOT has been omitted in this library * because it cannot be generalized to fractional numbers. If this * Real represents a mathematical integer, the * operation NOT can be calculated as "this XOR -1", * which is equivalent to "this XOR * /FFFFFFFF.0000". * *

* Equivalent int code: * this ^= a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.5 *
* * @param a the Real argument */ public void xor(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISZERO(this) || ISZERO(a)) { if (ISZERO(this)) ASSIGN(this,a); return; } if (ISINFINITY(this) || ISINFINITY(a)) { makeInfinity(sign ^ a.sign); return; } byte s; int e; long m; if (exponent >= a.exponent) { s = a.sign; e = a.exponent; m = a.mantissa; } else { s = sign; e = exponent; m = mantissa; sign = a.sign; exponent = a.exponent; mantissa = a.mantissa; } int shift = exponent-e; if (shift>=64) return; if (s != 0) m = -m; if (ISNEGATIVE(this)) mantissa = -mantissa; mantissa ^= m>>shift; sign = 0; if (mantissa < 0) { mantissa = -mantissa; sign = 1; } normalize(); } /** * Calculates the value of this Real AND NOT * a. The opeation is read as "bit clear". * Replaces the contents of this Real with the result. * *

See {@link #and(Real)} for an explanation of the * interpretation of a Real in bitwise operations. * This method calculates a negative value if and only * if this is negative and not a is negative. * *

* Equivalent int code: * this &= ~a; *
Error bound: * 0 ULPs *
* Execution time relative to add:   * * 1.5 *
* * @param a the Real argument */ public void bic(Real a) { if (ISNAN(this) || ISNAN(a)) { makeNan(); return; } if (ISZERO(this) || ISZERO(a)) return; if (ISINFINITY(this) || ISINFINITY(a)) { if (!ISINFINITY(this)) { if (ISNEGATIVE(this)) if (ISNEGATIVE(a)) makeInfinity(0); else makeInfinity(1); } else if (ISNEGATIVE(a)) { if (ISINFINITY(a)) makeInfinity(0); else makeZero(); } return; } int shift = exponent-a.exponent; if (shift>=64 || (shift<=-64 && ISPOSITIVE(this))) return; long m = a.mantissa; if (ISNEGATIVE(a)) m = -m; if (ISNEGATIVE(this)) mantissa = -mantissa; if (shift<0) { if (ISNEGATIVE(this)) { if (shift<=-64) mantissa = ~m; else mantissa = (mantissa>>(-shift)) & ~m; exponent = a.exponent; } else mantissa &= ~(m<<(-shift)); } else mantissa &= ~(m>>shift); sign = 0; if (mantissa < 0) { mantissa = -mantissa; sign = 1; } normalize(); } private int compare(int a) { tmp0.assign(a); return compare(tmp0); } /** * Calculates the square root of this Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = Math.{@link Math#sqrt(double) sqrt}(this); *
Approximate error bound: * 1 ULPs *
* Execution time relative to add:   * * 19 *
*/ public void sqrt() { /* * Adapted from: * Cephes Math Library Release 2.2: December, 1990 * Copyright 1984, 1990 by Stephen L. Moshier * * sqrtl.c * * long double sqrtl(long double x); */ if (ISNAN(this)) return; if (ISZERO(this)) { sign=0; return; } if (ISNEGATIVE(this)) { makeNan(); return; } if (ISINFINITY(this)) return; // Save X ASSIGN(recipTmp,this); // normalize to range [0.5, 1) int e = exponent-0x3fffffff; exponent = 0x3fffffff; // quadratic approximation, relative error 6.45e-4 ASSIGN(recipTmp2,this); CONST(sqrtTmp,1,0x3ffffffd,0x68a7e193370ff21bL);//-0.2044058315473477195990 mul(sqrtTmp); CONST(sqrtTmp,0,0x3fffffff,0x71f1e120690deae8L);//0.89019407351052789754347 add(sqrtTmp); mul(recipTmp2); CONST(sqrtTmp,0,0x3ffffffe,0x5045ee6baf28677aL);//0.31356706742295303132394 add(sqrtTmp); // adjust for odd powers of 2 if ((e&1) != 0) mul(SQRT2); // calculate exponent exponent += e>>1; // Newton iteratios: // Yn+1 = (Yn + X/Yn)/2 for (int i=0; i<3; i++) { ASSIGN(recipTmp2, recipTmp); recipTmp2.div(this); add(recipTmp2); scalbn(-1); } } /** * Calculates the reciprocal square root of this Real. * Replaces the contents of this Real with the result. * *

* Equivalent double code: * this = 1/Math.{@link Math#sqrt(double) sqrt}(this); *
Approximate error bound: * 1 ULPs *
* Execution time relative to add:   * * 21 *
*/ public void rsqrt() { sqrt(); recip(); } /** * Calculates the cube root of this Real. * Replaces the contents of this Real with the result. * The cube root of a negative value is the negative of the cube * root of that value's magnitude. * *

* Equivalent double code: * this = Math.{@link Math#cbrt(double) cbrt}(this); *
Approximate error bound: * 2 ULPs *
* Execution time relative to add:   * * 32 *
*/ public void cbrt() { if (!ISFINITENONZERO(this)) return; byte s = sign; sign = 0; // Calculates recipocal cube root of normalized Real, // not zero, nan or infinity final long start = 0x5120000000000000L; // Save -A ASSIGN(recipTmp,this); recipTmp.neg(); // First establish approximate result mantissa = start-(mantissa>>>2); int expRmd = exponent==0 ? 2 : (exponent-1)%3; exponent = 0x40000000-(exponent-0x40000000-expRmd)/3; normalize(); if (expRmd>0) { CONST(recipTmp2, 0,0x3fffffff,0x6597fa94f5b8f20bL); // cbrt(1/2) mul(recipTmp2); if (expRmd>1) mul(recipTmp2); } // Now perform Newton-Raphson iteration // Xn+1 = (4*Xn - A