diff -uNr a/ffa/HISTORY.TXT b/ffa/HISTORY.TXT
--- a/ffa/HISTORY.TXT e47ab3f867f34ff2d307bf328d9fd39d51e3c0c93cfa4e328997b9322a642542f66e623a658d5f2fc87acfd2a72555790bd08f25948d9cb227866c7a24853985
+++ b/ffa/HISTORY.TXT false
@@ -1,47 +0,0 @@
-###############
-### HISTORY ###
-###############
-
-"Chapter 1: Genesis."
-ffa_ch1_genesis.vpatch
-http://www.loper-os.org/?p=1913
-
-"Chapter 2: Logical and Bitwise Operations."
-ffa_ch2_logicals.vpatch
-http://www.loper-os.org/?p=2026
-
-"Chapter 3: Shifts."
-ffa_ch3_shifts.vpatch
-http://www.loper-os.org/?p=2032
-
-"Chapter 4: Interlude: FFACalc."
-ffa_ch4_ffacalc.vpatch
-http://www.loper-os.org/?p=2051
-
-"Chapter 5: "Egyptological" Multiplication and Division."
-ffa_ch5_egypt.vpatch
-http://www.loper-os.org/?p=2071
-
-"Chapter 6: "Geological" RSA."
-ffa_ch6_simplest_rsa.vpatch
-http://www.loper-os.org/?p=2105
-
-"Chapter 7: "Turbo Egyptians.""
-ffa_ch7_turbo_egyptians.vpatch
-http://www.loper-os.org/?p=2118
-
-"Chapter 8: Interlude: Randomism."
-ffa_ch8_randomism.vpatch
-http://www.loper-os.org/?p=2175
-
-"Chapter 9: "Exodus from Egypt" with Comba’s Algorithm."
-ffa_ch9_exodus.vpatch
-http://www.loper-os.org/?p=2186
-
-"Chapter 10: Introducing Karatsuba’s Multiplication."
-ffa_ch10_karatsuba.vpatch
-http://www.loper-os.org/?p=2238
-
-"Chapter 11: Tuning and Unified API."
-ffa_ch11_tuning_and_api.vpatch
-### YOU ARE HERE ###
diff -uNr a/ffa/MANIFEST.TXT b/ffa/MANIFEST.TXT
--- a/ffa/MANIFEST.TXT false
+++ b/ffa/MANIFEST.TXT e5b1858f8d2c3ad2aeb9c4ba9e1fe28b5c79588305357385db3ac1afe50796268014b8480ca0c48508dcdb3416172e478b1291ef8a62b584b41cd6f9d6bcf988
@@ -0,0 +1,12 @@
+xxxxxxx ffa_ch1_genesis                "Genesis."
+xxxxxxx ffa_ch2_logicals               "Logical and Bitwise Operations."
+xxxxxxx ffa_ch3_shifts                 "Shifts."
+xxxxxxx ffa_ch4_ffacalc                "Interlude: FFACalc."
+xxxxxxx ffa_ch5_egypt                  ""Egyptological" Multiplication and Division."
+xxxxxxx ffa_ch6_simplest_rsa           ""Geological" RSA."
+xxxxxxx ffa_ch7_turbo_egyptians        ""Turbo Egyptians.""
+xxxxxxx ffa_ch8_randomism              "Interlude: Randomism."
+xxxxxxx ffa_ch9_exodus                 ""Exodus from Egypt" with Comba’s Algorithm."
+xxxxxxx ffa_ch10_karatsuba             "Introducing Karatsuba's Multiplication."
+xxxxxxx ffa_ch11_tuning_and_api        "Tuning and Unified API."
+ 551091 ffa_ch12_karatsuba_redux       "Karatsuba Redux."
diff -uNr a/ffa/ffacalc/ffa_calc.adb b/ffa/ffacalc/ffa_calc.adb
--- a/ffa/ffacalc/ffa_calc.adb 74dce12dc3fa30b4fd9e60ae209c1deba564b19dd95b413f02fa9a0e45120242cd9682a079c92b7e5a2eb19941a7eec6f0f3ec9c2b88058f88cf891828496d58
+++ b/ffa/ffacalc/ffa_calc.adb 4a851245719704af35888e2d1bc00b4cec16e66093e7d79408e98e8a2eaba0e0f5a7ed1eee26d0f75fa651dbe4bddd48c31722a2b9b98464362b681f5d7b4c8a
@@ -474,6 +474,17 @@
                end loop;
                Quit(0);
                
+               ---------------------------------------------------------
+               -- Ch. 12B:
+               -- Square, give bottom and top halves
+            when 'S' =>
+               Want(1);
+               Push;
+               FFA_FZ_Square(X     => Stack(SP - 1),
+                             XX_Lo => Stack(SP - 1),
+                             XX_Hi => Stack(SP));
+               ---------------------------------------------------------
+               
                ----------
                -- NOPs --
                ----------
diff -uNr a/ffa/libffa/ffa.adb b/ffa/libffa/ffa.adb
--- a/ffa/libffa/ffa.adb 9bd9383546e9b4e6ebd42f6e4dc7bb07a449487cea22ae1d5efac8d74af48907d827295e6e388a1912d964e3a6d5c818a4250bfcb95505ca6e5310f2ca0fe355
+++ b/ffa/libffa/ffa.adb 720237083e8d484b06d432529e8547d78adb2eb1f72cf0934f7596ce8ca7aec883c43f0951e133294b5139d4386e749c0939efe3715ec46bc2515e3b23493a7b
@@ -20,6 +20,7 @@
 with FZ_Arith;
 with FZ_Shift;
 with FZ_Mul;
+with FZ_Sqr;
 
 
 -- Wrapper bodies for routines that we inline, but must enforce preconditions
@@ -107,4 +108,13 @@
                                   XY_Lo => XY_Lo, XY_Hi => XY_Hi);
    end FFA_FZ_Multiply;
    
+   
+   -- Square. Preserves the inputs.
+   procedure FFA_FZ_Square(X     : in  FZ;
+                           XX_Lo : out FZ;
+                           XX_Hi : out FZ) is
+   begin
+      FZ_Sqr.FZ_Square_Buffered(X => X, XX_Lo => XX_Lo, XX_Hi => XX_Hi);
+   end FFA_FZ_Square;
+   
 end FFA;
diff -uNr a/ffa/libffa/ffa.ads b/ffa/libffa/ffa.ads
--- a/ffa/libffa/ffa.ads 98eaa2028c09d5abfdbeb11c8665906e67392bd4e25cd0434565d0e350a9211eedb31ee4327793ba9678722c15868c249e6ceaa06616f9ce27918375ee6bd69a
+++ b/ffa/libffa/ffa.ads aa1b855dffa6fb750cf1778f0dfb532328fc555dfa0c1fab99b3d9dd85b6c5bc16508a1422273f9706acc917e1b873a38ae9b358fb9a070f25f5403195cac01c
@@ -256,6 +256,14 @@
      XY_Lo'Length = XY_Hi'Length and
      XY_Lo'Length = ((X'Length + Y'Length) / 2);
    
+   -- Square. Preserves the inputs.
+   procedure FFA_FZ_Square(X     : in  FZ;
+                           XX_Lo : out FZ;
+                           XX_Hi : out FZ)
+     with Pre => XX_Lo'Length = X'Length and
+     XX_Hi'Length = X'Length and
+     X'Length mod 2 = 0;
+   
    ----------------------------------------------------------------------------
    --- Modular Operations on FZ
    ----------------------------------------------------------------------------
diff -uNr a/ffa/libffa/fz_arith.adb b/ffa/libffa/fz_arith.adb
--- a/ffa/libffa/fz_arith.adb 990abd54e1786da34415cde07148189077b5ad6a36e527b1de4fb22dde308d1ba505fc6923af2f7affcc9846a59cb6c465de9f02f215cfbe59696cfd3b036b2f
+++ b/ffa/libffa/fz_arith.adb 118a03b1beab9bfc7c517c193f333b5f9a1169cd7b3364ca7b10b7050f8aa8f85e4323b925f6534041f0f02de5242a72cc3a9829bf9e5cf25a075aa162cb1244
@@ -120,6 +120,26 @@
    end FZ_Add_Gated;
    
    
+   -- Destructive Sub: X := X - Y; Underflow := Borrow
+   procedure FZ_Sub_D(X          : in out FZ;
+                      Y          : in     FZ;
+                      Underflow  : out    WBool) is
+      Borrow : WBool := 0;
+   begin
+      for i in 0 .. Word_Index(X'Length - 1) loop
+         declare
+            A : constant Word := X(X'First + i);
+            B : constant Word := Y(Y'First + i);
+            S : constant Word := A - B - Borrow;
+         begin
+            X(X'First + i) := S;
+            Borrow         := W_Borrow(A, B, S);
+         end;
+      end loop;
+      Underflow := Borrow;
+   end FZ_Sub_D;
+   
+   
    -- Difference := X - Y; Underflow := Borrow
    procedure FZ_Sub(X          : in  FZ;
                     Y          : in  FZ;
diff -uNr a/ffa/libffa/fz_arith.ads b/ffa/libffa/fz_arith.ads
--- a/ffa/libffa/fz_arith.ads 23437d66c7048dadb0d307d41a30d3de3c879a107d4804c6c95766ca62ca7f875c7c4cc98efefd1ee085d6c2ad7844a3c7cd15dd61ed79f5f7ed0017f8f90262
+++ b/ffa/libffa/fz_arith.ads 1be9e99c85140fdbcaa506ea97de7b3e469ed5f0d30a252cca92dc74c0f8d77ddea63f7552907cd0705879984cb74259a228982504c441286c56bbf1d18a14be
@@ -62,6 +62,12 @@
                           Sum        : out FZ);
    pragma Inline_Always(FZ_Add_Gated);
    
+   -- Destructive Sub: X := X - Y; Underflow := Borrow
+   procedure FZ_Sub_D(X          : in out FZ;
+                      Y          : in     FZ;
+                      Underflow  : out    WBool);
+   pragma Inline_Always(FZ_Sub_D);
+   
    -- Difference := X - Y; Underflow := Borrow
    procedure FZ_Sub(X          : in  FZ;
                     Y          : in  FZ;
diff -uNr a/ffa/libffa/fz_modex.adb b/ffa/libffa/fz_modex.adb
--- a/ffa/libffa/fz_modex.adb ed92bd76ae835f0c692eddabe634adad968c4b698f70716192b3156c3f28f7c41d5859c05b7337f6990576b0a8c5964b9f39d84c074f7763acae3253ddcee771
+++ b/ffa/libffa/fz_modex.adb 84c9976aa492f507f8f21c8bbeb3b7dc671f8aa45c9479808dffda6b62fd23a1891f08740937d5d61b8d5fad9da7ad9e890aaa79059c0a1ee69096d877a93577
@@ -21,6 +21,7 @@
 with FZ_Pred;  use FZ_Pred;
 with FZ_Shift; use FZ_Shift;
 with FZ_Mul;   use FZ_Mul;
+with FZ_Sqr;   use FZ_Sqr;
 with FZ_Divis; use FZ_Divis;
 
 
@@ -53,6 +54,32 @@
    end FZ_Mod_Mul;
    
    
+   -- Modular Squaring: Product := X*X mod Modulus
+   procedure FZ_Mod_Sqr(X        : in  FZ;
+                        Modulus  : in  FZ;
+                        Product  : out FZ) is
+      
+      -- The wordness of both operands is equal:
+      L     : constant Indices := X'Length;
+      
+      -- Double-width register for squaring and modulus operations
+      XX    : FZ(1 .. L * 2);
+      
+      -- To refer to the lower and upper halves of the working register:
+      XX_Lo : FZ renames XX(1     .. L);
+      XX_Hi : FZ renames XX(L + 1 .. XX'Last);
+   
+   begin
+      
+      -- XX_Lo:XX_Hi := X^2
+      FZ_Square_Buffered(X, XX_Lo, XX_Hi);
+      
+      -- Product := XX mod M
+      FZ_Mod(XX, Modulus, Product);
+      
+   end FZ_Mod_Sqr;
+   
+   
    -- Modular Exponent: Result := Base^Exponent mod Modulus
    procedure FZ_Mod_Exp(Base     : in  FZ;
                         Exponent : in  FZ;
@@ -89,8 +116,8 @@
          -- Advance to the next bit of E
          FZ_ShiftRight(E, E, 1);
          
-         -- B := B*B mod Modulus
-         FZ_Mod_Mul(X => B, Y => B, Modulus => Modulus, Product => B);
+         -- B := B^2 mod Modulus
+         FZ_Mod_Sqr(X => B, Modulus => Modulus, Product => B);
          
       end loop;
       
diff -uNr a/ffa/libffa/fz_modex.ads b/ffa/libffa/fz_modex.ads
--- a/ffa/libffa/fz_modex.ads afc76cf7ae5fd7f854e19546e2c6a7804c613c3cd60be68ec4f8c7da8c5b0e33e656017b4c402b8b668b7ef021bb3db3dc861b54cf3a2b836d2a3d2b4f9d454b
+++ b/ffa/libffa/fz_modex.ads 4d75a96582c1897f5a85da8983033487653b0b0678d88bc3c7f895fcbb4a0a63b9019426d119cb3eddf959856959cbba295bc1d46ae36b0c4b7f4a6f9e2a00a7
@@ -33,6 +33,13 @@
      Modulus'Length = X'Length and
      Product'Length = Modulus'Length;
    
+   -- Modular Square: Product := X*X mod Modulus
+   procedure FZ_Mod_Sqr(X        : in  FZ;
+                        Modulus  : in  FZ;
+                        Product  : out FZ)
+     with Pre => Modulus'Length = X'Length and
+     Product'Length = Modulus'Length;
+   
    -- Modular Exponent: Result := Base^Exponent mod Modulus
    procedure FZ_Mod_Exp(Base     : in  FZ;
                         Exponent : in  FZ;
diff -uNr a/ffa/libffa/fz_mul.adb b/ffa/libffa/fz_mul.adb
--- a/ffa/libffa/fz_mul.adb a819415bc60308fe0b550eee13da2d6d4819064e4d336e9766e65a63768aef24ac7db9f5ac238725587f4660c2992ae17abc85ef5047d62beb04ba2c12d1172a
+++ b/ffa/libffa/fz_mul.adb 92bd5f707cebadbabc29f5f02a452f8d223b640ad39bf3b08cea613caa9ca92a7ae91096812ffc09e895c97bcc5aba5054e89c30a6d550926d9cc4814b3180a7
@@ -166,8 +166,14 @@
       -- Carry from individual term additions.
       C          : WBool;
       
-      -- Tail-Carry accumulator, for the final ripple
-      TC         : Word;
+      -- Barring a cosmic ray, 0 <= TC <= 2
+      subtype TailCarry is Word range 0 .. 2;
+      
+      -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
+      TC         : TailCarry := 0;
+      
+      -- Barring a cosmic ray, the tail ripple will NOT overflow.
+      FinalCarry : WZeroOrDie := 0;
       
    begin
       
@@ -212,15 +218,9 @@
       -- Compute the final Tail Carry for the ripple
       TC := TC + C - DD_Sub;
       
-      -- Barring a cosmic ray, 0 <= TC <= 2 .
-      pragma Assert(TC <= 2);
-      
       -- Ripple the Tail Carry into the tail.
-      FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => C);
+      FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => FinalCarry);
       
-      -- Barring a cosmic ray, the tail ripple will NOT overflow.
-      pragma Assert(C = 0);
-
    end Mul_Karatsuba;
    -- CAUTION: Inlining prohibited for Mul_Karatsuba !
    
diff -uNr a/ffa/libffa/fz_sqr.adb b/ffa/libffa/fz_sqr.adb
--- a/ffa/libffa/fz_sqr.adb false
+++ b/ffa/libffa/fz_sqr.adb 8f0062ed157d9eb9910c160640f3f6da391d6724a0023da3b21b6bb00f5391e53a35569073ea698d82a259a26f910de0803fb8eabac6b2694f3d43ac709ae76e
@@ -0,0 +1,261 @@
+------------------------------------------------------------------------------
+------------------------------------------------------------------------------
+-- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
+--                                                                          --
+-- (C) 2018 Stanislav Datskovskiy ( www.loper-os.org )                      --
+-- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
+--                                                                          --
+-- You do not have, nor can you ever acquire the right to use, copy or      --
+-- distribute this software ; Should you use this software for any purpose, --
+-- or copy and distribute it to anyone or in any manner, you are breaking   --
+-- the laws of whatever soi-disant jurisdiction, and you promise to         --
+-- continue doing so for the indefinite future. In any case, please         --
+-- always : read and understand any software ; verify any PGP signatures    --
+-- that you use - for any purpose.                                          --
+--                                                                          --
+-- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
+------------------------------------------------------------------------------
+------------------------------------------------------------------------------
+
+with Words;    use Words;
+with Word_Ops; use Word_Ops;
+with W_Mul;    use W_Mul;
+with FZ_Arith; use FZ_Arith;
+
+
+package body FZ_Sqr is
+   
+   -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED)
+   procedure FZ_Sqr_Comba(X  : in  FZ;
+                          XX : out FZ) is
+      
+      -- Words in each multiplicand
+      L : constant Word_Index := X'Length;
+      
+      -- Length of Product, i.e. double the length of X
+      LP : constant Word_Index := 2 * L;
+      
+      -- 3-word Accumulator
+      A2, A1, A0 : Word := 0;
+      
+      Lo, Hi  : Word; -- Output of WxW multiply/square
+      
+      -- Type for referring to a column of XX
+      subtype ColXX is Word_Index range 0 .. LP - 1;
+      
+      procedure Accum is
+         C      : WBool; -- Carry for the Accumulator addition
+         Sum    : Word;  -- Sum for Accumulator addition
+      begin
+         -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0:
+         -- A0 += Lo; C := Carry
+         Sum := A0 + Lo;
+         C   := W_Carry(A0, Lo, Sum);
+         A0  := Sum;
+         -- A1 += Hi + C; C := Carry
+         Sum := A1 + Hi + C;
+         C   := W_Carry(A1, Hi, Sum);
+         A1  := Sum;
+         -- A2 += A2 + C
+         A2  := A2 + C;
+      end Accum;
+      pragma Inline_Always(Accum);
+      
+      procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is
+      begin
+         for j in From .. To loop
+            -- Hi:Lo := j-th * (N - j)-th Word, and then,
+            Mul_Word(X(X'First + j),
+                     X(X'First - j + N),
+                     Lo, Hi);
+            Accum;
+            Accum; -- Accum += 2 * (Hi:Lo)
+         end loop;
+      end SymmDigits;
+      pragma Inline_Always(SymmDigits);
+      
+      procedure SqrDigit(N : in ColXX) is
+      begin
+         Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit)
+         Accum;                            -- Accum += Hi:Lo
+      end SqrDigit;
+      pragma Inline_Always(SqrDigit);
+      
+      procedure HaveDigit(N : in ColXX) is
+      begin
+         -- Save the Nth (indexed from zero) word of XX:
+         XX(XX'First + N) := A0;
+         -- Right-Shift the Accumulator by one Word width:
+         A0 := A1;
+         A1 := A2;
+         A2 := 0;
+      end HaveDigit;
+      pragma Inline_Always(HaveDigit);
+      
+      -- Compute the Nth (indexed from zero) column of the Product
+      procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is
+      begin
+         -- The branch pattern depends only on FZ wordness
+         if N mod 2 = 0 then         -- If we're doing an EVEN-numbered column:
+            SymmDigits(N, U, V - 1); --   Stop before V: it is the square case
+            SqrDigit(V);             --   Compute the square case at V
+         else                        -- If we're doing an ODD-numbered column:
+            SymmDigits(N, U, V);     --   All of them are the symmetric case
+         end if;                     -- After either even or odd column:
+         HaveDigit(N);               --   We have the N-th digit of the result.
+      end Col;
+      pragma Inline_Always(Col);
+      
+   begin
+      -- First col always exists:
+      SqrDigit(ColXX'First);
+      HaveDigit(ColXX'First);
+      
+      -- Compute the lower half of the Product:
+      for i in 1 .. L - 1 loop
+         Col(i, 0, i / 2);
+      end loop;
+      
+      -- Compute the upper half (sans last Word) of the Product:
+      for i in L .. LP - 2 loop
+         Col(i, i - L + 1, i / 2);
+      end loop;
+      
+      -- The very last Word of the Product:
+      XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0;
+   end FZ_Sqr_Comba;
+   
+   
+   -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
+   procedure Sqr_Karatsuba(X  : in  FZ;
+                           XX : out FZ) is
+      
+      -- L is the wordness of X. Guaranteed to be a power of two.
+      L : constant Word_Count := X'Length;
+      
+      -- An 'LSeg' is the same length as X.
+      subtype LSeg is FZ(1 .. L);
+      
+      -- K is HALF of the length of X.
+      K : constant Word_Index := L / 2;
+      
+      -- A 'KSeg' is the same length as HALF of X.
+      subtype KSeg is FZ(1 .. K);
+      
+      -- The three L-sized variables of the product equation, i.e.:
+      -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH
+      LL, DD, HH : LSeg;
+      
+      -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2
+      Dx         : KSeg;
+      
+      -- IGNORED Subtraction borrow, sign of (XL - XH)
+      Cx         : WBool; -- given as DD := Dx^2, DD is always positive
+      pragma Unreferenced(Cx);
+      
+      -- Bottom and Top K-sized halves of X.
+      XLo        : KSeg  renames    X(    X'First       ..    X'Last - K );
+      XHi        : KSeg  renames    X(    X'First + K   ..    X'Last     );
+      
+      -- L-sized middle segment of the product XX (+/- K from the midpoint).
+      XXMid      : LSeg  renames   XX(   XX'First + K   ..   XX'Last - K );
+      
+      -- Bottom and Top L-sized halves of the product XX.
+      XXLo       : LSeg  renames   XX(   XX'First       ..   XX'Last - L );
+      XXHi       : LSeg  renames   XX(   XX'First + L   ..   XX'Last     );
+      
+      -- Topmost K-sized quarter segment of the product XX, or 'tail'
+      XXHiHi     : KSeg  renames XXHi( XXHi'First + K   .. XXHi'Last     );
+      
+      -- Carry from addition of HH and LL terms; borrow from subtraction of DD
+      C          : WBool;
+      
+      -- Barring a cosmic ray, 0 <= TC <= 2
+      subtype TailCarry is Word range 0 .. 2;
+      
+      -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
+      TC         : TailCarry := 0;
+      
+      -- Barring a cosmic ray, the tail ripple will NOT overflow.
+      FinalCarry : WZeroOrDie := 0;
+      
+   begin
+      
+      -- Recurse: LL := XLo^2
+      FZ_Square_Unbuffered(XLo, LL);
+      
+      -- Recurse: HH := XHi^2
+      FZ_Square_Unbuffered(XHi, HH);
+      
+      -- Dx := |XLo - XHi| , and we don't care about the borrow
+      FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
+      
+      -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx)
+      FZ_Square_Unbuffered(Dx, DD);
+      
+      -- XX := LL + 2^(2 * K * Bitness) * HH
+      XXLo := LL;
+      XXHi := HH;
+      
+      -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator.
+      FZ_Add_D(X => XXMid, Y => HH, Overflow => TC);
+      
+      -- XX += 2^(K * Bitness) * LL, ...
+      FZ_Add_D(X => XXMid, Y => LL, Overflow => C);
+      
+      -- ... add the carry from LL addition into the Tail Carry accumulator.
+      TC := TC + C;
+      
+      -- XX -= 2^(K * Bitness) * DD
+      FZ_Sub_D(X         => XXMid, -- Because DD is always positive,
+               Y         => DD,    -- this term is always subtractive.
+               Underflow => C); -- C is the borrow from DD term subtraction
+      
+      -- Get final Tail Carry for the ripple by subtracting DD term's borrow
+      TC := TC - C;
+      
+      -- Ripple the Tail Carry into the tail.
+      FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry);
+      
+   end Sqr_Karatsuba;
+   -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
+   
+   
+   -- Squaring. (CAUTION: UNBUFFERED)
+   procedure FZ_Square_Unbuffered(X     : in  FZ;
+                                  XX    : out FZ) is
+   begin
+      
+      if X'Length <= Sqr_Karatsuba_Thresh then
+         
+         -- Base case:
+         FZ_Sqr_Comba(X, XX);
+         
+      else
+         
+         -- Recursive case:
+         Sqr_Karatsuba(X, XX);
+         
+      end if;
+      
+   end FZ_Square_Unbuffered;
+   
+   
+   -- Squaring. Preserves the inputs.
+   procedure FZ_Square_Buffered(X     : in  FZ;
+                                XX_Lo : out FZ;
+                                XX_Hi : out FZ) is
+      
+      -- Product buffer.
+      P : FZ(1 .. 2 * X'Length);
+      
+   begin
+      
+      FZ_Square_Unbuffered(X, P);
+      
+      XX_Lo := P(P'First            .. P'First + X'Length - 1);
+      XX_Hi := P(P'First + X'Length .. P'Last);
+      
+   end FZ_Square_Buffered;
+   
+end FZ_Sqr;
diff -uNr a/ffa/libffa/fz_sqr.ads b/ffa/libffa/fz_sqr.ads
--- a/ffa/libffa/fz_sqr.ads false
+++ b/ffa/libffa/fz_sqr.ads eb5cbe08505a865fdbec2d54434b3c1a2460d1bbf78b0cc90499b198b57be364844baa8a8921056a13b6f0ebc35c281e5021d7d60d931ff42f9bbca8ef253575
@@ -0,0 +1,53 @@
+------------------------------------------------------------------------------
+------------------------------------------------------------------------------
+-- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
+--                                                                          --
+-- (C) 2018 Stanislav Datskovskiy ( www.loper-os.org )                      --
+-- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
+--                                                                          --
+-- You do not have, nor can you ever acquire the right to use, copy or      --
+-- distribute this software ; Should you use this software for any purpose, --
+-- or copy and distribute it to anyone or in any manner, you are breaking   --
+-- the laws of whatever soi-disant jurisdiction, and you promise to         --
+-- continue doing so for the indefinite future. In any case, please         --
+-- always : read and understand any software ; verify any PGP signatures    --
+-- that you use - for any purpose.                                          --
+--                                                                          --
+-- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
+------------------------------------------------------------------------------
+------------------------------------------------------------------------------
+
+with FZ_Type; use FZ_Type;
+
+
+package FZ_Sqr is
+   
+   pragma Pure;
+   
+   -- Karatsuba Threshhold - at or below this many Words, we use Comba mult.
+   Sqr_Karatsuba_Thresh : constant Indices := 8;
+   
+   -- Square. (CAUTION: UNBUFFERED)
+   procedure FZ_Square_Unbuffered(X     : in  FZ;
+                                  XX    : out FZ);
+   pragma Inline_Always(FZ_Square_Unbuffered);
+   
+   -- Comba's squaring. (CAUTION: UNBUFFERED)
+   procedure FZ_Sqr_Comba(X     : in  FZ;
+                          XX    : out FZ);
+   pragma Inline_Always(FZ_Sqr_Comba);
+   
+   -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
+   procedure Sqr_Karatsuba(X  : in  FZ;
+                           XX : out FZ)
+     with Pre => XX'Length = 2 * X'Length and
+     X'Length mod 2 = 0;
+   -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
+   
+   -- Squaring. Preserves the inputs.
+   procedure FZ_Square_Buffered(X     : in  FZ;
+                                XX_Lo : out FZ;
+                                XX_Hi : out FZ);
+   pragma Inline_Always(FZ_Square_Buffered);
+   
+end FZ_Sqr;
diff -uNr a/ffa/libffa/w_mul.adb b/ffa/libffa/w_mul.adb
--- a/ffa/libffa/w_mul.adb b37749e920be9f2ca06679ba00c8d924c0e0d9f309921ddd36096a4770b6380ddd4deb9a56a84f535a8d02deb2e379c13c45b2ca40045e8bc4b230bd1c1f3ef5
+++ b/ffa/libffa/w_mul.adb f79f6f0c2069d6d35e407ca4bdab3f1b1f90cb9ca55664e64e73e7130d03ad3056c62e0deeac005dcb3728f3ff5ca1357ae608277f8b062bc610f7459b93e561
@@ -134,4 +134,46 @@
       
    end Mul_Word;
    
+   ---------------------------------------------------------------------------
+   -- LET A CURSE FALL FOREVER on the authors of GCC, and on the Ada committee,
+   -- neither of whom saw it fit to decree a primitive which returns both
+   -- upper and lower halves of an iron MUL instruction's result. Consequently,
+   -- portable Mul_Word demands ~four~ MULs (and several additions and shifts);
+   -- while portable Sqr_Word demands ~three~ MULs (and likewise adds/shifts.)
+   -- If it were not for their idiocy, BOTH routines would weigh 1 CPU instr.!
+   ---------------------------------------------------------------------------
+   
+   -- Carry out X*X squaring, return lower word XX_LW and upper word XX_HW.
+   procedure Sqr_Word(X       : in  Word;
+                      XX_LW   : out Word;
+                      XX_HW   : out Word) is
+      
+      -- Bottom half of multiplicand X
+      XL : constant HalfWord := BottomHW(X);
+      
+      -- Top half of multiplicand X
+      XH : constant HalfWord := TopHW(X);
+      
+      -- XL^2
+      LL : constant Word := Mul_HalfWord_Iron(XL, XL);
+      
+      -- XL * XH
+      LH : constant Word := Mul_HalfWord_Iron(XL, XH);
+      
+      -- XH^2
+      HH : constant Word := Mul_HalfWord_Iron(XH, XH);
+      
+      -- Carry
+      CL : constant Word := TopHW(TopHW(LL) + Shift_Left(BottomHW(LH), 1));
+      
+   begin
+      
+      -- Get the bottom half of the Product:
+      XX_LW := LL + Shift_Left(LH, HalfBitness + 1);
+      
+      -- Get the top half of the Product:
+      XX_HW := HH + Shift_Left(TopHW(LH), 1) + CL;
+      
+   end Sqr_Word;
+   
 end W_Mul;
diff -uNr a/ffa/libffa/w_mul.ads b/ffa/libffa/w_mul.ads
--- a/ffa/libffa/w_mul.ads 0d60e63cf1527de3c76d96fe3f58151b0e02814eef0fba0d7f4ab5e28b938ffe0430d3b05b378695d942217110d323eb41934c818b187f5ee50d6c402ca35c57
+++ b/ffa/libffa/w_mul.ads cbc2184b13f61f3540baa6350673bc3735e29c3b9f4ea9b2ebcd7525d75a51cc9a11b84fdf5f92b61661daf3607643d6a935cb2113f8f5edd0ae46d37f41bb9f
@@ -52,4 +52,10 @@
                       XY_LW : out Word; XY_HW : out Word);
    pragma Inline_Always(Mul_Word);
    
+   -- Carry out X*X squaring, return lower word XX_LW and upper word XX_HW.
+   procedure Sqr_Word(X       : in  Word;
+                      XX_LW   : out Word;
+                      XX_HW   : out Word);
+   pragma Inline_Always(Sqr_Word);
+   
 end W_Mul;
diff -uNr a/ffa/libffa/words.ads b/ffa/libffa/words.ads
--- a/ffa/libffa/words.ads c84e7e88846a8c51ccd53f1b704b66f7e23107fd56da9dbd5054a55dba963e730a9f5a8e62846d5a451b3bbae26b976746547df526689ab7548b261fe6b4fb2d
+++ b/ffa/libffa/words.ads 864714838c024ee2b0f4a2f8b3fcf5ccae3b055429b3e1763fdfb596b9115bf964c95214674e8eea859ada4caf41691362df3083cb4ef25dd218bc9237f28dfa
@@ -49,4 +49,7 @@
    -- When we must refer to individual bit positions of a machine word:
    subtype WBit_Index is Natural range 0 .. Bitness - 1;
    
+   -- For when a computed Word is mandatorily expected to equal zero:
+   subtype WZeroOrDie is Word range 0 .. 0;
+   
 end Words;