-- Copyright (c) 2006-2011, David Amos. All rights reserved.

{-# LANGUAGE BangPatterns #-}

-- |A module for finding prime factors.
module Math.NumberTheory.Factor (module Math.NumberTheory.Prime,
                                 pfactors, ppfactors, pfactorsTo, ppfactorsTo) where

import Control.Arrow (second, (&&&))
import Data.Either (lefts)
import Data.List as L
import Math.Core.Utils (multisetSumAsc)
import Math.NumberTheory.Prime

-- |List the prime factors of n (with multiplicity). For example:
-- >>> pfactors 60
-- [2,2,3,5]
--
-- This says that 60 = 2 * 2 * 3 * 5
-- 
-- The algorithm uses trial division to find small factors,
-- followed if necessary by the elliptic curve method to find larger factors.
-- The running time increases with the size of the second largest prime factor of n.
-- It can find 10-digit prime factors in seconds, but can struggle with 20-digit prime factors.
pfactors :: Integer -> [Integer]
pfactors n | n > 0 = pfactors' n $ takeWhile (< 10000) primes
           | n < 0 = -1 : pfactors' (-n) (takeWhile (< 10000) primes)
    where pfactors' n (d:ds) | n == 1 = []
                             | n < d*d = [n]
                             | r == 0 = d : pfactors' q (d:ds)
                             | otherwise = pfactors' n ds
                             where (q,r) = quotRem n d
          pfactors' n [] = pfactors'' n
          pfactors'' n = if isMillerRabinPrime n then [n]
                         else let d = findFactorParallelECM n -- findFactorECM n
                              in multisetSumAsc (pfactors'' d) (pfactors'' (n `div` d))

-- |List the prime power factors of n. For example:
-- >>> ppfactors 60
-- [(2,2),(3,1),(5,1)]
--
-- This says that 60 = 2^2 * 3^1 * 5^1
ppfactors :: Integer -> [(Integer,Int)]
ppfactors = map (head &&& length) . L.group . pfactors
-- ppfactors = map (\xs -> (head xs, length xs)) . L.group . pfactors

-- |Find the prime factors of all numbers up to n. Thus @pfactorsTo n@ is equivalent to @[(m, pfactors m) | m <- [1..n]]@,
-- except that the results are not returned in order. For example:
-- >>> pfactorsTo 10
-- [(8,[2,2,2]),(4,[2,2]),(6,[3,2]),(10,[5,2]),(2,[2]),(9,[3,3]),(3,[3]),(5,[5]),(7,[7]),(1,[])]
--
-- @pfactorsTo n@ is significantly faster than @map pfactors [1..n]@ for larger n.
pfactorsTo n = pfactorsTo' (1,[]) primes where
    pfactorsTo' (!m,!qs) ps@(ph:pt) | m' > n = [(m,qs)]
                                    | otherwise = pfactorsTo' (m',ph:qs) ps ++ pfactorsTo' (m,qs) pt
        where m' = m*ph
-- We avoid a reverse call, because it does make a noticeable difference to the speed.

-- |Find the prime power factors of all numbers up to n. Thus @ppfactorsTo n@ is equivalent to @[(m, ppfactors m) | m <- [1..n]]@,
-- except that the results are not returned in order. For example:
-- >>> ppfactorsTo 10
-- [(8,[(2,3)]),(4,[(2,2)]),(6,[(3,1),(2,1)]),(10,[(5,1),(2,1)]),(2,[(2,1)]),(9,[(3,2)]),(3,[(3,1)]),(5,[(5,1)]),(7,[(7,1)]),(1,[])]
--
-- @ppfactorsTo n@ is significantly faster than @map ppfactors [1..n]@ for larger n.
ppfactorsTo = map (second (map (head &&& length) . L.group)) . pfactorsTo


-- Cohen, A Course in Computational Algebraic Number Theory, p488

-- return (u,v,d) s.t ua+vb = d, with d = gcd a b
extendedEuclid a b
    | b == 0 = (1,0,a)
    | otherwise = let (q,r) = a `quotRem` b        -- a == qb + r
                      (s,t,d) = extendedEuclid b r -- s*b+t*r == d
                  in (t,s-q*t,d)                   -- s*b+t*(a-q*b) == d

-- ELLIPTIC CURVE ARITHMETIC

data EllipticCurve = EC Integer Integer Integer deriving (Eq, Show)
-- EC p a b represents the curve y^2 == x^3+ax+b over Fp

data EllipticCurvePt = Inf | P Integer Integer deriving (Eq, Show)
-- P x y

isEltEC _ Inf = True
isEltEC (EC n a b) (P x y) = (y*y - x*x*x - a*x - b) `mod` n == 0


-- Koblitz p34

-- Addition in an elliptic curve, with bailout if the arithmetic fails (giving a potential factor of n)
ecAdd _ Inf pt = Right pt
ecAdd _ pt Inf = Right pt
ecAdd (EC n a b) (P x1 y1) (P x2 y2)
    | x1 /= x2 = let (_,v,d) = extendedEuclid n ((x1-x2) `mod` n)  -- we're expecting d == 1, v == 1/(x1-x2) (mod n)
                     m = (y1-y2) * v `mod` n
                     x3 = (m*m - x1 - x2) `mod` n
                     y3 = (-y1 + m*(x1 - x3)) `mod` n
                  in if d == 1 then Right (P x3 y3) else Left d
    | x1 == x2 = if (y1 + y2) `mod` n == 0  -- includes the case y1 == y2 == 0
                 then Right Inf
                 else let (_,v,d) = extendedEuclid n ((2*y1) `mod` n)  -- we're expecting d == 1, v == 1/(2*y1) (mod n)
                          m = (3*x1*x1 + a) * v `mod` n
                          x3 = (m*m - 2*x1) `mod` n
                          y3 = (-y1 + m*(x1 - x3)) `mod` n
                      in if d == 1 then Right (P x3 y3) else Left d
-- Note that b isn't actually used anywhere

-- Note, only the final `mod` n calls when calculating x3, y3 are necessary
-- and the code is faster if the others are removed

-- Scalar multiplication in an elliptic curve
ecSmult _ 0 _ = Right Inf
ecSmult ec k pt | k > 0 = ecSmult' k pt Inf
    where -- ecSmult' k q p = k * q + p
          ecSmult' 0 _ p = Right p
          ecSmult' i q p = let p' = if odd i then ecAdd ec p q else Right p
                               q' = ecAdd ec q q
                           in case (p',q') of
                              (Right p'', Right q'') -> ecSmult' (i `div` 2) q'' p''
                              (Left _, _) -> p'
                              (_, Left _) -> q'

-- ELLIPTIC CURVE FACTORISATION

-- We choose an elliptic curve E over Zn, and a point P on the curve
-- We then try to calculate kP, for suitably chosen k
-- What we are hoping is that at some stage we will fail because we can't invert an element in Zn
-- This will lead to finding a non-trivial factor of n

discriminantEC a b = 4 * a * a * a + 27 * b * b

-- perform a sequence of scalar multiplications in the elliptic curve, hoping for a bailout
ecTrial ec@(EC n a b) ms pt
    | d == 1 = ecTrial' ms pt
    | otherwise = Left d
    where d = gcd n (discriminantEC a b)
          ecTrial' [] pt = Right pt
          ecTrial' (m:ms) pt = case ecSmult ec m pt of
                               Right pt' -> ecTrial' ms pt'
                               Left d -> Left d
-- In effect, we're calculating ecSmult ec (product ms) pt, but an m at a time

l n = exp (sqrt (log n * log (log n)))
-- L(n) is some sort of measure of the average smoothness of numbers up to n
-- # [x <= n | x is L(n)^a-smooth] = n L(n)^(-1/2a+o(1))  -- Cohen p 482

-- q is the largest prime we're looking for - normally sqrt n
-- the b figure here is from Cohen p488
multipliers q = [p' | p <- takeWhile (<= b) primes, let p' = last (takeWhile (<= b) (powers p))]
    where b = round ((l q) ** (1/sqrt 2))
          powers x = iterate (*x) x

findFactorECM n | gcd n 6 == 1 =
    let ms = multipliers (sqrt $ fromInteger n)
    in head $ filter ( (/= 0) . (`mod` n) ) $
       lefts [ecTrial (EC n a 1) ms (P 0 1) | a <- [1..] ]
    -- the filter is because d might be a multiple of n,
    -- for example if the problem was that the discriminant was divisible by n

-- TESTING MULTIPLE CURVES IN PARALLEL

-- Cohen p489
-- find inverse of as mod n in parallel, or a non-trivial factor of n
parallelInverse n as = if d == 1 then Right bs else Left $ head [d' | a <- as, let d' = gcd a n, d' /= 1]
    where c:cs = reverse $ scanl (\x y -> x*y `mod` n) 1 as
          ds = scanl (\x y -> x*y `mod` n) 1 (reverse as)
          (u,_,d) = extendedEuclid c n
          bs = reverse [ u*nota `mod` n | nota <- zipWith (*) cs ds]
-- let m = length as
-- then the above code requires O(m) mod calls - in fact 3m-3 calls (?)

parallelEcAdd n ecs ps1 ps2 =
    case parallelInverse n (zipWith f ps1 ps2) of
    Right invs -> Right [g ec p1 p2 inv | (ec,p1,p2,inv) <- L.zip4 ecs ps1 ps2 invs]
    Left d -> Left d
    where f Inf pt = 1
          f pt Inf = 1
          f (P x1 y1) (P x2 y2) | x1 /= x2 = x1-x2 -- slightly faster not to `mod` n here
                                | x1 == x2 = 2*y1 -- slightly faster not to `mod` n here
          -- inverses = parallelInverse n $ zipWith f ps1 ps2
          g _ Inf pt _ = pt
          g _ pt Inf _ = pt
          g (EC n a b) (P x1 y1) (P x2 y2) inv
              | x1 /= x2 = let m = (y1-y2) * inv -- slightly faster not to `mod` n here
                               x3 = (m*m - x1 - x2) `mod` n
                               y3 = (-y1 + m*(x1 - x3)) `mod` n
                           in P x3 y3
              | x1 == x2 = if (y1 + y2) `elem` [0,n] -- `mod` n == 0  -- includes the case y1 == y2 == 0
                           then Inf
                           else let m = (3*x1*x1 + a) * inv -- slightly faster not to `mod` n here
                                    x3 = (m*m - 2*x1) `mod` n
                                    y3 = (-y1 + m*(x1 - x3)) `mod` n
                                 in P x3 y3

parallelEcSmult _ _ 0 pts = Right $ map (const Inf) pts
parallelEcSmult n ecs k pts | k > 0 = ecSmult' k pts (map (const Inf) pts)
    where -- ecSmult' k qs ps = k * qs + ps
          ecSmult' 0 _ ps = Right ps
          ecSmult' k qs ps = let ps' = if odd k then parallelEcAdd n ecs ps qs else Right ps
                                 qs' = parallelEcAdd n ecs qs qs
                             in case (ps',qs') of
                                (Right ps'', Right qs'') -> ecSmult' (k `div` 2) qs'' ps''
                                (Left _, _) -> ps'
                                (_, Left _) -> qs'

parallelEcTrial n ecs ms pts
    | all (==1) ds = ecTrial' ms pts
    | otherwise = Left $ head $ filter (/=1) ds
    where ds = [gcd n (discriminantEC a b) | EC n a b <- ecs]
          ecTrial' [] pts = Right pts
          ecTrial' (m:ms) pts = case parallelEcSmult n ecs m pts of
                                Right pts' -> ecTrial' ms pts'
                                Left d -> Left d

findFactorParallelECM n | gcd n 6 == 1 =
    let ms = multipliers (sqrt $ fromInteger n)
    in head $ filter ( (/= 0) . (`mod` n) ) $
       lefts [parallelEcTrial n [EC n (a+i) 1 | i <- [1..100]] ms (replicate 100 (P 0 1)) | a <- [0,100..] ]
-- 100 at a time is chosen heuristically.