{-# LANGUAGE OverloadedStrings #-}

-- | This package provides a variety of utilities for parsing and producing
-- SVMlight input files.

module Math.SVM.SVMLight.Utils
    ( -- * Types
      Qid(..)
    , FeatureIdx(..)
    , Point(..)
      -- * Parsing SVMlight files
    , point
    , featureIdx
      -- * Generating SVMlight files
    , renderPoints
    , renderPoint
    ) where

import Data.Monoid
import Data.Foldable (foldMap)
import Data.List (intersperse)
import Control.Applicative

import qualified Data.Map as M

import Data.Attoparsec.ByteString.Char8 as AP
import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Char8 as BS

newtype Qid = Qid Int
	    deriving (Show, Ord, Eq)

-- | A feature identifier
newtype FeatureIdx = FIdx Int
	           deriving (Show, Ord, Eq)

featureIdx :: Parser FeatureIdx
featureIdx = fmap FIdx decimal

qid :: Parser Qid
qid = Qid <$> ("qid:" *> decimal)

-- | A sample point (e.g. a line of an SVMlight input file).
data Point = Point { pLabel    :: Int
                   , pQid      :: Maybe Qid
                   , pFeatures :: M.Map FeatureIdx Double
                   , pComment  :: Maybe BS.ByteString
                   }
           deriving (Show, Ord, Eq)

-- | Parse a sample point
point :: Parser Point
point =  do
    label <- decimal
    skipSpace
    qid <- optional qid
    skipSpace
    features <- feature `sepBy'` char ' '
    skipSpace
    comment <- optional $ do
      char '#'
      BS.pack <$> anyChar `manyTill` endOfLine
    skipSpace
    return $ Point label qid (M.fromList features) comment
  where
    feature = (,) <$> featureIdx <* char ':' <*> double

-- | A @Builder@ containing the given @Point@s
renderPoints :: [Point] -> BSB.Builder
renderPoints pts = mconcat $ intersperse "\n" $ map renderPoint pts

-- | A @Builder@ containing the given @Point@
renderPoint :: Point -> BSB.Builder
renderPoint pt =
    mconcat $ intersperse " " $ [BSB.intDec (pLabel pt)] ++ qid ++ vs ++ c
  where
    vs = map (\(FIdx i,v)->BSB.intDec i<>":"<>BSB.doubleDec v) $ M.assocs (pFeatures pt)
    c = maybe [] (\c->[" #"<>BSB.byteString c]) (pComment pt)
    qid = maybe [] (\(Qid q)->["qid:"<>BSB.intDec q]) (pQid pt)