{-# LANGUAGE OverloadedStrings #-} -- | This package provides a variety of utilities for parsing and producing -- SVMlight input files. module Math.SVM.SVMLight.Utils ( -- * Types Qid(..) , FeatureIdx(..) , Point(..) -- * Parsing SVMlight files , point , featureIdx -- * Generating SVMlight files , renderPoints , renderPoint ) where import Data.Monoid import Data.Foldable (foldMap) import Data.List (intersperse) import Data.Char (ord) import Control.Applicative import qualified Data.Map.Strict as M import Data.Attoparsec.ByteString.Char8 as AP import qualified Data.ByteString.Builder as BSB import qualified Data.ByteString.Char8 as BS newtype Qid = Qid Int deriving (Show, Ord, Eq) -- | A feature identifier newtype FeatureIdx = FIdx Int deriving (Show, Ord, Eq) featureIdx :: Parser FeatureIdx featureIdx = fmap FIdx decimal {-# INLINE featureIdx #-} qid :: Parser Qid qid = Qid <$> ("qid:" *> decimal) {-# INLINE qid #-} -- | A sample point (e.g. a line of an SVMlight input file). data Point = Point { pLabel :: !Int , pQid :: !(Maybe Qid) , pFeatures :: !(M.Map FeatureIdx Double) , pComment :: !(Maybe BS.ByteString) } deriving (Show, Ord, Eq) -- | Parse a sample point point :: Parser Point point = do label <- decimal skipSpace qid <- optional qid skipSpace features <- M.unions <$> feature `sepBy` char ' ' skipSpace comment <- optional $ do char '#' skipSpace takeTill (isEndOfLine . fromIntegral . ord) skipSpace return $ Point label qid features comment where feature = M.singleton <$> featureIdx <* char ':' <*> double {-# INLINE point #-} -- | A @Builder@ containing the given @Point@s renderPoints :: [Point] -> BSB.Builder renderPoints pts = mconcat $ intersperse "\n" $ map renderPoint pts {-# INLINEABLE renderPoints #-} -- | A @Builder@ containing the given @Point@ renderPoint :: Point -> BSB.Builder renderPoint pt = mconcat $ intersperse " " $ [BSB.intDec (pLabel pt)] ++ qid ++ vs ++ c where vs = map (\(FIdx i,v)->BSB.intDec i<>":"<>BSB.doubleDec v) $ M.assocs (pFeatures pt) c = maybe [] (\c->[" #"<>BSB.byteString c]) (pComment pt) qid = maybe [] (\(Qid q)->["qid:"<>BSB.intDec q]) (pQid pt) {-# INLINEABLE renderPoint #-}