-- clusterLibs -- calculate clusters by library, using the lib table -- read patterns from a table, first line is header {- import qualified Data.ByteString.Char8.Lazy as B import Data.Map (Map) import qualified Data.Map as M type Str = B.ByteString type Clusters = Map Str [Str] insert1 m k v = M.insertWith (flip (++)) k [v] m -} import System.Environment (getArgs) import Text.Regex import Data.List (elemIndex,sortBy,intersperse) type Cluster = (String,[String]) main :: IO () main = do [ps,cs] <- do [xs,ys] <- getArgs return [xs,ys] `catch` error "Usage: clusterlibs ps cs" pat <- readPatternTable ps clus <- classClusters pat cs writeClusters clus readPatternTable :: FilePath -> IO [(Regex,String)] readPatternTable f = do (h:ls) <- return . map words . lines =<< readFile f let (p,n) = case (elemIndex "Pattern" h, elemIndex "Name" h) of (Just x,Just y) -> (x,y) _ -> error ("Need both 'Pattern' and 'Name' headers in '"++f++"'") z l | length l < max p n = error ("Line in library table too short:\n"++show l) | otherwise = (mkRegex (l!!p),l!!n) return $ map z ls -- will need to match against all, to check for multiple matches -- tag names with library classClusters :: [(Regex,String)] -> FilePath -> IO [Cluster] classClusters ps f = return . getClusters (classify ps) . lines =<< readFile f classify :: [(Regex,String)] -> String -> String classify ps str = case concatMap (class1 str) ps of [] -> error ("no match for "++str++" in library table") [x] -> x++":"++str s@(_:_) -> error ("multiple matches for "++str++": "++show s) where class1 st (r,s) = maybe [] (const [s]) (matchRegex r st) getClusters :: (String -> String) -> [String] -> [(String,[String])] getClusters _ [] = [] getClusters _ [_] = error "odd number of cluster lines!" getClusters f (c:ss:rest) = case head $ words c of ('>':name) -> (name,map f $ words ss) : getClusters f rest _ -> error ("Cluster '"++c++"' didn't start with '>'") -- transpose and print writeClusters :: [Cluster] -> IO () writeClusters cs = do let cs' = map (\(h,s) -> (h:s)) cs cs'' = sortBy (\x y -> compare (length y) (length x)) cs' unwords' = concat . intersperse "," putStrLn $ unlines $ map unwords' $ transpose cs'' transpose :: [[String]] -> [[String]] transpose [] = [] transpose ls' = let ls = filter (not . null) ls' in map head ls : transpose (map tail ls)