{-
Copyright 2013, 2014 Marcelo Garlet Millani
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{-|
Module      : Simtreelo
Description : 
Copyright   : (c) Marcelo Garlet Millani
License     : GPL-3
Maintainer  : marcelogmillani@gmail.com
Stability   : experimental

This library loads trees that are described through text. In order to preserve readability, the hierarchy of the tree is give exclusively by the indentation level, which can be any number of spaces or tabs, as long as coherence is kept.

The first line of the file describes the comment pattern. Every line contains one node of the tree, and the parent is determined by the indentation level, which is given by tabs or spaces. Whitespaces on the right of names are ignored.
If the first line is empty, comments will be disabled.

Example file:

>--
>A node -- this is a comment
>  Child -- indentation is given by 2 spaces here
>Brother
>  Debora
>    Edward

-}
module Data.Simtreelo(loadString, loadFile) where

import Data.Tree

-- | The input 'String' must be organized in such a way that every child is one indentation lower than its parent, and all siblings have the same indentation.
--
-- The 'String' used for indentation is inferred from the first indentation depth
--
-- The entire first line (except the newline character) represents the beginning of a comment
--
-- Returns the first error message on failure, or the 'Tree' on success
loadString str = do
	let (first:s:r) = lines str
	let (_,spaces) = separate' s
	(tree,_,_) <- if spaces /= [] then
		parse (s:r) 2 1 (Just spaces) first
	else
		parse (s:r) 2 0 Nothing first
	return tree

-- | Just applies loadString to the contents of the given file
loadFile fname = do
	str <- readFile fname
	return $ loadString str

parse [] ln _ _ _ = Right ([],[],ln)
parse (h:r) ln d Nothing comment = do
	let (name,spaces) = separate' h
	if (strip name comment) == "" then parse r (ln+1) d Nothing comment
	else do
		let (indentor,depth) = if spaces == "" then (Nothing,0) else (Just spaces,1)
		(children,rest,ln') <- parse r (ln + 1) (d + 1) indentor comment
		if depth == d then do
			(siblings,rest',ln'') <- parse rest ln' d indentor comment
			return (Node{rootLabel = (strip name comment), subForest = children}:siblings, rest',ln'')
		else return ([],(h:r),ln)
parse (h:r) ln d (Just indentor) comment = do
	(name,depth) <- separate h indentor ln
	if (strip name comment) == "" then parse r (ln+1) d (Just indentor) comment
	else do
		(children,rest,ln') <- parse r (ln + 1) (d + 1) (Just indentor) comment
		if depth == d then do
			(siblings,rest',ln'') <- parse rest ln' d (Just indentor) comment
			return (Node{rootLabel = (strip name comment), subForest = children}:siblings, rest',ln'')
		else return ([],(h:r),ln)

-- checks if the first argument is a prefix of the second
-- returns whether this is true and what is left of the second after the prefix
isPrefix [] r = (True,r)
isPrefix i [] = (False,[])
isPrefix (hi:ri) (h:r)
	| h == hi =
		let (prefix,rest) = isPrefix ri r in
		(prefix,if prefix then rest else h:r)
	| otherwise = (False,h:r)

separate' [] = ([],[])
separate' (h:r)
	| h == ' ' || h == '\t' = let (name,spaces) = separate' r in
		(name,h:spaces)
	| otherwise = (h:r,"")

strip [] _ = []
strip (h:r) [] =
	let rest = strip r [] in
	if rest /= "" then h:rest
	else
		if h == ' ' || h == '\t' then "" else [h]
strip (h:r) comment =
	let (prefix,_) = isPrefix comment (h:r) in
	if prefix then ""
	else
		let rest = strip r comment in
		if rest /= "" then h:rest
		else
			if h == ' ' || h == '\t' then "" else [h]

separate a [] _ = Right (a,0)
separate [] _ _ = Right ([],0)
separate line indentor lineNumber = do
	let (prefix,rest) = isPrefix indentor line
	if prefix then do
		(name,depth) <- separate rest indentor lineNumber
		return (name,depth + 1)
	else if head rest == ' ' || head rest == '\t' then
		Left $ "Invalid indentation at line " ++ show lineNumber
	else
		Right (line,0)