{-# LANGUAGE NamedFieldPuns, RecordWildCards, DeriveDataTypeable #-}
{-# OPTIONS_HADDOCK prune #-}
module Language.Pads.Source where
import qualified Data.ByteString as B
import qualified Text.Regex.Posix as TRP
import Language.Pads.RegExp
import Text.PrettyPrint.Mainland as PP
import Text.PrettyPrint.Mainland.Class
import Data.Int
import Data.Data
import Data.Word
import Data.Char
import Data.Bits (shiftR, shiftL, (.&.))
import qualified Data.ByteString.Char8 as Char8
type RawStream = B.ByteString
data Source = Source
{ current :: B.ByteString
, rest :: B.ByteString
, loc :: Loc
, bit :: Int
, disc :: RecordDiscipline
, eorAtEOF :: Bool
}
data RecordDiscipline =
Single Word8
| Multi B.ByteString
| Bytes Int
| NoPartition
| NoDiscipline
newline = Single (chrToWord8 '\n')
windows = Multi (B.pack (strToWord8s "\r\n"))
bytes n = Bytes n
none = NoPartition
data Loc = Loc
{ recordNumber :: Int64
, byteOffset :: Int64
} deriving (Typeable, Data,Eq, Ord, Show)
data Span = Span
{ begin :: Loc
, end :: Maybe Loc
} deriving (Typeable, Data, Eq, Ord, Show)
zeroLoc = Loc {recordNumber = 0, byteOffset = 0}
zeroSpan = locToSpan zeroLoc
zeroBit = 7
incRecordNumber :: Loc -> Loc
incRecordNumber Loc{recordNumber, ..} = Loc{ recordNumber = recordNumber+1
, byteOffset = 0}
decLineNumber :: Loc -> Loc
decLineNumber Loc{recordNumber, ..} = Loc{recordNumber=recordNumber-1, byteOffset=0}
incOffset :: Loc -> Loc
incOffset l@Loc{byteOffset} = l { byteOffset = byteOffset + 1 }
incOffsetBy :: Loc -> Int -> Loc
incOffsetBy l@Loc{byteOffset} n = l { byteOffset = byteOffset + fromIntegral n }
decOffset :: Loc -> Loc
decOffset l@Loc{byteOffset} = l { byteOffset = byteOffset - 1 }
getSrcLoc :: Source -> Loc
getSrcLoc = loc
getRecordDiscipline :: Source -> RecordDiscipline
getRecordDiscipline = disc
emptySource = Source
{ current = B.empty
, rest = B.empty
, loc = zeroLoc
, bit = zeroBit
, eorAtEOF = False
, disc = newline
}
padsSourceFromString :: String -> Source
padsSourceFromString str = padsSourceFromByteString (strToByteString str)
padsSourceFromStringWithDisc :: RecordDiscipline -> String -> Source
padsSourceFromStringWithDisc d str = padsSourceFromByteStringWithDisc d (strToByteString str)
padsSourceFromFile :: FilePath -> IO Source
padsSourceFromFile file = do
bs <- B.readFile file
return (padsSourceFromByteString bs)
padsSourceFromFileWithDisc :: RecordDiscipline -> FilePath -> IO Source
padsSourceFromFileWithDisc d file = do
bs <- B.readFile file
return (padsSourceFromByteStringWithDisc d bs)
padsSourceFromByteString :: B.ByteString -> Source
padsSourceFromByteString bs =
let rawSource = Source{ current = B.empty
, rest = bs
, loc = zeroLoc
, bit = zeroBit
, disc = newline
, eorAtEOF = False
}
in getNextRecord rawSource
padsSourceFromByteStringWithDisc :: RecordDiscipline -> B.ByteString -> Source
padsSourceFromByteStringWithDisc d bs =
let rawSource = Source{ current = B.empty
, rest = bs
, loc = zeroLoc
, bit = zeroBit
, disc = d
, eorAtEOF = False
}
in getNextRecord rawSource
isEOF :: Source -> Bool
isEOF (s @ Source{current, rest, eorAtEOF, ..}) = B.null current && B.null rest && not eorAtEOF
isEOR :: Source -> Bool
isEOR = B.null . current
getNextRecord :: Source -> Source
getNextRecord (s @ Source {current, rest, loc, bit, disc, eorAtEOF}) =
if isEOF s then s
else if eorAtEOF || B.null rest then
(Source {current = B.empty, rest = B.empty, loc = incRecordNumber loc, bit = zeroBit, disc, eorAtEOF = False})
else (Source {current = nextLine, rest=residual, loc = incRecordNumber loc, bit = zeroBit, disc, eorAtEOF = eorAtEOF'})
where (nextLine, residual, eorAtEOF') = breakUsingDisc rest disc
srcLineBegin :: Source -> (Maybe String, Source)
srcLineBegin s = (Nothing, s)
srcLineEnd :: Source -> (Maybe String, Source)
srcLineEnd s = if isEOF s
then (Just "Found EOF when looking for EOR", s)
else (Nothing, getNextRecord s)
setRecordDiscipline :: RecordDiscipline -> Source -> ((),Source)
setRecordDiscipline r s =
let s' = unputCurrentLine s
s'' = s'{disc = r}
in ((),getNextRecord s'')
unputCurrentLine :: Source -> Source
unputCurrentLine (s @ Source {current, rest, loc, disc, eorAtEOF}) =
if isEOF s then s
else case disc of
Single n -> let rest' = if B.null rest
then if eorAtEOF
then B.concat [current, B.singleton n]
else current
else B.concat [current, B.singleton n, rest]
loc' = if B.null current then loc else decLineNumber loc
in Source {current = B.empty, rest = rest', loc = loc', bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
Multi br -> let rest' = if B.null rest
then if eorAtEOF
then B.concat [current, br]
else current
else B.concat [current, rest]
loc' = if B.null current then loc else decLineNumber loc
in Source {current = B.empty, rest = rest', loc = loc', bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
Bytes n -> Source {current = B.empty, rest = B.append current rest, loc = decLineNumber loc, bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
NoPartition -> Source {current = B.empty, rest = current, loc = decLineNumber loc, bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
NoDiscipline -> s
breakUsingDisc :: B.ByteString -> RecordDiscipline -> (B.ByteString, B.ByteString, Bool)
breakUsingDisc bs rd = case rd of
Single n -> let (nextLine, raw_residual) = B.break (\c->c == n) bs
residual = B.drop 1 raw_residual
eorAtEOF = (B.null residual) && (not $ B.null raw_residual)
in (nextLine, residual, eorAtEOF)
Multi s -> let (nextLine, raw_residual) = B.breakSubstring s bs
residual = B.drop (B.length s) raw_residual
eorAtEOF = (B.null residual) && (not $ B.null raw_residual)
in (nextLine, residual, eorAtEOF)
Bytes n -> let (nextLine, residual) = B.splitAt n bs
in (nextLine, residual, False)
NoPartition -> (bs, B.empty, False)
NoDiscipline -> error "Pads Source: Attempt to partition source using internal discipline 'NoDiscipline'"
padsSourceToString :: Source -> String
padsSourceToString = (map word8ToChr) . B.unpack . padsSourceToByteString
padsSourceToByteString :: Source -> B.ByteString
padsSourceToByteString = rest . unputCurrentLine
drainSource :: Source -> (String, Source)
drainSource s = (padsSourceToString s, emptySource)
drainSourceNB :: Source -> (String, Source)
drainSourceNB (s @ Source{current,loc, ..}) =
let len = (B.length current) - (if bit == zeroBit then 0 else 1)
(bs, s') = takeBits (len * 8) s
in (map word8ToChr (numToWord8s bs []), emptySource)
rawSource :: Source -> (B.ByteString, Source)
rawSource s = (padsSourceToByteString s, emptySource)
restRec :: Source -> String
restRec = byteStringToStr . current
head :: Source -> Char
head = word8ToChr . headOrZero . current
headOrZero s = if B.null s then chrToWord8 '\0' else B.head s
peekHeadM :: Source -> (Maybe Char, Source)
peekHeadM (s @ Source{current,loc, ..}) =
if B.null current then (Nothing, s) else (Just (Language.Pads.Source.head s), s)
takeHead :: Source -> (Char, Source)
takeHead (s @ Source{current,loc, ..}) =
(word8ToChr $ B.head current, s{current = B.tail current, loc = incOffset loc})
partitionBS :: Integral a => B.ByteString -> a -> a -> (B.ByteString, B.ByteString, Bool)
partitionBS bS bitIndex bits =
let part b bs = if bs > b + 1 then 1 + part zeroBit (bs - (b + 1)) else 1
byteAlign = (bits - (bitIndex + 1)) `mod` 8 == 0
withinByte = bits <= bitIndex + 1
hd = B.take (part (fromIntegral bitIndex) (fromIntegral bits)) bS
tl = B.drop (B.length hd - if not byteAlign then 1 else 0) bS
in (hd, tl, withinByte || not byteAlign)
accumulate :: Integral a => a -> (a, Int) -> (a, Int)
accumulate byte (num, pow) = ((byte * (256 ^ pow)) + num, pow + 1)
takeBits8 :: Integral a => a -> Source -> (Word8, Source)
takeBits8 b (s @ Source{current,loc,bit, ..}) =
let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
bS = map (\x -> fromIntegral x :: Word16) (B.unpack $ B.take 2 hd)
bytes = fst $ foldr accumulate (0,0) bS
mask = (2 ^ b) - 1
bits = mask .&. shiftR bytes ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
in (fromIntegral bits, s{current = tl,
loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
bit = (zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8))})
takeBits16 :: Integral a => a -> Source -> (Word16, Source)
takeBits16 b (s @ Source{current,loc,bit, ..}) =
let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
bS = map (\x -> fromIntegral x :: Word32) (B.unpack $ B.take 3 hd)
bytes = fst $ foldr accumulate (0,0) bS
mask = (2 ^ b) - 1
bits = mask .&. shiftR bytes ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
in (fromIntegral bits, s{current = tl,
loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
bit = (zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8))})
takeBits32 :: Integral a => a -> Source -> (Word32, Source)
takeBits32 b (s @ Source{current,loc,bit, ..}) =
let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
bS = map (\x -> fromIntegral x :: Word64) (B.unpack $ B.take 5 hd)
bytes = fst $ foldr accumulate (0,0) bS
mask = (2 ^ b) - 1
bits = mask .&. shiftR bytes ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
in (fromIntegral bits, s{current = tl,
loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
bit = zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8)})
takeBits64 :: Integral a => a -> Source -> (Word64, Source)
takeBits64 b s = let (bits, s') = takeBits b s
in (fromIntegral bits, s')
tobinary :: Integer -> Integer
tobinary x
| (div x 2) == 0 = x
| otherwise = (mod x 2) + (10 * (tobinary $ div x 2))
takeBits :: Integral a => a -> Source -> (Integer, Source)
takeBits b (s @ Source{current,loc,bit, ..}) =
let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
bS = map fromIntegral (B.unpack hd)
bytes = fst $ foldr accumulate (0,0) bS
mask = (2 ^ b) - 1
shiftAmt = max 0 ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
bits = mask .&. shiftR bytes shiftAmt
in (bits, s{current = tl,
loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
bit = zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8)})
takeHeadM :: Source -> (Maybe Char, Source)
takeHeadM (s @ Source{current,loc, ..}) =
if B.null current then (Nothing, s)
else (Just $ word8ToChr $ B.head current, s{current = B.tail current, loc = incOffset loc})
takeHeadStr :: String -> Source -> (Bool, Source)
takeHeadStr str s =
let pstr = strToByteString str
in if B.isPrefixOf pstr (current s)
then let (res,source) = Language.Pads.Source.take (B.length pstr) s
in (True, source)
else (False, s)
matchString :: String -> Source -> Maybe(String, Source)
matchString str s =
let pstr = strToByteString str
in if B.isPrefixOf pstr (current s)
then let (res,source) = Language.Pads.Source.take (B.length pstr) s
in Just(str, source)
else Nothing
breakSubstring :: B.ByteString
-> B.ByteString
-> (B.ByteString,B.ByteString)
breakSubstring pat src = search 0 src
where
search :: Int -> B.ByteString -> (B.ByteString, B.ByteString)
search a b | a `seq` b `seq` False = undefined
search n s
| B.null s = (src,B.empty)
| pat `B.isPrefixOf` s = (B.take n src,s)
| otherwise = search (n+1) (B.tail s)
scanStr :: String -> Source -> (Maybe String, Source)
scanStr str (s @ Source{current,loc, ..}) =
let pat = strToByteString str
(before,after) = breakSubstring pat current
in if B.null after then (Nothing, s)
else let len = B.length pat
in (Just (byteStringToStr before),
s{current = B.drop len after, loc = incOffsetBy loc len})
scanString :: String -> Source -> Maybe (String, Source)
scanString str (s @ Source{current,loc, ..}) =
let pat = strToByteString str
(before,after) = breakSubstring pat current
in if B.null after then Nothing
else let len = B.length pat
in Just (byteStringToStr before, s{current= B.drop len after, loc = incOffsetBy loc len})
satisfyNB :: (Char -> Bool) -> Source -> (String, Source)
satisfyNB p s =
let (c, s') = takeBits8 8 s
c' = word8ToChr c
in if p c'
then (c' : (fst $ satisfyNB p s'), snd $ satisfyNB p s')
else ([], s)
takeBytes :: Int -> Source -> (B.ByteString, Source)
takeBytes n (s @ Source{current,loc, ..}) =
let (head, tail) = B.splitAt n current
incOffset = B.length head
in (head, s{current= tail, loc = incOffsetBy loc incOffset})
takeBytesNB :: Int -> Source -> (B.ByteString, Source)
takeBytesNB n s =
let (bits, s') = takeBits (n * 8) s
numToBS x = B.pack $ numToWord8s x []
in (numToBS bits, s')
numToWord8s :: Integral a => a -> [Word8] -> [Word8]
numToWord8s x accum
| x < 256 = fromIntegral x : accum
| otherwise = numToWord8s (x `div` 256) (fromIntegral (x `mod` 256) : accum)
take :: Int -> Source -> (String, Source)
take n s = let (bs, s') = takeBytes n s
in (byteStringToStr bs, s')
regexMatch :: RE -> Source -> (Maybe String, Source)
regexMatch (RE re_str_raw) (s @ Source{current,loc,..}) =
let (before, match, after) = current TRP.=~ (strToByteString('^' : re_str_raw))
in if not (B.null before) then (Nothing, s)
else (Just (byteStringToStr match), s{current=after, loc=incOffsetBy loc (fromIntegral (B.length match))})
regexMatch (REd re_str_raw def ) s = regexMatch (RE re_str_raw) s
regexStop :: RE -> Source -> (Maybe String, Source)
regexStop (RE re_str_raw) (s @ Source{current,loc,..}) =
let packed = strToByteString re_str_raw
(before, match, after) = current TRP.=~ packed
isMatch = current TRP.=~ packed
in if not isMatch
then (Nothing, s)
else (Just (byteStringToStr before),
s{current= B.append match after,loc=incOffsetBy loc (fromIntegral (B.length before))})
regexStop (REd re_str_raw def) s = regexStop (RE re_str_raw) s
span p (s @ Source{current,loc,..}) =
let (head, tail) = B.span p current
incOffset = B.length head
in (B.unpack head, s{current=tail, loc = incOffsetBy loc incOffset})
whileS :: (Char -> Bool) -> Source -> Maybe (String,Source)
whileS p (s @ Source{current,loc,..}) =
let (head, tail) = B.span (p . word8ToChr) current
incOffset = B.length head
in Just (byteStringToStr head, s{current=tail, loc=incOffsetBy loc incOffset})
tail (s @ Source{current,loc,..}) =
(s{current=B.tail current,loc=incOffset loc})
scanTo :: Char -> Source -> (Bool, Source, Span)
scanTo chr (src @ Source{current,loc, ..}) =
let begin = getSrcLoc src
(skipped, residual) = B.break (\c->c== (chrToWord8 chr)) current
(found,remaining,incAmount) =
if B.null residual then
(False, residual, B.length skipped)
else (True, B.tail residual, (B.length skipped) + 1)
newLoc = incOffsetBy loc incAmount
endErrLoc = incOffsetBy loc (B.length skipped)
in (found,
src {current = remaining, loc=newLoc},
Span {begin, end=Just endErrLoc})
lift :: (String -> [(a, String)]) -> (Source -> (Maybe a, Source))
lift f s = case f (byteStringToStr $ current s) of
[] -> (Nothing, s)
(x,residual):rest -> (Just x, s{current= (strToByteString residual)})
eqCurrent :: Source -> Source -> Bool
eqCurrent s s'= current s == current s'
chrToWord8 :: Char -> Word8
chrToWord8 c = toEnum $ fromEnum c
strToWord8s :: String -> [Word8]
strToWord8s = map chrToWord8
word8ToChr :: Word8 -> Char
word8ToChr = toEnum . fromEnum
word8sToStr :: [Word8] -> String
word8sToStr = map word8ToChr
byteStringToStr :: B.ByteString -> String
byteStringToStr = word8sToStr . B.unpack
strToByteString :: String -> B.ByteString
strToByteString = Char8.pack
locToSpan :: Loc -> Span
locToSpan loc = Span { begin = loc, end = Nothing }
locsToSpan :: Loc -> Loc -> Span
locsToSpan b e = Span {begin = b, end = Just e}
instance Pretty Source where
ppr (Source{current, rest, ..}) = text "Current:" <+> text (show current)
instance Pretty Loc where
ppr (Loc{recordNumber,byteOffset}) = text "Line:" <+> ppr recordNumber <> text ", Offset:" <+> ppr byteOffset
instance Pretty Span where
ppr (Span{begin,end}) = case end of
Nothing -> ppr begin
Just end_loc -> text "from:" <+> ppr begin <+> text "to:" <+> ppr end_loc