{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}

module Data.Warc.Header
    ( -- * Parsing
      header
      -- * Encoding
    , encodeHeader
      -- * WARC Version
    , Version(..)
    , warc0_16
      -- * Types
    , RecordHeader(..)
    , WarcType(..)
    , RecordId(..)
    , TruncationReason(..)
    , Digest(..)
    , Uri(..)
      -- * Header field types
    , Field(..)
    , FieldName(..)
    , field
    , lookupField
    , addField
    , mapField
    , rawField
      -- ** Standard fields
    , warcRecordId
    , contentLength
    , warcDate
    , warcType
    , contentType
    , warcConcurrentTo
    , warcBlockDigest
    , warcPayloadDigest
    , warcIpAddress
    , warcRefersTo
    , warcTargetUri
    , warcTruncated
    , warcWarcinfoID
    , warcFilename
    , warcProfile
    , warcSegmentNumber
    , warcSegmentTotalLength
      -- * Lenses
    , recWarcVersion, recHeaders
    ) where

import Control.Applicative
import Control.Monad (void, guard)
import Data.Maybe (catMaybes)
import Data.Monoid ((<>))
import Data.Time.Clock
import Data.Time.Format
import Data.Char (ord)
import Data.String (IsString)

import Data.Attoparsec.ByteString.Char8 as A
import qualified Data.Attoparsec.ByteString.Lazy as AL
import qualified Data.HashMap.Strict as HM
import Data.Hashable (Hashable(..))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Builder as BB

import Control.Lens

withName :: String -> Parser a -> Parser a
withName :: String -> Parser a -> Parser a
withName String
name Parser a
parser = Parser a
parser Parser a -> String -> Parser a
forall i a. Parser i a -> String -> Parser i a
<?> String
name

data Version = Version {Version -> Int
versionMajor, Version -> Int
versionMinor :: !Int}
             deriving (Int -> Version -> ShowS
[Version] -> ShowS
Version -> String
(Int -> Version -> ShowS)
-> (Version -> String) -> ([Version] -> ShowS) -> Show Version
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Version] -> ShowS
$cshowList :: [Version] -> ShowS
show :: Version -> String
$cshow :: Version -> String
showsPrec :: Int -> Version -> ShowS
$cshowsPrec :: Int -> Version -> ShowS
Show, ReadPrec [Version]
ReadPrec Version
Int -> ReadS Version
ReadS [Version]
(Int -> ReadS Version)
-> ReadS [Version]
-> ReadPrec Version
-> ReadPrec [Version]
-> Read Version
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Version]
$creadListPrec :: ReadPrec [Version]
readPrec :: ReadPrec Version
$creadPrec :: ReadPrec Version
readList :: ReadS [Version]
$creadList :: ReadS [Version]
readsPrec :: Int -> ReadS Version
$creadsPrec :: Int -> ReadS Version
Read, Version -> Version -> Bool
(Version -> Version -> Bool)
-> (Version -> Version -> Bool) -> Eq Version
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Version -> Version -> Bool
$c/= :: Version -> Version -> Bool
== :: Version -> Version -> Bool
$c== :: Version -> Version -> Bool
Eq, Eq Version
Eq Version
-> (Version -> Version -> Ordering)
-> (Version -> Version -> Bool)
-> (Version -> Version -> Bool)
-> (Version -> Version -> Bool)
-> (Version -> Version -> Bool)
-> (Version -> Version -> Version)
-> (Version -> Version -> Version)
-> Ord Version
Version -> Version -> Bool
Version -> Version -> Ordering
Version -> Version -> Version
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Version -> Version -> Version
$cmin :: Version -> Version -> Version
max :: Version -> Version -> Version
$cmax :: Version -> Version -> Version
>= :: Version -> Version -> Bool
$c>= :: Version -> Version -> Bool
> :: Version -> Version -> Bool
$c> :: Version -> Version -> Bool
<= :: Version -> Version -> Bool
$c<= :: Version -> Version -> Bool
< :: Version -> Version -> Bool
$c< :: Version -> Version -> Bool
compare :: Version -> Version -> Ordering
$ccompare :: Version -> Version -> Ordering
$cp1Ord :: Eq Version
Ord)

warc0_16 :: Version
warc0_16 :: Version
warc0_16 = Int -> Int -> Version
Version Int
0 Int
16

version :: Parser Version
version :: Parser Version
version = String -> Parser Version -> Parser Version
forall a. String -> Parser a -> Parser a
withName String
"version" (Parser Version -> Parser Version)
-> Parser Version -> Parser Version
forall a b. (a -> b) -> a -> b
$ do
    Parser ByteString ByteString
"WARC/"
    Int
major <- Parser Int
forall a. Integral a => Parser a
decimal
    Char -> Parser Char
char Char
'.'
    Int
minor <- Parser Int
forall a. Integral a => Parser a
decimal
    Version -> Parser Version
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Int -> Version
Version Int
major Int
minor)

newtype FieldName = FieldName {FieldName -> Text
getFieldName :: Text}
                  deriving (Int -> FieldName -> ShowS
[FieldName] -> ShowS
FieldName -> String
(Int -> FieldName -> ShowS)
-> (FieldName -> String)
-> ([FieldName] -> ShowS)
-> Show FieldName
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FieldName] -> ShowS
$cshowList :: [FieldName] -> ShowS
show :: FieldName -> String
$cshow :: FieldName -> String
showsPrec :: Int -> FieldName -> ShowS
$cshowsPrec :: Int -> FieldName -> ShowS
Show, ReadPrec [FieldName]
ReadPrec FieldName
Int -> ReadS FieldName
ReadS [FieldName]
(Int -> ReadS FieldName)
-> ReadS [FieldName]
-> ReadPrec FieldName
-> ReadPrec [FieldName]
-> Read FieldName
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [FieldName]
$creadListPrec :: ReadPrec [FieldName]
readPrec :: ReadPrec FieldName
$creadPrec :: ReadPrec FieldName
readList :: ReadS [FieldName]
$creadList :: ReadS [FieldName]
readsPrec :: Int -> ReadS FieldName
$creadsPrec :: Int -> ReadS FieldName
Read, String -> FieldName
(String -> FieldName) -> IsString FieldName
forall a. (String -> a) -> IsString a
fromString :: String -> FieldName
$cfromString :: String -> FieldName
IsString)

instance Hashable FieldName where
    hashWithSalt :: Int -> FieldName -> Int
hashWithSalt Int
salt (FieldName Text
t) = Int -> Text -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
salt (Text -> Text
T.toCaseFold Text
t)

instance Eq FieldName where
    FieldName Text
a == :: FieldName -> FieldName -> Bool
== FieldName Text
b = Text -> Text
T.toCaseFold Text
a Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Text
T.toCaseFold Text
b

instance Ord FieldName where
    FieldName Text
a compare :: FieldName -> FieldName -> Ordering
`compare` FieldName Text
b = Text -> Text
T.toCaseFold Text
a Text -> Text -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` Text -> Text
T.toCaseFold Text
b

separators :: String
separators :: String
separators = String
"()<>@,;:\\\"/[]?={}"

crlf :: Parser ()
crlf :: Parser ()
crlf = Parser ByteString ByteString -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString ByteString -> Parser ())
-> Parser ByteString ByteString -> Parser ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString ByteString
string ByteString
"\r\n"

token :: Parser ByteString
token :: Parser ByteString ByteString
token = (Char -> Bool) -> Parser ByteString ByteString
takeTill (String -> Char -> Bool
inClass (String -> Char -> Bool) -> String -> Char -> Bool
forall a b. (a -> b) -> a -> b
$ String
separatorsString -> ShowS
forall a. [a] -> [a] -> [a]
++String
" \t\n\r")

utf8Token :: Parser Text
utf8Token :: Parser Text
utf8Token = ByteString -> Text
TE.decodeUtf8 (ByteString -> Text) -> Parser ByteString ByteString -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString ByteString
token

ord' :: Char -> Word8
ord' = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord

text :: Parser Text
text :: Parser Text
text = do
    let content :: TL.Text -> Parser TL.Text
        content :: Text -> Parser Text
content Text
accum = do
            (Char -> Bool) -> Parser Char
satisfy (Word8 -> Bool
isHorizontalSpace (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord')
            ByteString
c <- (Char -> Bool) -> Parser ByteString ByteString
takeTill (Word8 -> Bool
isEndOfLine (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord')
            Parser ()
endOfLine
            Text -> Parser Text
continuation (Text
accum Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
TL.fromStrict (ByteString -> Text
TE.decodeUtf8 ByteString
c))
        continuation :: TL.Text -> Parser TL.Text
        continuation :: Text -> Parser Text
continuation Text
accum = Text -> Parser Text
content Text
accum Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
accum
    ByteString
firstLine <- (Char -> Bool) -> Parser ByteString ByteString
takeTill (Word8 -> Bool
isEndOfLine (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord')
    Parser ()
endOfLine
    Text -> Text
TL.toStrict (Text -> Text) -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Parser Text
continuation (Text -> Text
TL.fromStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> Text
TE.decodeUtf8 ByteString
firstLine)

quotedString :: Parser Text
quotedString :: Parser Text
quotedString = do
    Char -> Parser Char
char Char
'"'
    Text
c <- ByteString -> Text
TE.decodeUtf8 (ByteString -> Text) -> Parser ByteString ByteString -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"')
    Char -> Parser Char
char Char
'"'
    Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
c

data WarcType = WarcInfo
              | Response
              | Resource
              | Request
              | Metadata
              | Revisit
              | Conversion
              | Continuation
              | FutureType !Text
              deriving (Int -> WarcType -> ShowS
[WarcType] -> ShowS
WarcType -> String
(Int -> WarcType -> ShowS)
-> (WarcType -> String) -> ([WarcType] -> ShowS) -> Show WarcType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [WarcType] -> ShowS
$cshowList :: [WarcType] -> ShowS
show :: WarcType -> String
$cshow :: WarcType -> String
showsPrec :: Int -> WarcType -> ShowS
$cshowsPrec :: Int -> WarcType -> ShowS
Show, ReadPrec [WarcType]
ReadPrec WarcType
Int -> ReadS WarcType
ReadS [WarcType]
(Int -> ReadS WarcType)
-> ReadS [WarcType]
-> ReadPrec WarcType
-> ReadPrec [WarcType]
-> Read WarcType
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [WarcType]
$creadListPrec :: ReadPrec [WarcType]
readPrec :: ReadPrec WarcType
$creadPrec :: ReadPrec WarcType
readList :: ReadS [WarcType]
$creadList :: ReadS [WarcType]
readsPrec :: Int -> ReadS WarcType
$creadsPrec :: Int -> ReadS WarcType
Read, Eq WarcType
Eq WarcType
-> (WarcType -> WarcType -> Ordering)
-> (WarcType -> WarcType -> Bool)
-> (WarcType -> WarcType -> Bool)
-> (WarcType -> WarcType -> Bool)
-> (WarcType -> WarcType -> Bool)
-> (WarcType -> WarcType -> WarcType)
-> (WarcType -> WarcType -> WarcType)
-> Ord WarcType
WarcType -> WarcType -> Bool
WarcType -> WarcType -> Ordering
WarcType -> WarcType -> WarcType
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: WarcType -> WarcType -> WarcType
$cmin :: WarcType -> WarcType -> WarcType
max :: WarcType -> WarcType -> WarcType
$cmax :: WarcType -> WarcType -> WarcType
>= :: WarcType -> WarcType -> Bool
$c>= :: WarcType -> WarcType -> Bool
> :: WarcType -> WarcType -> Bool
$c> :: WarcType -> WarcType -> Bool
<= :: WarcType -> WarcType -> Bool
$c<= :: WarcType -> WarcType -> Bool
< :: WarcType -> WarcType -> Bool
$c< :: WarcType -> WarcType -> Bool
compare :: WarcType -> WarcType -> Ordering
$ccompare :: WarcType -> WarcType -> Ordering
$cp1Ord :: Eq WarcType
Ord, WarcType -> WarcType -> Bool
(WarcType -> WarcType -> Bool)
-> (WarcType -> WarcType -> Bool) -> Eq WarcType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: WarcType -> WarcType -> Bool
$c/= :: WarcType -> WarcType -> Bool
== :: WarcType -> WarcType -> Bool
$c== :: WarcType -> WarcType -> Bool
Eq)

parseWarcType :: Parser WarcType
parseWarcType :: Parser WarcType
parseWarcType = [Parser WarcType] -> Parser WarcType
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
     [ Parser ByteString ByteString
"warcinfo"     Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
WarcInfo
     , Parser ByteString ByteString
"response"     Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Response
     , Parser ByteString ByteString
"resource"     Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Resource
     , Parser ByteString ByteString
"request"      Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Request
     , Parser ByteString ByteString
"metadata"     Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Metadata
     , Parser ByteString ByteString
"revisit"      Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Revisit
     , Parser ByteString ByteString
"conversion"   Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Conversion
     , Parser ByteString ByteString
"continuation" Parser ByteString ByteString -> Parser WarcType -> Parser WarcType
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> WarcType -> Parser WarcType
forall (f :: * -> *) a. Applicative f => a -> f a
pure WarcType
Continuation
     , Text -> WarcType
FutureType (Text -> WarcType) -> Parser Text -> Parser WarcType
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
utf8Token
     ]

encodeText :: T.Text -> BB.Builder
encodeText :: Text -> Builder
encodeText = ByteString -> Builder
BB.byteString (ByteString -> Builder) -> (Text -> ByteString) -> Text -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
TE.encodeUtf8

encodeWarcType :: WarcType -> BB.Builder
encodeWarcType :: WarcType -> Builder
encodeWarcType WarcType
WarcInfo       = Builder
"warcinfo"
encodeWarcType WarcType
Response       = Builder
"response"
encodeWarcType WarcType
Resource       = Builder
"resource"
encodeWarcType WarcType
Request        = Builder
"request"
encodeWarcType WarcType
Metadata       = Builder
"metadata"
encodeWarcType WarcType
Revisit        = Builder
"revisit"
encodeWarcType WarcType
Conversion     = Builder
"conversion"
encodeWarcType WarcType
Continuation   = Builder
"continuation"
encodeWarcType (FutureType Text
t) = Text -> Builder
encodeText Text
t

newtype Uri = Uri ByteString
            deriving (Int -> Uri -> ShowS
[Uri] -> ShowS
Uri -> String
(Int -> Uri -> ShowS)
-> (Uri -> String) -> ([Uri] -> ShowS) -> Show Uri
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Uri] -> ShowS
$cshowList :: [Uri] -> ShowS
show :: Uri -> String
$cshow :: Uri -> String
showsPrec :: Int -> Uri -> ShowS
$cshowsPrec :: Int -> Uri -> ShowS
Show, ReadPrec [Uri]
ReadPrec Uri
Int -> ReadS Uri
ReadS [Uri]
(Int -> ReadS Uri)
-> ReadS [Uri] -> ReadPrec Uri -> ReadPrec [Uri] -> Read Uri
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Uri]
$creadListPrec :: ReadPrec [Uri]
readPrec :: ReadPrec Uri
$creadPrec :: ReadPrec Uri
readList :: ReadS [Uri]
$creadList :: ReadS [Uri]
readsPrec :: Int -> ReadS Uri
$creadsPrec :: Int -> ReadS Uri
Read, Uri -> Uri -> Bool
(Uri -> Uri -> Bool) -> (Uri -> Uri -> Bool) -> Eq Uri
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Uri -> Uri -> Bool
$c/= :: Uri -> Uri -> Bool
== :: Uri -> Uri -> Bool
$c== :: Uri -> Uri -> Bool
Eq, Eq Uri
Eq Uri
-> (Uri -> Uri -> Ordering)
-> (Uri -> Uri -> Bool)
-> (Uri -> Uri -> Bool)
-> (Uri -> Uri -> Bool)
-> (Uri -> Uri -> Bool)
-> (Uri -> Uri -> Uri)
-> (Uri -> Uri -> Uri)
-> Ord Uri
Uri -> Uri -> Bool
Uri -> Uri -> Ordering
Uri -> Uri -> Uri
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Uri -> Uri -> Uri
$cmin :: Uri -> Uri -> Uri
max :: Uri -> Uri -> Uri
$cmax :: Uri -> Uri -> Uri
>= :: Uri -> Uri -> Bool
$c>= :: Uri -> Uri -> Bool
> :: Uri -> Uri -> Bool
$c> :: Uri -> Uri -> Bool
<= :: Uri -> Uri -> Bool
$c<= :: Uri -> Uri -> Bool
< :: Uri -> Uri -> Bool
$c< :: Uri -> Uri -> Bool
compare :: Uri -> Uri -> Ordering
$ccompare :: Uri -> Uri -> Ordering
$cp1Ord :: Eq Uri
Ord)

uri :: Parser Uri
uri :: Parser Uri
uri = do
    Char -> Parser Char
char Char
'<'
    ByteString
s <- (Char -> Bool) -> Parser ByteString ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>')
    Char -> Parser Char
char Char
'>'
    Uri -> Parser Uri
forall (m :: * -> *) a. Monad m => a -> m a
return (Uri -> Parser Uri) -> Uri -> Parser Uri
forall a b. (a -> b) -> a -> b
$ ByteString -> Uri
Uri ByteString
s

laxUri :: Parser Uri
laxUri :: Parser Uri
laxUri = ByteString -> Uri
Uri (ByteString -> Uri) -> Parser ByteString ByteString -> Parser Uri
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
takeTill (Word8 -> Bool
isEndOfLine (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord')

encodeUri :: Uri -> BB.Builder
encodeUri :: Uri -> Builder
encodeUri (Uri ByteString
b) = Char -> Builder
BB.char7 Char
'<' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
BB.byteString ByteString
b Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char7 Char
'>'

newtype RecordId = RecordId Uri
                 deriving (Int -> RecordId -> ShowS
[RecordId] -> ShowS
RecordId -> String
(Int -> RecordId -> ShowS)
-> (RecordId -> String) -> ([RecordId] -> ShowS) -> Show RecordId
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RecordId] -> ShowS
$cshowList :: [RecordId] -> ShowS
show :: RecordId -> String
$cshow :: RecordId -> String
showsPrec :: Int -> RecordId -> ShowS
$cshowsPrec :: Int -> RecordId -> ShowS
Show, ReadPrec [RecordId]
ReadPrec RecordId
Int -> ReadS RecordId
ReadS [RecordId]
(Int -> ReadS RecordId)
-> ReadS [RecordId]
-> ReadPrec RecordId
-> ReadPrec [RecordId]
-> Read RecordId
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [RecordId]
$creadListPrec :: ReadPrec [RecordId]
readPrec :: ReadPrec RecordId
$creadPrec :: ReadPrec RecordId
readList :: ReadS [RecordId]
$creadList :: ReadS [RecordId]
readsPrec :: Int -> ReadS RecordId
$creadsPrec :: Int -> ReadS RecordId
Read, RecordId -> RecordId -> Bool
(RecordId -> RecordId -> Bool)
-> (RecordId -> RecordId -> Bool) -> Eq RecordId
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RecordId -> RecordId -> Bool
$c/= :: RecordId -> RecordId -> Bool
== :: RecordId -> RecordId -> Bool
$c== :: RecordId -> RecordId -> Bool
Eq, Eq RecordId
Eq RecordId
-> (RecordId -> RecordId -> Ordering)
-> (RecordId -> RecordId -> Bool)
-> (RecordId -> RecordId -> Bool)
-> (RecordId -> RecordId -> Bool)
-> (RecordId -> RecordId -> Bool)
-> (RecordId -> RecordId -> RecordId)
-> (RecordId -> RecordId -> RecordId)
-> Ord RecordId
RecordId -> RecordId -> Bool
RecordId -> RecordId -> Ordering
RecordId -> RecordId -> RecordId
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: RecordId -> RecordId -> RecordId
$cmin :: RecordId -> RecordId -> RecordId
max :: RecordId -> RecordId -> RecordId
$cmax :: RecordId -> RecordId -> RecordId
>= :: RecordId -> RecordId -> Bool
$c>= :: RecordId -> RecordId -> Bool
> :: RecordId -> RecordId -> Bool
$c> :: RecordId -> RecordId -> Bool
<= :: RecordId -> RecordId -> Bool
$c<= :: RecordId -> RecordId -> Bool
< :: RecordId -> RecordId -> Bool
$c< :: RecordId -> RecordId -> Bool
compare :: RecordId -> RecordId -> Ordering
$ccompare :: RecordId -> RecordId -> Ordering
$cp1Ord :: Eq RecordId
Ord)

recordId :: Parser RecordId
recordId :: Parser RecordId
recordId = Uri -> RecordId
RecordId (Uri -> RecordId) -> Parser Uri -> Parser RecordId
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Uri
uri

encodeRecordId :: RecordId -> BB.Builder
encodeRecordId :: RecordId -> Builder
encodeRecordId (RecordId Uri
r) = Uri -> Builder
encodeUri Uri
r

data TruncationReason = TruncLength
                      | TruncTime
                      | TruncDisconnect
                      | TruncUnspecified
                      | TruncOther !Text
                      deriving (Int -> TruncationReason -> ShowS
[TruncationReason] -> ShowS
TruncationReason -> String
(Int -> TruncationReason -> ShowS)
-> (TruncationReason -> String)
-> ([TruncationReason] -> ShowS)
-> Show TruncationReason
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TruncationReason] -> ShowS
$cshowList :: [TruncationReason] -> ShowS
show :: TruncationReason -> String
$cshow :: TruncationReason -> String
showsPrec :: Int -> TruncationReason -> ShowS
$cshowsPrec :: Int -> TruncationReason -> ShowS
Show, ReadPrec [TruncationReason]
ReadPrec TruncationReason
Int -> ReadS TruncationReason
ReadS [TruncationReason]
(Int -> ReadS TruncationReason)
-> ReadS [TruncationReason]
-> ReadPrec TruncationReason
-> ReadPrec [TruncationReason]
-> Read TruncationReason
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [TruncationReason]
$creadListPrec :: ReadPrec [TruncationReason]
readPrec :: ReadPrec TruncationReason
$creadPrec :: ReadPrec TruncationReason
readList :: ReadS [TruncationReason]
$creadList :: ReadS [TruncationReason]
readsPrec :: Int -> ReadS TruncationReason
$creadsPrec :: Int -> ReadS TruncationReason
Read, Eq TruncationReason
Eq TruncationReason
-> (TruncationReason -> TruncationReason -> Ordering)
-> (TruncationReason -> TruncationReason -> Bool)
-> (TruncationReason -> TruncationReason -> Bool)
-> (TruncationReason -> TruncationReason -> Bool)
-> (TruncationReason -> TruncationReason -> Bool)
-> (TruncationReason -> TruncationReason -> TruncationReason)
-> (TruncationReason -> TruncationReason -> TruncationReason)
-> Ord TruncationReason
TruncationReason -> TruncationReason -> Bool
TruncationReason -> TruncationReason -> Ordering
TruncationReason -> TruncationReason -> TruncationReason
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: TruncationReason -> TruncationReason -> TruncationReason
$cmin :: TruncationReason -> TruncationReason -> TruncationReason
max :: TruncationReason -> TruncationReason -> TruncationReason
$cmax :: TruncationReason -> TruncationReason -> TruncationReason
>= :: TruncationReason -> TruncationReason -> Bool
$c>= :: TruncationReason -> TruncationReason -> Bool
> :: TruncationReason -> TruncationReason -> Bool
$c> :: TruncationReason -> TruncationReason -> Bool
<= :: TruncationReason -> TruncationReason -> Bool
$c<= :: TruncationReason -> TruncationReason -> Bool
< :: TruncationReason -> TruncationReason -> Bool
$c< :: TruncationReason -> TruncationReason -> Bool
compare :: TruncationReason -> TruncationReason -> Ordering
$ccompare :: TruncationReason -> TruncationReason -> Ordering
$cp1Ord :: Eq TruncationReason
Ord, TruncationReason -> TruncationReason -> Bool
(TruncationReason -> TruncationReason -> Bool)
-> (TruncationReason -> TruncationReason -> Bool)
-> Eq TruncationReason
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TruncationReason -> TruncationReason -> Bool
$c/= :: TruncationReason -> TruncationReason -> Bool
== :: TruncationReason -> TruncationReason -> Bool
$c== :: TruncationReason -> TruncationReason -> Bool
Eq)

truncationReason :: Parser TruncationReason
truncationReason :: Parser TruncationReason
truncationReason = [Parser TruncationReason] -> Parser TruncationReason
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ Parser ByteString ByteString
"length" Parser ByteString ByteString
-> Parser TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a. Applicative f => a -> f a
pure TruncationReason
TruncLength
    , Parser ByteString ByteString
"time"   Parser ByteString ByteString
-> Parser TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a. Applicative f => a -> f a
pure TruncationReason
TruncTime
    , Parser ByteString ByteString
"disconnect" Parser ByteString ByteString
-> Parser TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a. Applicative f => a -> f a
pure TruncationReason
TruncDisconnect
    , Parser ByteString ByteString
"unspecified" Parser ByteString ByteString
-> Parser TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TruncationReason -> Parser TruncationReason
forall (f :: * -> *) a. Applicative f => a -> f a
pure TruncationReason
TruncUnspecified
    , Text -> TruncationReason
TruncOther (Text -> TruncationReason)
-> Parser Text -> Parser TruncationReason
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
utf8Token
    ]

encodeTruncationReason :: TruncationReason -> BB.Builder
encodeTruncationReason :: TruncationReason -> Builder
encodeTruncationReason TruncationReason
TruncLength      = Builder
"length"
encodeTruncationReason TruncationReason
TruncTime        = Builder
"time"
encodeTruncationReason TruncationReason
TruncDisconnect  = Builder
"disconnect"
encodeTruncationReason TruncationReason
TruncUnspecified = Builder
"unspecified"
encodeTruncationReason (TruncOther Text
o)   = Text -> Builder
encodeText Text
o

data Digest = Digest { Digest -> ByteString
digestAlgorithm, Digest -> ByteString
digestHash :: !ByteString }
            deriving (Int -> Digest -> ShowS
[Digest] -> ShowS
Digest -> String
(Int -> Digest -> ShowS)
-> (Digest -> String) -> ([Digest] -> ShowS) -> Show Digest
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Digest] -> ShowS
$cshowList :: [Digest] -> ShowS
show :: Digest -> String
$cshow :: Digest -> String
showsPrec :: Int -> Digest -> ShowS
$cshowsPrec :: Int -> Digest -> ShowS
Show, ReadPrec [Digest]
ReadPrec Digest
Int -> ReadS Digest
ReadS [Digest]
(Int -> ReadS Digest)
-> ReadS [Digest]
-> ReadPrec Digest
-> ReadPrec [Digest]
-> Read Digest
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Digest]
$creadListPrec :: ReadPrec [Digest]
readPrec :: ReadPrec Digest
$creadPrec :: ReadPrec Digest
readList :: ReadS [Digest]
$creadList :: ReadS [Digest]
readsPrec :: Int -> ReadS Digest
$creadsPrec :: Int -> ReadS Digest
Read, Digest -> Digest -> Bool
(Digest -> Digest -> Bool)
-> (Digest -> Digest -> Bool) -> Eq Digest
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Digest -> Digest -> Bool
$c/= :: Digest -> Digest -> Bool
== :: Digest -> Digest -> Bool
$c== :: Digest -> Digest -> Bool
Eq, Eq Digest
Eq Digest
-> (Digest -> Digest -> Ordering)
-> (Digest -> Digest -> Bool)
-> (Digest -> Digest -> Bool)
-> (Digest -> Digest -> Bool)
-> (Digest -> Digest -> Bool)
-> (Digest -> Digest -> Digest)
-> (Digest -> Digest -> Digest)
-> Ord Digest
Digest -> Digest -> Bool
Digest -> Digest -> Ordering
Digest -> Digest -> Digest
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Digest -> Digest -> Digest
$cmin :: Digest -> Digest -> Digest
max :: Digest -> Digest -> Digest
$cmax :: Digest -> Digest -> Digest
>= :: Digest -> Digest -> Bool
$c>= :: Digest -> Digest -> Bool
> :: Digest -> Digest -> Bool
$c> :: Digest -> Digest -> Bool
<= :: Digest -> Digest -> Bool
$c<= :: Digest -> Digest -> Bool
< :: Digest -> Digest -> Bool
$c< :: Digest -> Digest -> Bool
compare :: Digest -> Digest -> Ordering
$ccompare :: Digest -> Digest -> Ordering
$cp1Ord :: Eq Digest
Ord)

digest :: Parser Digest
digest :: Parser Digest
digest = do
    ByteString
algo <- Parser ByteString ByteString
token Parser ByteString ByteString
-> Parser Char -> Parser ByteString ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
':'
    ByteString
hash <- Parser ByteString ByteString
token
    Digest -> Parser Digest
forall (m :: * -> *) a. Monad m => a -> m a
return (Digest -> Parser Digest) -> Digest -> Parser Digest
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> Digest
Digest ByteString
algo ByteString
hash

encodeDigest :: Digest -> BB.Builder
encodeDigest :: Digest -> Builder
encodeDigest (Digest ByteString
algo ByteString
hash) =
    ByteString -> Builder
BB.byteString ByteString
algo Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
":" Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
BB.byteString ByteString
hash

date :: Parser UTCTime
date :: Parser UTCTime
date = do
    ByteString
s <- (Char -> Bool) -> Parser ByteString ByteString
takeTill Char -> Bool
isSpace
    Bool -> TimeLocale -> String -> String -> Parser UTCTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale String
dateFormat (ByteString -> String
BS.unpack ByteString
s)

encodeDate :: UTCTime -> BB.Builder
encodeDate :: UTCTime -> Builder
encodeDate = String -> Builder
BB.string7 (String -> Builder) -> (UTCTime -> String) -> UTCTime -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TimeLocale -> String -> UTCTime -> String
forall t. FormatTime t => TimeLocale -> String -> t -> String
formatTime TimeLocale
defaultTimeLocale String
dateFormat

dateFormat :: String
dateFormat = Maybe String -> String
iso8601DateFormat (String -> Maybe String
forall a. a -> Maybe a
Just String
"%H:%M:%SZ")

warcField :: Parser (FieldName, BSL.ByteString)
warcField :: Parser (FieldName, ByteString)
warcField = String
-> Parser (FieldName, ByteString) -> Parser (FieldName, ByteString)
forall a. String -> Parser a -> Parser a
withName String
"field" (Parser (FieldName, ByteString) -> Parser (FieldName, ByteString))
-> Parser (FieldName, ByteString) -> Parser (FieldName, ByteString)
forall a b. (a -> b) -> a -> b
$ do
    Parser Char
peekChar' Parser Char -> (Char -> Parser ()) -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Bool -> Parser ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Parser ()) -> (Char -> Bool) -> Char -> Parser ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace
    FieldName
fieldName <- Text -> FieldName
FieldName (Text -> FieldName)
-> (ByteString -> Text) -> ByteString -> FieldName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TE.decodeUtf8 (ByteString -> FieldName)
-> Parser ByteString ByteString -> Parser ByteString FieldName
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':')
    Char -> Parser Char
char Char
':'
    Parser ()
skipSpace
    ByteString
v0 <- Parser ByteString ByteString
takeLine
    Parser ()
endOfLine
    let continuation :: BB.Builder -> Parser BB.Builder
        continuation :: Builder -> Parser Builder
continuation Builder
v = do
            Maybe Char
c <- Parser (Maybe Char)
peekChar
            case Maybe Char
c of
              Just Char
c' | Word8 -> Bool
isHorizontalSpace (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c') -> do
                            ByteString
v' <- Parser ByteString ByteString
takeLine
                            Parser ()
endOfLine
                            Builder -> Parser Builder
continuation (Builder
v Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
BB.byteString ByteString
v')
              Maybe Char
_ -> Builder -> Parser Builder
forall (m :: * -> *) a. Monad m => a -> m a
return Builder
v
    Builder
v1 <- Builder -> Parser Builder
continuation (ByteString -> Builder
BB.byteString ByteString
v0)
    (FieldName, ByteString) -> Parser (FieldName, ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (FieldName
fieldName, Builder -> ByteString
BB.toLazyByteString Builder
v1)

-- | Take the rest of the line (but leaving the newline character unparsed).
takeLine :: Parser BS.ByteString
takeLine :: Parser ByteString ByteString
takeLine = (Char -> Bool) -> Parser ByteString ByteString
A.takeTill (Word8 -> Bool
isEndOfLine (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord')

data RecordHeader = RecordHeader { RecordHeader -> Version
_recWarcVersion :: Version
                                 , RecordHeader -> HashMap FieldName ByteString
_recHeaders     :: HM.HashMap FieldName BSL.ByteString
                                 }
                  deriving (Int -> RecordHeader -> ShowS
[RecordHeader] -> ShowS
RecordHeader -> String
(Int -> RecordHeader -> ShowS)
-> (RecordHeader -> String)
-> ([RecordHeader] -> ShowS)
-> Show RecordHeader
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RecordHeader] -> ShowS
$cshowList :: [RecordHeader] -> ShowS
show :: RecordHeader -> String
$cshow :: RecordHeader -> String
showsPrec :: Int -> RecordHeader -> ShowS
$cshowsPrec :: Int -> RecordHeader -> ShowS
Show)

makeLenses ''RecordHeader

-- | A lens-y means of querying 'Field's.
field :: Field a -> Traversal' RecordHeader a
field :: Field a -> Traversal' RecordHeader a
field Field a
fld = (HashMap FieldName ByteString -> f (HashMap FieldName ByteString))
-> RecordHeader -> f RecordHeader
Lens' RecordHeader (HashMap FieldName ByteString)
recHeaders ((HashMap FieldName ByteString -> f (HashMap FieldName ByteString))
 -> RecordHeader -> f RecordHeader)
-> ((a -> f a)
    -> HashMap FieldName ByteString
    -> f (HashMap FieldName ByteString))
-> (a -> f a)
-> RecordHeader
-> f RecordHeader
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Index (HashMap FieldName ByteString)
-> Traversal'
     (HashMap FieldName ByteString)
     (IxValue (HashMap FieldName ByteString))
forall m. Ixed m => Index m -> Traversal' m (IxValue m)
ix (Field a -> FieldName
forall a. Field a -> FieldName
fieldName Field a
fld) ((ByteString -> f ByteString)
 -> HashMap FieldName ByteString
 -> f (HashMap FieldName ByteString))
-> ((a -> f a) -> ByteString -> f ByteString)
-> (a -> f a)
-> HashMap FieldName ByteString
-> f (HashMap FieldName ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Field a -> Prism' ByteString a
forall a. Field a -> Prism' ByteString a
parsedField Field a
fld

parsedField :: Field a -> Prism' BSL.ByteString a
parsedField :: Field a -> Prism' ByteString a
parsedField Field a
fld = (a -> ByteString) -> (ByteString -> Maybe a) -> Prism' ByteString a
forall b s a. (b -> s) -> (s -> Maybe a) -> Prism s s a b
prism' a -> ByteString
to ByteString -> Maybe a
from
  where
    from :: ByteString -> Maybe a
from ByteString
bs = case Parser a -> ByteString -> Result a
forall a. Parser a -> ByteString -> Result a
AL.parse (Field a -> Parser a
forall a. Field a -> Parser a
decode Field a
fld) ByteString
bs of
                AL.Fail ByteString
_ [String]
_ String
_ -> Maybe a
forall a. Maybe a
Nothing
                AL.Done ByteString
_ a
x   -> a -> Maybe a
forall a. a -> Maybe a
Just a
x
    to :: a -> ByteString
to = Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString) -> (a -> Builder) -> a -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Field a -> a -> Builder
forall a. Field a -> a -> Builder
encode Field a
fld

addField :: Field a -> a -> RecordHeader -> RecordHeader
addField :: Field a -> a -> RecordHeader -> RecordHeader
addField Field a
fld a
v =
    (HashMap FieldName ByteString
 -> Identity (HashMap FieldName ByteString))
-> RecordHeader -> Identity RecordHeader
Lens' RecordHeader (HashMap FieldName ByteString)
recHeaders ((HashMap FieldName ByteString
  -> Identity (HashMap FieldName ByteString))
 -> RecordHeader -> Identity RecordHeader)
-> ((Maybe ByteString -> Identity (Maybe ByteString))
    -> HashMap FieldName ByteString
    -> Identity (HashMap FieldName ByteString))
-> (Maybe ByteString -> Identity (Maybe ByteString))
-> RecordHeader
-> Identity RecordHeader
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Index (HashMap FieldName ByteString)
-> Lens'
     (HashMap FieldName ByteString)
     (Maybe (IxValue (HashMap FieldName ByteString)))
forall m. At m => Index m -> Lens' m (Maybe (IxValue m))
at (Field a -> FieldName
forall a. Field a -> FieldName
fieldName Field a
fld) ((Maybe ByteString -> Identity (Maybe ByteString))
 -> RecordHeader -> Identity RecordHeader)
-> Maybe ByteString -> RecordHeader -> RecordHeader
forall s t a b. ASetter s t a b -> b -> s -> t
.~ ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$ Field a -> a -> Builder
forall a. Field a -> a -> Builder
encode Field a
fld a
v)

-- | A WARC header
header :: Parser RecordHeader
header :: Parser RecordHeader
header = String -> Parser RecordHeader -> Parser RecordHeader
forall a. String -> Parser a -> Parser a
withName String
"header" (Parser RecordHeader -> Parser RecordHeader)
-> Parser RecordHeader -> Parser RecordHeader
forall a b. (a -> b) -> a -> b
$ do
    Parser ()
skipSpace
    Version
ver <- Parser Version
version Parser Version -> Parser () -> Parser Version
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
endOfLine
    HashMap FieldName ByteString
fields <- ([(FieldName, ByteString)] -> HashMap FieldName ByteString)
-> Parser [(FieldName, ByteString)]
-> Parser ByteString (HashMap FieldName ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [(FieldName, ByteString)] -> HashMap FieldName ByteString
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList (Parser [(FieldName, ByteString)]
 -> Parser ByteString (HashMap FieldName ByteString))
-> (Parser [(FieldName, ByteString)]
    -> Parser [(FieldName, ByteString)])
-> Parser [(FieldName, ByteString)]
-> Parser ByteString (HashMap FieldName ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String
-> Parser [(FieldName, ByteString)]
-> Parser [(FieldName, ByteString)]
forall a. String -> Parser a -> Parser a
withName String
"fields" (Parser [(FieldName, ByteString)]
 -> Parser ByteString (HashMap FieldName ByteString))
-> Parser [(FieldName, ByteString)]
-> Parser ByteString (HashMap FieldName ByteString)
forall a b. (a -> b) -> a -> b
$ Parser (FieldName, ByteString) -> Parser [(FieldName, ByteString)]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser (FieldName, ByteString)
 -> Parser [(FieldName, ByteString)])
-> Parser (FieldName, ByteString)
-> Parser [(FieldName, ByteString)]
forall a b. (a -> b) -> a -> b
$ Parser (FieldName, ByteString)
warcField
    Parser ()
endOfLine
    RecordHeader -> Parser RecordHeader
forall (m :: * -> *) a. Monad m => a -> m a
return (RecordHeader -> Parser RecordHeader)
-> RecordHeader -> Parser RecordHeader
forall a b. (a -> b) -> a -> b
$ Version -> HashMap FieldName ByteString -> RecordHeader
RecordHeader Version
ver HashMap FieldName ByteString
fields

encodeHeader :: RecordHeader -> BB.Builder
encodeHeader :: RecordHeader -> Builder
encodeHeader (RecordHeader (Version Int
maj Int
min) HashMap FieldName ByteString
flds) =
       Builder
"WARC/"Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>Int -> Builder
BB.intDec Int
majBuilder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>Builder
"."Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>Int -> Builder
BB.intDec Int
min Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"\r\n"
    Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ((FieldName, ByteString) -> Builder)
-> [(FieldName, ByteString)] -> Builder
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (FieldName, ByteString) -> Builder
field (HashMap FieldName ByteString -> [(FieldName, ByteString)]
forall k v. HashMap k v -> [(k, v)]
HM.toList HashMap FieldName ByteString
flds)
    Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"\r\n"
  where field :: (FieldName, BSL.ByteString) -> BB.Builder
        field :: (FieldName, ByteString) -> Builder
field (FieldName Text
fname, ByteString
value) =
            Text -> Builder
TE.encodeUtf8Builder Text
fname Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
": " Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
BB.lazyByteString ByteString
value Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"\r\n"

-- | Lookup the value of a field. Returns @Nothing@ if the field is not
-- present, @Just (Left err)@ in the event of a parse error, and
-- @Just (Right v)@ on success.
lookupField :: RecordHeader -> Field a -> Maybe (Either String a)
lookupField :: RecordHeader -> Field a -> Maybe (Either String a)
lookupField (RecordHeader {_recHeaders :: RecordHeader -> HashMap FieldName ByteString
_recHeaders=HashMap FieldName ByteString
headers}) Field a
fld
  | Just ByteString
v <- FieldName -> HashMap FieldName ByteString -> Maybe ByteString
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HM.lookup (Field a -> FieldName
forall a. Field a -> FieldName
fieldName Field a
fld) HashMap FieldName ByteString
headers
  = case Parser a -> ByteString -> Result a
forall a. Parser a -> ByteString -> Result a
AL.parse (Field a -> Parser a
forall a. Field a -> Parser a
decode Field a
fld) ByteString
v of
      AL.Fail ByteString
_ [String]
_ String
err -> Either String a -> Maybe (Either String a)
forall a. a -> Maybe a
Just (Either String a -> Maybe (Either String a))
-> Either String a -> Maybe (Either String a)
forall a b. (a -> b) -> a -> b
$ String -> Either String a
forall a b. a -> Either a b
Left String
err
      AL.Done ByteString
_ a
x     -> Either String a -> Maybe (Either String a)
forall a. a -> Maybe a
Just (Either String a -> Maybe (Either String a))
-> Either String a -> Maybe (Either String a)
forall a b. (a -> b) -> a -> b
$ a -> Either String a
forall a b. b -> Either a b
Right a
x
  | Bool
otherwise
  = Maybe (Either String a)
forall a. Maybe a
Nothing

data Field a = Field { Field a -> FieldName
fieldName :: FieldName
                     , Field a -> a -> Builder
encode    :: a -> BB.Builder
                     , Field a -> Parser a
decode    :: Parser a
                     }

mapField :: (a -> b) -> (b -> a) -> Field a -> Field b
mapField :: (a -> b) -> (b -> a) -> Field a -> Field b
mapField a -> b
f b -> a
g (Field FieldName
fieldName a -> Builder
encode Parser a
decode) =
    FieldName -> (b -> Builder) -> Parser b -> Field b
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
fieldName (a -> Builder
encode (a -> Builder) -> (b -> a) -> b -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. b -> a
g) (a -> b
f (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
decode)

warcRecordId :: Field RecordId
warcRecordId :: Field RecordId
warcRecordId = FieldName
-> (RecordId -> Builder) -> Parser RecordId -> Field RecordId
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Record-ID" RecordId -> Builder
encodeRecordId Parser RecordId
recordId

contentLength :: Field Integer
contentLength :: Field Integer
contentLength = FieldName
-> (Integer -> Builder) -> Parser Integer -> Field Integer
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"Content-Length" Integer -> Builder
BB.integerDec Parser Integer
forall a. Integral a => Parser a
decimal

warcDate :: Field UTCTime
warcDate :: Field UTCTime
warcDate = FieldName
-> (UTCTime -> Builder) -> Parser UTCTime -> Field UTCTime
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Date" UTCTime -> Builder
encodeDate Parser UTCTime
date

warcType :: Field WarcType
warcType :: Field WarcType
warcType = FieldName
-> (WarcType -> Builder) -> Parser WarcType -> Field WarcType
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Type" WarcType -> Builder
encodeWarcType Parser WarcType
parseWarcType

contentType :: Field BS.ByteString
contentType :: Field ByteString
contentType = FieldName
-> (ByteString -> Builder)
-> Parser ByteString ByteString
-> Field ByteString
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"Content-Type" ByteString -> Builder
BB.byteString ((Char -> Bool) -> Parser ByteString ByteString
takeTill (Word8 -> Bool
isEndOfLine (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord'))

warcConcurrentTo :: Field RecordId
warcConcurrentTo :: Field RecordId
warcConcurrentTo = FieldName
-> (RecordId -> Builder) -> Parser RecordId -> Field RecordId
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Concurrent-To" RecordId -> Builder
encodeRecordId Parser RecordId
recordId

warcBlockDigest :: Field Digest
warcBlockDigest :: Field Digest
warcBlockDigest = FieldName -> (Digest -> Builder) -> Parser Digest -> Field Digest
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Block-Digest" Digest -> Builder
encodeDigest Parser Digest
digest

warcPayloadDigest :: Field Digest
warcPayloadDigest :: Field Digest
warcPayloadDigest = FieldName -> (Digest -> Builder) -> Parser Digest -> Field Digest
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Payload-Digest" Digest -> Builder
encodeDigest Parser Digest
digest

warcIpAddress :: Field BS.ByteString
warcIpAddress :: Field ByteString
warcIpAddress = FieldName
-> (ByteString -> Builder)
-> Parser ByteString ByteString
-> Field ByteString
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-IP-Address" ByteString -> Builder
BB.byteString ((Char -> Bool) -> Parser ByteString ByteString
takeTill (Word8 -> Bool
isEndOfLine (Word8 -> Bool) -> (Char -> Word8) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Word8
ord'))

warcRefersTo :: Field Uri
warcRefersTo :: Field Uri
warcRefersTo = FieldName -> (Uri -> Builder) -> Parser Uri -> Field Uri
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Refers-To" Uri -> Builder
encodeUri Parser Uri
uri

warcTargetUri :: Field Uri
warcTargetUri :: Field Uri
warcTargetUri = FieldName -> (Uri -> Builder) -> Parser Uri -> Field Uri
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Target-URI" Uri -> Builder
encodeUri Parser Uri
laxUri

warcTruncated :: Field TruncationReason
warcTruncated :: Field TruncationReason
warcTruncated = FieldName
-> (TruncationReason -> Builder)
-> Parser TruncationReason
-> Field TruncationReason
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Truncated" TruncationReason -> Builder
encodeTruncationReason Parser TruncationReason
truncationReason

warcWarcinfoID :: Field RecordId
warcWarcinfoID :: Field RecordId
warcWarcinfoID = FieldName
-> (RecordId -> Builder) -> Parser RecordId -> Field RecordId
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Warcinfo-ID" RecordId -> Builder
encodeRecordId Parser RecordId
recordId

warcFilename :: Field T.Text
warcFilename :: Field Text
warcFilename = FieldName -> (Text -> Builder) -> Parser Text -> Field Text
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Filename"(Builder -> Builder
quoted (Builder -> Builder) -> (Text -> Builder) -> Text -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Builder
encodeText) (Parser Text
text Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
quotedString)

warcProfile :: Field Uri
warcProfile :: Field Uri
warcProfile = FieldName -> (Uri -> Builder) -> Parser Uri -> Field Uri
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Profile" Uri -> Builder
encodeUri Parser Uri
uri

warcSegmentNumber :: Field Integer
warcSegmentNumber :: Field Integer
warcSegmentNumber = FieldName
-> (Integer -> Builder) -> Parser Integer -> Field Integer
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Segment-Number" Integer -> Builder
BB.integerDec Parser Integer
forall a. Integral a => Parser a
decimal

warcSegmentTotalLength :: Field Integer
warcSegmentTotalLength :: Field Integer
warcSegmentTotalLength = FieldName
-> (Integer -> Builder) -> Parser Integer -> Field Integer
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
"WARC-Segment-Total-Length" Integer -> Builder
BB.integerDec Parser Integer
forall a. Integral a => Parser a
decimal

rawField :: FieldName -> Field BSL.ByteString
rawField :: FieldName -> Field ByteString
rawField FieldName
fname = FieldName
-> (ByteString -> Builder) -> Parser ByteString -> Field ByteString
forall a. FieldName -> (a -> Builder) -> Parser a -> Field a
Field FieldName
fname ByteString -> Builder
BB.lazyByteString Parser ByteString
takeLazyByteString

quoted :: Builder -> Builder
quoted Builder
x = Builder
q Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
x Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
q
  where q :: Builder
q = Char -> Builder
BB.char7 Char
'"'