module Web.Tweet.Parser ( parseTweet
, getData ) where
import qualified Data.ByteString as BS
import Text.Megaparsec.ByteString
import Text.Megaparsec.Lexer as L
import Text.Megaparsec
import Web.Tweet.Types
import Data.Monoid
import Data.Maybe
import Control.Monad
parseTweet :: Parser Timeline
parseTweet = many (try getData <|> (const (TweetEntity "" "" "" 0 Nothing 0 0) <$> eof))
getData :: Parser TweetEntity
getData = do
id <- read <$> filterStr "id"
text <- filterStr "text"
skipMentions
name <- filterStr "name"
screenName <- filterStr "screen_name"
isQuote <- filterStr "is_quote_status"
case isQuote of
"false" -> do
rts <- read <$> filterStr "retweet_count"
faves <- read <$> filterStr "favorite_count"
pure (TweetEntity text name screenName id Nothing rts faves)
"true" -> do
quoted <- parseQuoted
rts <- read <$> filterStr "retweet_count"
faves <- read <$> filterStr "favorite_count"
pure $ TweetEntity text name screenName id quoted rts faves
parseQuoted :: Parser (Maybe TweetEntity)
parseQuoted = do
optional (string ",\"quoted_status_id" >> filterStr "quoted_status_id_str")
contents <- optional $ string "\",\"quoted_status"
case contents of
(Just contents) -> pure <$> getData
_ -> pure Nothing
skipInsideBrackets :: Parser ()
skipInsideBrackets = void (between (char '[') (char ']') $ many (skipInsideBrackets <|> void (noneOf ("[]" :: String))))
skipMentions :: Parser ()
skipMentions = do
many $ try $ anyChar >> notFollowedBy (string ("\"user_mentions\":"))
char ','
string "\"user_mentions\":"
skipInsideBrackets
pure ()
filterStr :: String -> Parser String
filterStr str = do
many $ try $ anyChar >> notFollowedBy (string ("\"" <> str <> "\":"))
char ','
filterTag str
filterTag :: String -> Parser String
filterTag str = do
string $ "\"" <> str <> "\":"
open <- optional $ char '\"'
let forbidden = if (isJust open) then ("\\\"" :: String) else ("\\\"," :: String)
want <- many $ noneOf forbidden <|> specialChar '\"' <|> specialChar '/' <|> newlineChar <|> unicodeChar
pure want
newlineChar :: Parser Char
newlineChar = do
string "\\n"
pure '\n'
unicodeChar :: Parser Char
unicodeChar = do
string "\\u"
num <- fromHex . filterEmoji . BS.pack . map (fromIntegral . fromEnum) <$> count 4 anyChar
pure . toEnum . fromIntegral $ num
filterEmoji str = if BS.head str == (fromIntegral . fromEnum $ 'd') then "FFFD" else str
specialChar :: Char -> Parser Char
specialChar c = do
string $ "\\" ++ pure c
pure c
fromHex :: BS.ByteString -> Integer
fromHex = fromRight . (parse (L.hexadecimal :: Parser Integer) "")
where fromRight (Right a) = a