{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} module Commonmark.Extensions.Autolink ( autolinkSpec ) where import Commonmark.Types import Commonmark.Tokens import Commonmark.Syntax import Commonmark.Inlines import Commonmark.TokParsers import Control.Monad (guard, void) import Text.Parsec import Data.Text (Text) autolinkSpec :: (Monad m, IsBlock il bl, IsInline il) => SyntaxSpec m il bl autolinkSpec = mempty { syntaxInlineParsers = [parseAutolink] } parseAutolink :: (Monad m, IsInline a) => InlineParser m a parseAutolink = do void $ lookAhead $ satisfyTok $ \t -> case tokType t of WordChars -> True Symbol c -> c == '.' || c == '-' || c == '_' || c == '+' _ -> False (prefix, linktext) <- withRaw $ wwwAutolink <|> urlAutolink <|> emailAutolink return $! link (prefix <> untokenize linktext) "" (str . untokenize $ linktext) wwwAutolink :: Monad m => InlineParser m Text wwwAutolink = try $ do lookAhead $ satisfyWord (== "www") validDomain linkPath 0 return "http://" validDomain :: Monad m => InlineParser m () validDomain = do let domainPart = do ds <- many1 $ satisfyTok (hasType WordChars) <|> symbol '-' <|> symbol '_' guard $ case reverse ds of (Tok WordChars _ _ : _) -> True _ -> False domainPart skipMany1 $ try (symbol '.' >> domainPart) linkPath :: Monad m => Int -> InlineParser m () linkPath openParens = try (symbol '&' *> notFollowedBy (try (satisfyWord (const True) *> symbol ';' *> linkEnd)) *> linkPath openParens) <|> (pathPunctuation *> linkPath openParens) <|> (symbol '(' *> linkPath (openParens + 1)) <|> (guard (openParens > 0) *> symbol ')' *> linkPath (openParens - 1)) -- the following clause is needed to implement the GFM spec, which allows -- unbalanced ) except at link end. However, leaving this in causes -- problematic interaction with explicit link syntax in certain odd cases (see #147). -- <|> (notFollowedBy linkEnd *> symbol ')' *> linkPath (openParens - 1)) <|> (satisfyTok (\t -> case tokType t of LineEnd -> False Spaces -> False Symbol c -> not (isTrailingPunctuation c || c == '&' || c == ')') _ -> True) *> linkPath openParens) <|> pure () linkEnd :: Monad m => InlineParser m () linkEnd = try $ skipMany trailingPunctuation *> (void whitespace <|> eof) trailingPunctuation :: Monad m => InlineParser m () trailingPunctuation = void $ satisfyTok (\t -> case tokType t of Symbol c -> isTrailingPunctuation c _ -> False) isTrailingPunctuation :: Char -> Bool isTrailingPunctuation = (`elem` ['!', '"', '\'', ')', '*', ',', '.', ':', ';', '?', '_', '~', '<']) pathPunctuation :: Monad m => InlineParser m () pathPunctuation = try $ do satisfyTok (\t -> case tokType t of Symbol c -> isTrailingPunctuation c && c /= ')' && c /= '<' _ -> False) void $ lookAhead (satisfyTok (\t -> case tokType t of WordChars -> True _ -> False)) urlAutolink :: Monad m => InlineParser m Text urlAutolink = try $ do satisfyWord (`elem` ["http", "https", "ftp"]) symbol ':' symbol '/' symbol '/' validDomain linkPath 0 return "" emailAutolink :: Monad m => InlineParser m Text emailAutolink = try $ do let emailNameTok (Tok WordChars _ _) = True emailNameTok (Tok (Symbol c) _ _) = c == '.' || c == '-' || c == '_' || c == '+' emailNameTok _ = False skipMany1 $ satisfyTok emailNameTok symbol '@' validDomain return "mailto:"