-- | -- Module : Network.DNS.Pattern -- Description : Generic network related types. -- -- Provides utilities and parsers for a simple domain name pattern language. {-# LANGUAGE OverloadedStrings #-} module Network.DNS.Pattern ( -- * Domain names -- $domainNames parseAbsDomain , parseAbsDomainRelax , parseDomainLabel , absDomainP , absDomainRelaxP , Domain(..) , DomainLabel(..) , pprDomain , pprDomainLabel , foldCase , foldCaseLabel -- * Pattern language -- $patterns , parsePattern , patternWorksInside , matchesPattern , domainLabelP , patternP , DomainPattern(..) , LabelPattern(..) , encodedLength , pprPattern ) where import Control.Monad (when) import Data.Char (isAscii, ord) import Data.Coerce (coerce) import Data.Foldable (asum, foldl') import Data.Word (Word8) import Text.Printf (printf) import Control.Applicative.Combinators import Data.Attoparsec.Text (()) import qualified Data.Attoparsec.Text as A import Data.ByteString.Internal (c2w, w2c) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Builder as TB import qualified Data.ByteString as BS -- -- * Domain names -- -- $domainNames -- -- There is no standardized presentation and parsing format for domain names. -- In this library we assume a domain name and pattern to be specified as a text with an ASCII dot @.@ acting as a separator and terminator. -- We do not admit arbitrary unicode codepoints, only ASCII is acceptable. Punycoding, if desired, must be taken care of the user. -- -- Escape sequences -- The domain name and pattern language here allows for the following escape sequences -- -- @ -- \\. gives a dot inside a label, rather than a label separator -- \\\\ gives a backslash inside a label -- \\012 gives an arbitrary octet inside a label as specified by the three octets -- @ -- -- For example: @foo\\.bar.quux.@ is a domain name comprised of two labels @foo.bar@ and @quux@ -- * Patterns -- -- $patterns -- -- Patterns can be simple absolute domain names, where labels can be replaced with either a single glob @*@ or a globstar @**@. -- A single glob will match any label in its place, where globstar will greedily match as many labels as possible. -- -- Admits the escape sequences from domain names as well as the following -- -- @ -- \\* gives an asterisk inside a label, rather than a glob/globstar. -- @ -- -- -- Note: Currently a globstar is only supported on the left-most label. -- -- Examples or valid patterns are: -- -- @ -- *.foo.bar. -- **.foo.bar. -- foo.*.bar. -- foo.bar.*. -- @ type Parser = A.Parser -- | A domain pattern. newtype DomainPattern = DomainPattern { getDomainPattern :: [LabelPattern] } deriving (Eq, Ord) -- | A domain parsed into labels. Each label is a 'BS.ByteString' rather than 'T.Text' or 'String' because a label can contain arbitrary bytes. -- However, the 'Ord' and 'Eq' instances do limited case-folding according to [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). newtype Domain = Domain { getDomain :: [DomainLabel] } deriving (Eq, Ord) -- | Newtype warpper for 'BS.ByteString' that implements case-insensitive 'Eq' and 'Ord' as per [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). newtype DomainLabel = DomainLabel { getDomainLabel :: BS.ByteString } deriving (Ord) instance Eq DomainLabel where DomainLabel l == DomainLabel r = BS.map foldCase_ l == BS.map foldCase_ r foldCase :: Domain -> Domain foldCase (Domain ls) = Domain (foldCaseLabel <$> ls) -- | Case-folding according to [RFC4343](https://datatracker.ietf.org/doc/html/rfc4343#section-3). foldCaseLabel :: DomainLabel -> DomainLabel foldCaseLabel (DomainLabel l) = DomainLabel (BS.map foldCase_ l) foldCase_ :: Word8 -> Word8 foldCase_ x = case x of 0x41 -> c2w 'a' 0x42 -> c2w 'b' 0x43 -> c2w 'c' 0x44 -> c2w 'd' 0x45 -> c2w 'e' 0x46 -> c2w 'f' 0x47 -> c2w 'g' 0x48 -> c2w 'h' 0x49 -> c2w 'i' 0x4a -> c2w 'j' 0x4b -> c2w 'k' 0x4c -> c2w 'l' 0x4d -> c2w 'm' 0x4e -> c2w 'n' 0x4f -> c2w 'o' 0x50 -> c2w 'p' 0x51 -> c2w 'q' 0x52 -> c2w 'r' 0x53 -> c2w 's' 0x54 -> c2w 't' 0x55 -> c2w 'u' 0x56 -> c2w 'v' 0x57 -> c2w 'w' 0x58 -> c2w 'x' 0x59 -> c2w 'y' 0x5a -> c2w 'z' _ -> x -- | Print an arbitrary domain into a presentation format. -- -- This function nearly roundtrips with 'parseAbsDomain' in the sense that octet escape sequences might change case or drop a leading zero. -- -- prop> parseAbsDomain . pretty ~~~ id pprDomain :: Domain -> T.Text pprDomain (Domain l) = TL.toStrict (TB.toLazyText build) where build :: TB.Builder build = foldl' (\buf x -> buf <> buildLabel x <> ".") mempty l -- | Print a singular domain label into a presentation format. pprDomainLabel :: DomainLabel -> T.Text pprDomainLabel = TL.toStrict . TB.toLazyText . buildLabel -- | Print domain into presentation format. -- -- This function nearly roundtrips with 'parsePattern' in the sense that octet escape sequences might change case or drop a leading zero. -- -- prop> parsePattern . pprPattern ~~~ id pprPattern :: DomainPattern -> T.Text pprPattern (DomainPattern l) = TL.toStrict (TB.toLazyText build) where build :: TB.Builder build = foldl' (\buf x -> buf <> buildLabelPattern x <> ".") mempty l buildLabel :: DomainLabel -> TB.Builder buildLabel (DomainLabel d) = BS.foldl' (\buf x -> buf <> go x) mempty d where go :: Word8 -> TB.Builder go 0x2e = "\\." go 0x5c = "\\\\" go c | c > 0x20 && c <= 0x7E = TB.singleton (w2c c) | otherwise = "\\" <> TB.fromString (printf "%03o" c) buildLabelPattern :: LabelPattern -> TB.Builder buildLabelPattern (DomLiteral d) = BS.foldl' (\buf x -> buf <> go x) mempty d where go :: Word8 -> TB.Builder go 0x2e = "\\." go 0x5c = "\\\\" go 0x2a = "\\*" go c | c > 0x20 && c <= 0x7E = TB.singleton (w2c c) | otherwise = "\\" <> TB.fromString (printf "%03o" c) buildLabelPattern DomGlob = "*" buildLabelPattern DomGlobStar = "**" -- | A pattern for a singular label. data LabelPattern = DomLiteral BS.ByteString -- ^ Represents an exact label that must be matched. | DomGlob -- ^ Represents a single asterisk glob matching any arbitrary domain at a given level. | DomGlobStar -- ^ Represents a double asterisk matching any arbitrary subdomain at a given level. deriving (Eq, Ord, Show) -- | Given a pattern and a DNS zone specified by a domain name, test whether or not the pattern -- is applicable beneath that zone. -- -- @ -- foo.*.bar. applicable inside zone quux.bar. -- foo.bar. applicable inside zone bar. -- bar. applicable inside zone bar. -- foo.bar. not applicable inside zone quux. -- @ patternWorksInside :: DomainPattern -> Domain -> Bool patternWorksInside (DomainPattern x) (Domain y) = go (reverse x) (reverse y) where go :: [LabelPattern] -> [DomainLabel] -> Bool go [DomGlobStar] _ = True go [] [] = True go [] _ls = False go _p [] = True go (p:ps) (l:ls) = labelMatchesPattern (getDomainLabel l) p && go ps ls -- | Test whether a given domain matches a 'DomainPattern' matchesPattern :: Domain -> DomainPattern -> Bool matchesPattern (Domain x) (DomainPattern y) = go (reverse x) (reverse y) where go :: [DomainLabel] -> [LabelPattern] -> Bool go [] [] = True go [] _ps = False go _ls [] = False go _ls [DomGlobStar] = True go (l:ls) (p:ps) = labelMatchesPattern (getDomainLabel l) p && go ls ps -- | Test whether a single label matches a label pattern labelMatchesPattern :: BS.ByteString -> LabelPattern -> Bool labelMatchesPattern _l DomGlob = True labelMatchesPattern l (DomLiteral p) = l == p labelMatchesPattern _l DomGlobStar = True -- | Parser for domain patterns. See 'parsePattern' for a convenince wrapper. patternP :: Parser DomainPattern patternP = asum [ do p <- litGlobStar <* A.char '.' ps <- litGlob `endBy` A.char '.' pure (DomainPattern (p:ps)) , DomainPattern [] <$ A.char '.' -- Literal pattern of the root domain "." ] where litGlobStar :: Parser LabelPattern litGlobStar = asum [ DomLiteral <$> patternLabelP , DomGlobStar <$ A.string "**" , DomGlob <$ A.char '*' ] litGlob :: Parser LabelPattern litGlob = asum [ DomLiteral <$> patternLabelP , DomGlob <$ A.char '*' ] -- | Parse a domain pattern. Convenience wrapper for 'patternP. parsePattern :: T.Text -> Either String DomainPattern parsePattern = A.parseOnly (patternP <* A.endOfInput) -- | Parse an absolute domain. Convenience wrapper for 'absDomainP'. parseAbsDomain :: T.Text -> Either String Domain parseAbsDomain = A.parseOnly (absDomainP <* A.endOfInput) -- | Parse a singular domain label. Convenience wrapper for 'domainLabelP'. parseDomainLabel :: T.Text -> Either String DomainLabel parseDomainLabel = A.parseOnly (domainLabelP <* A.endOfInput) -- | Version of parseAbsDomain that also considers a domain name without a trailing dot -- to be absolute. parseAbsDomainRelax :: T.Text -> Either String Domain parseAbsDomainRelax = A.parseOnly (absDomainRelaxP <* A.endOfInput) -- | Parser for absolute domains. See 'parseAbsDomainRelax' for a convenience warpper. -- This variant differs from 'absDomainP' in that it does not care whether the domain -- name is terminated with a dot. absDomainRelaxP :: Parser Domain absDomainRelaxP = do d <- go let l = encodedLength d when (l >= 255) (fail "domain name too long") pure d where go = Domain <$> domainLabelP `sepBy1` A.char '.' <* optional (A.char '.') -- | Calculate the wire-encoded length of a domain name. encodedLength :: Domain -> Int encodedLength (Domain labels) = sum (BS.length <$> l') + length l' where l' = coerce labels :: [BS.ByteString] -- | Parser for absolute domains. See 'parseAbsDomain' for a convenience wrapper. -- For a parser that also admits domain forms without a leading dot, see 'absDomainRelaxP'. absDomainP :: Parser Domain absDomainP = do d <- go let l = encodedLength d when (l >= 255) (fail "domain name too long") pure d where go = Domain <$> asum [ domainLabelP `endBy1` A.char '.' , [] <$ A.char '.' -- The root domain itself ] patternLabelP :: Parser BS.ByteString patternLabelP = do r <- BS.pack <$> (some labelChar) when (BS.length r >= 64) (fail "label must not be longer than 63 octets") pure r where labelChar :: Parser Word8 labelChar = do c <- A.satisfy (\x -> x /= '.' && x /= '*' && isAscii x ) "pattern label character" case c of '\\' -> escapable _ -> pure (c2w c) escapable :: Parser Word8 escapable = asum [ c2w <$> A.char '.' , c2w <$> A.char '\\' , c2w <$> A.char '*' , octal ] "escapable character" octal :: Parser Word8 octal = do o1 <- v <$> A.satisfy isOctal o2 <- v <$> A.satisfy isOctal o3 <- v <$> A.satisfy isOctal pure (fromIntegral (o1 * 64 + o2 * 8 + o3)) where v c = ord c - 48 isOctal :: Char -> Bool isOctal c = c >= '0' && c <= '7' -- | Parser for a singular domain label. See 'parseDomainLabel' for a convenince wrapper. Also see 'absDomainP'. domainLabelP :: Parser DomainLabel domainLabelP = DomainLabel . BS.pack <$> (some labelChar) where labelChar :: Parser Word8 labelChar = do c <- A.satisfy (\x -> x /= '.' && isAscii x ) "domain label character" case c of '\\' -> escapable _ -> pure (c2w c) escapable :: Parser Word8 escapable = asum [ c2w <$> A.char '.' , c2w <$> A.char '\\' , octal ] "escapable character"