{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE StrictData          #-}
{- |
   Module      : Text.Pandoc.Transforms
   Copyright   : © 2006-2024 John MacFarlane
   License     : GPL-2.0-or-later
   Maintainer  : John MacFarlane <jgm@berkeley@edu>

Transformation of a Pandoc document post-parsing
-}
module Text.Pandoc.Transforms
  ( Transform
  , applyTransforms
  , adjustLinksAndIds
  , eastAsianLineBreakFilter
  , filterIpynbOutput
  , headerShift
  ) where

import Data.List (sortOn)
import Data.Text (Text)
import Network.URI (unEscapeString)
import Text.DocLayout (charWidth)
import Text.Pandoc.Definition
  ( Pandoc (..), Attr, Block (..), Format (..), Inline (..) )
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Options (Extensions)
import Text.Pandoc.Shared (stringify, textToIdentifier)
import Text.Pandoc.Walk (walk)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B

-- | Transformation of a Pandoc document post-parsing
type Transform = Pandoc -> Pandoc

-- | Apply a list of transforms to a document, in order.
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
applyTransforms :: forall (m :: * -> *). Monad m => [Transform] -> Pandoc -> m Pandoc
applyTransforms [Transform]
transforms Pandoc
d = Pandoc -> m Pandoc
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pandoc -> m Pandoc) -> Pandoc -> m Pandoc
forall a b. (a -> b) -> a -> b
$ (Transform -> Transform) -> Pandoc -> [Transform] -> Pandoc
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Transform -> Transform
forall a b. (a -> b) -> a -> b
($) Pandoc
d [Transform]
transforms

-- | Prefixes identifiers with a string derived from the filepath of
-- @thisfile@; fixes links to targets in @allfiles@ accordingly.
adjustLinksAndIds :: Extensions        -- ^ defines how IDs are generated
                  -> Text              -- ^ thisfile
                  -> [Text]            -- ^ allfiles
                  -> Transform
adjustLinksAndIds :: Extensions -> Text -> [Text] -> Transform
adjustLinksAndIds Extensions
exts Text
thisfile [Text]
allfiles
  | [Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
allfiles Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 = (Inline -> Inline) -> Transform
forall a b. Walkable a b => (a -> a) -> b -> b
walk Inline -> Inline
fixInline Transform -> Transform -> Transform
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Block -> Block) -> Transform
forall a b. Walkable a b => (a -> a) -> b -> b
walk Block -> Block
fixBlock
  | Bool
otherwise           = Transform
forall a. a -> a
id
 where
  -- fix ids in blocks
  fixBlock :: Block -> Block
  fixBlock :: Block -> Block
fixBlock (CodeBlock Attr
attr Text
t) = Attr -> Text -> Block
CodeBlock (Attr -> Attr
fixAttrs Attr
attr) Text
t
  fixBlock (Header Int
lev Attr
attr [Inline]
ils) = Int -> Attr -> [Inline] -> Block
Header Int
lev (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils
  fixBlock (Table Attr
attr Caption
cap [ColSpec]
cols TableHead
th [TableBody]
tbs TableFoot
tf) =
     Attr
-> Caption
-> [ColSpec]
-> TableHead
-> [TableBody]
-> TableFoot
-> Block
Table (Attr -> Attr
fixAttrs Attr
attr) Caption
cap [ColSpec]
cols TableHead
th [TableBody]
tbs TableFoot
tf
  fixBlock (Div Attr
attr [Block]
bs) = Attr -> [Block] -> Block
Div (Attr -> Attr
fixAttrs Attr
attr) [Block]
bs
  fixBlock Block
x = Block
x

  -- fix ids and links in inlines
  fixInline :: Inline -> Inline
  fixInline :: Inline -> Inline
fixInline (Code Attr
attr Text
t) = Attr -> Text -> Inline
Code (Attr -> Attr
fixAttrs Attr
attr) Text
t
  fixInline (Link Attr
attr [Inline]
ils (Text
url,Text
tit)) =
    Attr -> [Inline] -> (Text, Text) -> Inline
Link (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils (Text -> Text
fixURL Text
url,Text
tit)
  fixInline (Image Attr
attr [Inline]
ils (Text
url,Text
tit)) =
    Attr -> [Inline] -> (Text, Text) -> Inline
Image (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils (Text -> Text
fixURL Text
url,Text
tit)
  fixInline (Span Attr
attr [Inline]
ils) = Attr -> [Inline] -> Inline
Span (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils
  fixInline Inline
x = Inline
x

  -- add thisfile as prefix of identifier
  fixAttrs :: Attr -> Attr
  fixAttrs :: Attr -> Attr
fixAttrs (Text
i,[Text]
cs,[(Text, Text)]
kvs)
    | Text -> Bool
T.null Text
i = (Text
i,[Text]
cs,[(Text, Text)]
kvs)
    | Bool
otherwise =
        (Text -> [Text] -> Text
T.intercalate Text
"__"
          ((Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text -> Text
toIdent Text
thisfile, Text
i]),
        [Text]
cs, [(Text, Text)]
kvs)

  -- turns a filepath into an identifier
  toIdent :: Text -> Text
  toIdent :: Text -> Text
toIdent = Extensions -> Text -> Text
textToIdentifier Extensions
exts (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text] -> Text
T.intercalate Text
"__" ([Text] -> Text) -> (Text -> [Text]) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            (Char -> Bool) -> Text -> [Text]
T.split (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\\')

  -- if URL begins with file from allfiles, convert to
  -- an internal link with the appropriate identifier
  fixURL :: Text -> Text
  fixURL :: Text -> Text
fixURL Text
u =
    let (Text
a,Text
b) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
unEscapeString (String -> String) -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
u
        filepart :: Text
filepart = if Text -> Bool
T.null Text
a
                      then Text -> Text
toIdent Text
thisfile
                      else Text -> Text
toIdent Text
a
        fragpart :: Text
fragpart = (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') Text
b
     in if Text -> Bool
T.null Text
a Bool -> Bool -> Bool
|| Text
a Text -> [Text] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
allfiles
           then Text
"#" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> [Text] -> Text
T.intercalate Text
"__"
                         ((Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text
filepart, Text
fragpart])
           else Text
u

-- | Process ipynb output cells.  If mode is Nothing,
-- remove all output.  If mode is Just format, select
-- best output for the format.  If format is not ipynb,
-- strip out ANSI escape sequences from CodeBlocks (see #5633).
filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc
filterIpynbOutput :: Maybe Format -> Transform
filterIpynbOutput Maybe Format
mode = (Block -> Block) -> Transform
forall a b. Walkable a b => (a -> a) -> b -> b
walk Block -> Block
go
  where go :: Block -> Block
go (Div (Text
ident, Text
"output":[Text]
os, [(Text, Text)]
kvs) [Block]
bs) =
          case Maybe Format
mode of
            Maybe Format
Nothing  -> Attr -> [Block] -> Block
Div (Text
ident, Text
"output"Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
os, [(Text, Text)]
kvs) []
            -- "best" for ipynb includes all formats:
            Just Format
fmt
              | Format
fmt Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Format
Format Text
"ipynb"
                          -> Attr -> [Block] -> Block
Div (Text
ident, Text
"output"Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
os, [(Text, Text)]
kvs) [Block]
bs
              | Bool
otherwise -> Attr -> [Block] -> Block
Div (Text
ident, Text
"output"Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
os, [(Text, Text)]
kvs) ([Block] -> Block) -> [Block] -> Block
forall a b. (a -> b) -> a -> b
$
                              (Block -> Block) -> [Block] -> [Block]
forall a b. Walkable a b => (a -> a) -> b -> b
walk Block -> Block
removeANSI ([Block] -> [Block]) -> [Block] -> [Block]
forall a b. (a -> b) -> a -> b
$
                              Int -> [Block] -> [Block]
forall a. Int -> [a] -> [a]
take Int
1 ([Block] -> [Block]) -> [Block] -> [Block]
forall a b. (a -> b) -> a -> b
$ (Block -> Int) -> [Block] -> [Block]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn Block -> Int
rank [Block]
bs
                 where
                  rank :: Block -> Int
rank (RawBlock (Format Text
"html") Text
_)
                    | Format
fmt Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Format
Format Text
"html" = Int
1 :: Int
                    | Format
fmt Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Format
Format Text
"markdown" = Int
3
                    | Bool
otherwise = Int
4
                  rank (RawBlock (Format Text
"latex") Text
_)
                    | Format
fmt Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Format
Format Text
"latex" = Int
1
                    | Format
fmt Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Format
Format Text
"markdown" = Int
3
                    | Bool
otherwise = Int
4
                  rank (RawBlock Format
f Text
_)
                    | Format
fmt Format -> Format -> Bool
forall a. Eq a => a -> a -> Bool
== Format
f = Int
1
                    | Bool
otherwise = Int
4
                  rank (Para [Image{}]) = Int
2
                  rank Block
_ = Int
3
                  removeANSI :: Block -> Block
removeANSI (CodeBlock Attr
attr Text
code) =
                    Attr -> Text -> Block
CodeBlock Attr
attr (Text -> Text
removeANSIEscapes Text
code)
                  removeANSI Block
x = Block
x
                  removeANSIEscapes :: Text -> Text
removeANSIEscapes Text
t
                    | Just Text
cs <- Text -> Text -> Maybe Text
T.stripPrefix Text
"\x1b[" Text
t =
                        Text -> Text
removeANSIEscapes (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
1 (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'm') Text
cs
                    | Just (Char
c, Text
cs) <- Text -> Maybe (Char, Text)
T.uncons Text
t = Char -> Text -> Text
T.cons Char
c (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
removeANSIEscapes Text
cs
                    | Bool
otherwise = Text
""
        go Block
x = Block
x

-- | Remove soft breaks between East Asian characters.
eastAsianLineBreakFilter :: Pandoc -> Pandoc
eastAsianLineBreakFilter :: Transform
eastAsianLineBreakFilter = ([Inline] -> [Inline]) -> Transform
forall a b. (Data a, Data b) => (a -> a) -> b -> b
bottomUp [Inline] -> [Inline]
go
  where go :: [Inline] -> [Inline]
go (Inline
x:Inline
SoftBreak:Inline
y:[Inline]
zs)
          | Just (Text
_, Char
b) <- Text -> Maybe (Text, Char)
T.unsnoc (Text -> Maybe (Text, Char)) -> Text -> Maybe (Text, Char)
forall a b. (a -> b) -> a -> b
$ Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify Inline
x
          , Just (Char
c, Text
_) <- Text -> Maybe (Char, Text)
T.uncons (Text -> Maybe (Char, Text)) -> Text -> Maybe (Char, Text)
forall a b. (a -> b) -> a -> b
$ Inline -> Text
forall a. Walkable Inline a => a -> Text
stringify Inline
y
          , Char -> Int
charWidth Char
b Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2
          , Char -> Int
charWidth Char
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2
          = Inline
xInline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
:Inline
yInline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
:[Inline]
zs
          | Bool
otherwise
          = Inline
xInline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
:Inline
SoftBreakInline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
:Inline
yInline -> [Inline] -> [Inline]
forall a. a -> [a] -> [a]
:[Inline]
zs
        go [Inline]
xs
          = [Inline]
xs

-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift :: Int -> Transform
headerShift Int
n (Pandoc Meta
meta (Header Int
m Attr
_ [Inline]
ils : [Block]
bs))
  | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0
  , Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int -> Transform
headerShift Int
n Transform -> Transform
forall a b. (a -> b) -> a -> b
$
                 Inlines -> Transform
B.setTitle ([Inline] -> Inlines
forall a. [a] -> Many a
B.fromList [Inline]
ils) Transform -> Transform
forall a b. (a -> b) -> a -> b
$ Meta -> [Block] -> Pandoc
Pandoc Meta
meta [Block]
bs
headerShift Int
n (Pandoc Meta
meta [Block]
bs) = Meta -> [Block] -> Pandoc
Pandoc Meta
meta ((Block -> Block) -> [Block] -> [Block]
forall a b. Walkable a b => (a -> a) -> b -> b
walk Block -> Block
shift [Block]
bs)

 where
   shift :: Block -> Block
   shift :: Block -> Block
shift (Header Int
level Attr
attr [Inline]
inner)
     | Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0  = Int -> Attr -> [Inline] -> Block
Header (Int
level Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n) Attr
attr [Inline]
inner
     | Bool
otherwise      = [Inline] -> Block
Para [Inline]
inner
   shift Block
x            = Block
x