{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Ipynb ( Notebook(..)
, NbV3
, NbV4
, JSONMeta
, Cell(..)
, Source(..)
, CellType(..)
, Output(..)
, MimeType
, MimeData(..)
, MimeBundle(..)
, breakLines
)
where
import Control.Applicative ((<|>))
import Control.Monad (when)
import Data.Aeson as Aeson
import qualified Data.Aeson.Types as Aeson
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Base64 as Base64
import Data.Char (isSpace)
import qualified Data.HashMap.Strict as HM
import Data.List (partition)
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import GHC.Generics
import Prelude
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup
#endif
data NbV3
data NbV4
data Notebook a = Notebook
{ notebookMetadata :: JSONMeta
, notebookFormat :: (Int, Int)
, notebookCells :: [Cell a]
} deriving (Show, Eq, Generic)
instance Semigroup (Notebook a) where
Notebook m1 f1 c1 <> Notebook m2 f2 c2 =
Notebook (m1 <> m2) (max f1 f2) (c1 <> c2)
instance Monoid (Notebook a) where
mempty = Notebook mempty (0, 0) mempty
#if MIN_VERSION_base(4,11,0)
#else
mappend = (<>)
#endif
instance FromJSON (Notebook NbV4) where
parseJSON = withObject "Notebook" $ \v -> do
fmt <- v .:? "nbformat" .!= 0
when (fmt < 4 || fmt > 4) $ fail "expected nbformat == 4"
fmtminor <- v .:? "nbformat_minor" .!= 0
metadata <- v .:? "metadata" .!= mempty
cells <- v .: "cells"
return
Notebook{ notebookMetadata = metadata
, notebookFormat = (fmt, fmtminor)
, notebookCells = cells
}
instance FromJSON (Notebook NbV3) where
parseJSON = withObject "Notebook" $ \v -> do
fmt <- v .:? "nbformat" .!= 0
when (fmt > 3) $ fail "expected nbformat <= 3"
fmtminor <- v .:? "nbformat_minor" .!= 0
metadata <- v .:? "metadata" .!= mempty
worksheets <- v .: "worksheets"
cells <- mconcat <$> mapM (.: "cells") worksheets
return
Notebook{ notebookMetadata = metadata
, notebookFormat = (fmt, fmtminor)
, notebookCells = cells
}
instance ToJSON (Notebook NbV4) where
toJSON n = object
[ "nbformat" .= fst (notebookFormat n)
, "nbformat_minor" .= snd (notebookFormat n)
, "metadata" .= notebookMetadata n
, "cells" .= (if notebookFormat n >= (4,1)
then id
else map (\c -> c{ cellAttachments = Nothing }))
(notebookCells n)
]
instance ToJSON (Notebook NbV3) where
toJSON n = object
[ "nbformat" .= fst (notebookFormat n)
, "nbformat_minor" .= snd (notebookFormat n)
, "metadata" .= notebookMetadata n
, "worksheets" .=
[ object
[ "cells" .= (if notebookFormat n >= (4,1)
then id
else map (\c -> c{ cellAttachments = Nothing }))
(notebookCells n)
, "metadata" .= (mempty :: JSONMeta)
]
]
]
type JSONMeta = M.Map Text Value
newtype Source = Source{ unSource :: [Text] }
deriving (Show, Eq, Generic, Semigroup, Monoid)
instance FromJSON Source where
parseJSON v = do
ts <- parseJSON v <|> (:[]) <$> parseJSON v
return $ Source ts
instance ToJSON Source where
toJSON (Source ts) = toJSON ts
data Cell a = Cell
{ cellType :: CellType a
, cellSource :: Source
, cellMetadata :: JSONMeta
, cellAttachments :: Maybe (M.Map Text MimeBundle)
} deriving (Show, Eq, Generic)
instance FromJSON (Cell NbV4) where
parseJSON = withObject "Cell" $ \v -> do
ty <- v .: "cell_type"
cell_type <-
case ty of
"markdown" -> pure Markdown
"raw" -> pure Raw
"code" ->
Code
<$> v .:? "execution_count"
<*> v .: "outputs"
_ -> fail $ "Unknown cell_type " ++ ty
metadata <- v .: "metadata"
attachments <- v .:? "attachments"
source <- v .: "source"
return
Cell{ cellType = cell_type
, cellMetadata = metadata
, cellAttachments = attachments
, cellSource = source
}
instance FromJSON (Cell NbV3) where
parseJSON = withObject "Cell" $ \v -> do
ty <- v .: "cell_type"
cell_type <-
case ty of
"markdown" -> pure Markdown
"heading" -> Heading <$> v .: "level"
"raw" -> pure Raw
"code" ->
Code
<$> v .:? "prompt_number"
<*> v .: "outputs"
_ -> fail $ "Unknown cell_type " ++ ty
metadata <- parseV3Metadata v
source <- if ty == "code"
then v .: "input"
else v .: "source"
return
Cell{ cellType = cell_type
, cellMetadata = metadata
, cellAttachments = Nothing
, cellSource = source
}
instance ToJSON (Cell NbV4) where
toJSON c = object $
("metadata" .= cellMetadata c) :
maybe [] (\x -> ["attachments" .= x]) (cellAttachments c) ++
case cellType c of
Markdown -> [ "cell_type" .= ("markdown" :: Text)
, "source" .= cellSource c ]
Heading lev ->
[ "cell_type" .= ("markdown" :: Text)
, "source" .=
(Source . breakLines .
((T.replicate lev "#" <> " ") <>) .
mconcat . unSource) (cellSource c)
]
Raw -> [ "cell_type" .= ("raw" :: Text)
, "source" .= cellSource c
]
Code{
codeExecutionCount = ec
, codeOutputs = outs
} -> [ "cell_type" .= ("code" :: Text)
, "execution_count" .= ec
, "outputs" .= outs
, "source" .= cellSource c
]
instance ToJSON (Cell NbV3) where
toJSON c =
object $
metadataToV3Pairs (cellMetadata c) ++
case cellType c of
Markdown -> [ "cell_type" .= ("markdown" :: Text)
, "source" .= cellSource c
]
Heading lev -> [ "cell_type" .= ("heading" :: Text)
, "level" .= lev
, "source" .= cellSource c
]
Raw -> [ "cell_type" .= ("raw" :: Text)
, "source" .= cellSource c
]
Code{
codeExecutionCount = ec
, codeOutputs = outs
} -> [ "cell_type" .= ("code" :: Text)
, "input" .= cellSource c
, "outputs" .= outs
] ++
maybe [] (\n -> ["prompt_number" .= n]) ec
metadataToV3Pairs :: JSONMeta -> [Aeson.Pair]
metadataToV3Pairs meta =
("metadata" .= M.fromList regMeta) : map toPair extraMeta
where (extraMeta, regMeta) = partition isExtraMeta $ M.toList meta
toPair (k,v) = k .= v
v3MetaInMainCell :: [Text]
v3MetaInMainCell = ["collapsed", "language"]
isExtraMeta :: (Text, a) -> Bool
isExtraMeta (k,_) = k `elem` v3MetaInMainCell
parseV3Metadata :: HM.HashMap Text Value -> Aeson.Parser JSONMeta
parseV3Metadata v = do
meta <- v .:? "metadata" .!= mempty
let extraMeta = M.fromList $ filter isExtraMeta $ HM.toList v
return (meta <> extraMeta)
data CellType a =
Markdown
| Heading
{ headingLevel :: Int
}
| Raw
| Code
{ codeExecutionCount :: Maybe Int
, codeOutputs :: [Output a]
}
deriving (Show, Eq, Generic)
data Output a =
Stream
{ streamName :: Text
, streamText :: Source }
| DisplayData
{ displayData :: MimeBundle
, displayMetadata :: JSONMeta
}
| ExecuteResult
{ executeCount :: Int
, executeData :: MimeBundle
, executeMetadata :: JSONMeta
}
| Err
{ errName :: Text
, errValue :: Text
, errTraceback :: [Text]
}
deriving (Show, Eq, Generic)
instance FromJSON (Output NbV4) where
parseJSON = withObject "Object" $ \v -> do
ty <- v .: "output_type"
case ty of
"stream" ->
Stream
<$> v .: "name"
<*> v .: "text"
"display_data" ->
DisplayData
<$> v .: "data"
<*> v .:? "metadata" .!= mempty
"execute_result" ->
ExecuteResult
<$> v .: "execution_count"
<*> v .: "data"
<*> v .:? "metadata" .!= mempty
"error" ->
Err
<$> v .: "ename"
<*> v .: "evalue"
<*> v .: "traceback"
_ -> fail $ "Unknown object_type " ++ ty
instance FromJSON (Output NbV3) where
parseJSON = withObject "Object" $ \v -> do
ty <- v .: "output_type"
case ty of
"stream" ->
Stream
<$> v .: "stream"
<*> v .: "text"
"display_data" ->
DisplayData
<$> extractNbV3Data v
<*> v .:? "metadata" .!= mempty
"pyout" ->
ExecuteResult
<$> v .: "prompt_number"
<*> extractNbV3Data v
<*> v .:? "metadata" .!= mempty
"pyerr" ->
Err
<$> v .: "ename"
<*> v .: "evalue"
<*> v .: "traceback"
_ -> fail $ "Unknown object_type " ++ ty
extractNbV3Data :: Aeson.Object -> Aeson.Parser MimeBundle
extractNbV3Data v = do
let go ("output_type", _) = Nothing
go ("metadata", _) = Nothing
go ("prompt_number", _) = Nothing
go ("text", x) = Just ("text/plain", x)
go ("latex", x) = Just ("text/latex", x)
go ("html", x) = Just ("text/html", x)
go ("png", x) = Just ("image/png", x)
go ("jpeg", x) = Just ("image/jpeg", x)
go ("javascript", x) = Just ("application/javascript", x)
go (_, _) = Nothing
parseJSON (Object . HM.fromList . mapMaybe go . HM.toList $ v)
instance ToJSON (Output NbV4) where
toJSON s@Stream{} = object
[ "output_type" .= ("stream" :: Text)
, "name" .= streamName s
, "text" .= streamText s
]
toJSON d@DisplayData{} = object
[ "output_type" .= ("display_data" :: Text)
, "data" .= displayData d
, "metadata" .= displayMetadata d
]
toJSON e@ExecuteResult{} = object
[ "output_type" .= ("execute_result" :: Text)
, "execution_count" .= executeCount e
, "data" .= executeData e
, "metadata" .= executeMetadata e
]
toJSON e@Err{} = object
[ "output_type" .= ("error" :: Text)
, "ename" .= errName e
, "evalue" .= errValue e
, "traceback" .= errTraceback e
]
instance ToJSON (Output NbV3) where
toJSON s@Stream{} = object
[ "output_type" .= ("stream" :: Text)
, "stream" .= streamName s
, "text" .= streamText s
]
toJSON d@DisplayData{} =
adjustV3DataFields $ object
[ "output_type" .= ("display_data" :: Text)
, "data" .= displayData d
, "metadata" .= displayMetadata d ]
toJSON e@ExecuteResult{} =
adjustV3DataFields $ object
[ "output_type" .= ("pyout" :: Text)
, "prompt_number" .= executeCount e
, "data" .= executeData e
, "metadata" .= executeMetadata e ]
toJSON e@Err{} = object
[ "output_type" .= ("pyerr" :: Text)
, "ename" .= errName e
, "evalue" .= errValue e
, "traceback" .= errTraceback e
]
adjustV3DataFields :: Value -> Value
adjustV3DataFields (Object hm) =
case HM.lookup "data" hm of
Just (Object dm) -> Object $
HM.delete "data" $ foldr
(\(k, v) -> HM.insert (modKey k) v) hm
(HM.toList dm)
_ -> Object hm
where modKey "text/plain" = "text"
modKey "text/latex" = "latex"
modKey "text/html" = "html"
modKey "image/jpeg" = "jpeg"
modKey "image/png" = "png"
modKey "application/javascript" = "javascript"
modKey x = x
adjustV3DataFields x = x
data MimeData =
BinaryData ByteString
| TextualData Text
| JsonData Value
deriving (Show, Eq, Generic)
type MimeType = Text
newtype MimeBundle = MimeBundle{ unMimeBundle :: M.Map MimeType MimeData }
deriving (Show, Eq, Generic, Semigroup, Monoid)
instance FromJSON MimeBundle where
parseJSON v = do
m <- parseJSON v >>= mapM pairToMimeData . M.toList
return $ MimeBundle $ M.fromList m
pairToMimeData :: (MimeType, Value) -> Aeson.Parser (MimeType, MimeData)
pairToMimeData (mt, v)
| mt == "application/json" ||
"+json" `T.isSuffixOf` mt = return (mt, JsonData v)
pairToMimeData (mt, v) = do
t <- parseJSON v <|> (mconcat <$> parseJSON v)
let mimeprefix = T.takeWhile (/='/') mt
if mimeprefix == "text"
then return (mt, TextualData t)
else
case Base64.decode (TE.encodeUtf8 (T.filter (not . isSpace) t)) of
Left _ -> return (mt, TextualData t)
Right b -> return (mt, BinaryData b)
instance ToJSON MimeBundle where
toJSON (MimeBundle m) =
let mimeBundleToValue (BinaryData bs) =
toJSON .
TE.decodeUtf8 .
(<> "\n") .
B.intercalate "\n" . chunksOf 76 .
Base64.encode
$ bs
mimeBundleToValue (JsonData v) = v
mimeBundleToValue (TextualData t) = toJSON (breakLines t)
in toJSON $ M.map mimeBundleToValue m
chunksOf :: Int -> ByteString -> [ByteString]
chunksOf k s
| B.null s = []
| otherwise =
let (h,t) = B.splitAt k s
in h : chunksOf k t
breakLines :: Text -> [Text]
breakLines t =
let (x, y) = T.break (=='\n') t
in case T.uncons y of
Nothing -> if T.null x then [] else [x]
Just (c, rest) -> (x <> T.singleton c) : breakLines rest