{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Ipynb ( Notebook(..)
, NbV3
, NbV4
, JSONMeta
, Cell(..)
, Source(..)
, CellType(..)
, Output(..)
, MimeType
, MimeData(..)
, MimeBundle(..)
, breakLines
)
where
import Control.Applicative ((<|>))
import Control.Monad (when)
import Data.Aeson as Aeson
import qualified Data.Aeson.Types as Aeson
import Data.ByteString (ByteString)
import qualified Data.ByteString.Base64 as Base64
import Data.Char (isSpace)
import qualified Data.HashMap.Strict as HM
import Data.List (partition)
import qualified Data.Map as M
import Data.Maybe (mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import GHC.Generics
import Prelude
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup
#endif
data NbV3
data NbV4
data Notebook a = Notebook
{ notebookMetadata :: JSONMeta
, notebookFormat :: (Int, Int)
, notebookCells :: [Cell a]
} deriving (Show, Eq, Generic)
instance Semigroup (Notebook a) where
Notebook m1 f1 c1 <> Notebook m2 f2 c2 =
Notebook (m1 <> m2) (max f1 f2) (c1 <> c2)
instance Monoid (Notebook a) where
mempty = Notebook mempty (0, 0) mempty
#if MIN_VERSION_base(4,11,0)
#else
mappend = (<>)
#endif
instance FromJSON (Notebook NbV4) where
parseJSON = withObject "Notebook" $ \v -> do
fmt <- v .:? "nbformat" .!= 0
when (fmt < 4 || fmt > 4) $ fail "expected nbformat == 4"
fmtminor <- v .:? "nbformat_minor" .!= 0
metadata <- v .:? "metadata" .!= mempty
cells <- v .: "cells"
return
Notebook{ notebookMetadata = metadata
, notebookFormat = (fmt, fmtminor)
, notebookCells = cells
}
instance FromJSON (Notebook NbV3) where
parseJSON = withObject "Notebook" $ \v -> do
fmt <- v .:? "nbformat" .!= 0
when (fmt > 3) $ fail "expected nbformat <= 3"
fmtminor <- v .:? "nbformat_minor" .!= 0
metadata <- v .:? "metadata" .!= mempty
worksheets <- v .: "worksheets"
cells <- mconcat <$> mapM (.: "cells") worksheets
return
Notebook{ notebookMetadata = metadata
, notebookFormat = (fmt, fmtminor)
, notebookCells = cells
}
instance ToJSON (Notebook NbV4) where
toJSON n = object
[ "nbformat" .= fst (notebookFormat n)
, "nbformat_minor" .= snd (notebookFormat n)
, "metadata" .= notebookMetadata n
, "cells" .= (if notebookFormat n >= (4,1)
then id
else map (\c -> c{ cellAttachments = Nothing }))
(notebookCells n)
]
instance ToJSON (Notebook NbV3) where
toJSON n = object
[ "nbformat" .= fst (notebookFormat n)
, "nbformat_minor" .= snd (notebookFormat n)
, "metadata" .= notebookMetadata n
, "worksheets" .=
[ object
[ "cells" .= (if notebookFormat n >= (4,1)
then id
else map (\c -> c{ cellAttachments = Nothing }))
(notebookCells n)
, "metadata" .= (mempty :: JSONMeta)
]
]
]
type JSONMeta = M.Map Text Value
newtype Source = Source{ unSource :: [Text] }
deriving (Show, Eq, Generic, Semigroup, Monoid)
instance FromJSON Source where
parseJSON v = do
ts <- parseJSON v <|> (:[]) <$> parseJSON v
return $ Source ts
instance ToJSON Source where
toJSON (Source ts) = toJSON ts
data Cell a = Cell
{ cellType :: CellType a
, cellSource :: Source
, cellMetadata :: JSONMeta
, cellAttachments :: Maybe (M.Map Text MimeBundle)
} deriving (Show, Eq, Generic)
instance FromJSON (Cell NbV4) where
parseJSON = withObject "Cell" $ \v -> do
ty <- v .: "cell_type"
cell_type <-
case ty of
"markdown" -> pure Markdown
"raw" -> pure Raw
"code" ->
Code
<$> v .:? "execution_count"
<*> v .: "outputs"
_ -> fail $ "Unknown cell_type " ++ ty
metadata <- v .: "metadata"
attachments <- v .:? "attachments"
source <- v .: "source"
return
Cell{ cellType = cell_type
, cellMetadata = metadata
, cellAttachments = attachments
, cellSource = source
}
instance FromJSON (Cell NbV3) where
parseJSON = withObject "Cell" $ \v -> do
ty <- v .: "cell_type"
cell_type <-
case ty of
"markdown" -> pure Markdown
"heading" -> Heading <$> v .: "level"
"raw" -> pure Raw
"code" ->
Code
<$> v .:? "prompt_number"
<*> v .: "outputs"
_ -> fail $ "Unknown cell_type " ++ ty
metadata <- parseV3Metadata v
source <- if ty == "code"
then v .: "input"
else v .: "source"
return
Cell{ cellType = cell_type
, cellMetadata = metadata
, cellAttachments = Nothing
, cellSource = source
}
instance ToJSON (Cell NbV4) where
toJSON c = object $
("metadata" .= cellMetadata c) :
maybe [] (\x -> ["attachments" .= x]) (cellAttachments c) ++
case cellType c of
Markdown -> [ "cell_type" .= ("markdown" :: Text)
, "source" .= cellSource c ]
Heading lev ->
[ "cell_type" .= ("markdown" :: Text)
, "source" .=
(Source . breakLines .
((T.replicate lev "#" <> " ") <>) .
mconcat . unSource) (cellSource c)
]
Raw -> [ "cell_type" .= ("raw" :: Text)
, "source" .= cellSource c
]
Code{
codeExecutionCount = ec
, codeOutputs = outs
} -> [ "cell_type" .= ("code" :: Text)
, "execution_count" .= ec
, "outputs" .= outs
, "source" .= cellSource c
]
instance ToJSON (Cell NbV3) where
toJSON c =
object $
metadataToV3Pairs (cellMetadata c) ++
case cellType c of
Markdown -> [ "cell_type" .= ("markdown" :: Text)
, "source" .= cellSource c
]
Heading lev -> [ "cell_type" .= ("heading" :: Text)
, "level" .= lev
, "source" .= cellSource c
]
Raw -> [ "cell_type" .= ("raw" :: Text)
, "source" .= cellSource c
]
Code{
codeExecutionCount = ec
, codeOutputs = outs
} -> [ "cell_type" .= ("code" :: Text)
, "input" .= cellSource c
, "outputs" .= outs
] ++
maybe [] (\n -> ["prompt_number" .= n]) ec
metadataToV3Pairs :: JSONMeta -> [Aeson.Pair]
metadataToV3Pairs meta =
("metadata" .= M.fromList regMeta) : map toPair extraMeta
where (extraMeta, regMeta) = partition isExtraMeta $ M.toList meta
toPair (k,v) = k .= v
v3MetaInMainCell :: [Text]
v3MetaInMainCell = ["collapsed", "language"]
isExtraMeta :: (Text, a) -> Bool
isExtraMeta (k,_) = k `elem` v3MetaInMainCell
parseV3Metadata :: HM.HashMap Text Value -> Aeson.Parser JSONMeta
parseV3Metadata v = do
meta <- v .:? "metadata" .!= mempty
let extraMeta = M.fromList $ filter isExtraMeta $ HM.toList v
return (meta <> extraMeta)
data CellType a =
Markdown
| Heading
{ headingLevel :: Int
}
| Raw
| Code
{ codeExecutionCount :: Maybe Int
, codeOutputs :: [Output a]
}
deriving (Show, Eq, Generic)
data Output a =
Stream
{ streamName :: Text
, streamText :: Source }
| DisplayData
{ displayData :: MimeBundle
, displayMetadata :: JSONMeta
}
| ExecuteResult
{ executeCount :: Int
, executeData :: MimeBundle
, executeMetadata :: JSONMeta
}
| Err
{ errName :: Text
, errValue :: Text
, errTraceback :: [Text]
}
deriving (Show, Eq, Generic)
instance FromJSON (Output NbV4) where
parseJSON = withObject "Object" $ \v -> do
ty <- v .: "output_type"
case ty of
"stream" ->
Stream
<$> v .: "name"
<*> v .: "text"
"display_data" ->
DisplayData
<$> v .: "data"
<*> v .:? "metadata" .!= mempty
"execute_result" ->
ExecuteResult
<$> v .: "execution_count"
<*> v .: "data"
<*> v .:? "metadata" .!= mempty
"error" ->
Err
<$> v .: "ename"
<*> v .: "evalue"
<*> v .: "traceback"
_ -> fail $ "Unknown object_type " ++ ty
instance FromJSON (Output NbV3) where
parseJSON = withObject "Object" $ \v -> do
ty <- v .: "output_type"
case ty of
"stream" ->
Stream
<$> v .: "stream"
<*> v .: "text"
"display_data" ->
DisplayData
<$> extractNbV3Data v
<*> v .:? "metadata" .!= mempty
"pyout" ->
ExecuteResult
<$> v .: "prompt_number"
<*> extractNbV3Data v
<*> v .:? "metadata" .!= mempty
"pyerr" ->
Err
<$> v .: "ename"
<*> v .: "evalue"
<*> v .: "traceback"
_ -> fail $ "Unknown object_type " ++ ty
extractNbV3Data :: Aeson.Object -> Aeson.Parser MimeBundle
extractNbV3Data v = do
let go ("output_type", _) = Nothing
go ("metadata", _) = Nothing
go ("prompt_number", _) = Nothing
go ("text", x) = Just ("text/plain", x)
go ("latex", x) = Just ("text/latex", x)
go ("html", x) = Just ("text/html", x)
go ("png", x) = Just ("image/png", x)
go ("jpeg", x) = Just ("image/jpeg", x)
go ("javascript", x) = Just ("application/javascript", x)
go (_, _) = Nothing
parseJSON (Object . HM.fromList . mapMaybe go . HM.toList $ v)
instance ToJSON (Output NbV4) where
toJSON s@Stream{} = object
[ "output_type" .= ("stream" :: Text)
, "name" .= streamName s
, "text" .= streamText s
]
toJSON d@DisplayData{} = object
[ "output_type" .= ("display_data" :: Text)
, "data" .= displayData d
, "metadata" .= displayMetadata d
]
toJSON e@ExecuteResult{} = object
[ "output_type" .= ("execute_result" :: Text)
, "execution_count" .= executeCount e
, "data" .= executeData e
, "metadata" .= executeMetadata e
]
toJSON e@Err{} = object
[ "output_type" .= ("error" :: Text)
, "ename" .= errName e
, "evalue" .= errValue e
, "traceback" .= errTraceback e
]
instance ToJSON (Output NbV3) where
toJSON s@Stream{} = object
[ "output_type" .= ("stream" :: Text)
, "stream" .= streamName s
, "text" .= streamText s
]
toJSON d@DisplayData{} =
adjustV3DataFields $ object
[ "output_type" .= ("display_data" :: Text)
, "data" .= displayData d
, "metadata" .= displayMetadata d ]
toJSON e@ExecuteResult{} =
adjustV3DataFields $ object
[ "output_type" .= ("pyout" :: Text)
, "prompt_number" .= executeCount e
, "data" .= executeData e
, "metadata" .= executeMetadata e ]
toJSON e@Err{} = object
[ "output_type" .= ("pyerr" :: Text)
, "ename" .= errName e
, "evalue" .= errValue e
, "traceback" .= errTraceback e
]
adjustV3DataFields :: Value -> Value
adjustV3DataFields (Object hm) =
case HM.lookup "data" hm of
Just (Object dm) -> Object $
HM.delete "data" $ foldr
(\(k, v) -> HM.insert (modKey k) v) hm
(HM.toList dm)
_ -> Object hm
where modKey "text/plain" = "text"
modKey "text/latex" = "latex"
modKey "text/html" = "html"
modKey "image/jpeg" = "jpeg"
modKey "image/png" = "png"
modKey "application/javascript" = "javascript"
modKey x = x
adjustV3DataFields x = x
data MimeData =
BinaryData ByteString
| TextualData Text
| JsonData Value
deriving (Show, Eq, Generic)
type MimeType = Text
newtype MimeBundle = MimeBundle{ unMimeBundle :: M.Map MimeType MimeData }
deriving (Show, Eq, Generic, Semigroup, Monoid)
instance FromJSON MimeBundle where
parseJSON v = do
m <- parseJSON v >>= mapM pairToMimeData . M.toList
return $ MimeBundle $ M.fromList m
pairToMimeData :: (MimeType, Value) -> Aeson.Parser (MimeType, MimeData)
pairToMimeData (mt, v)
| mt == "application/json" ||
"+json" `T.isSuffixOf` mt = return (mt, JsonData v)
pairToMimeData (mt, v) = do
t <- parseJSON v <|> (mconcat <$> parseJSON v)
let mimeprefix = T.takeWhile (/='/') mt
if mimeprefix == "text"
then return (mt, TextualData t)
else
case Base64.decode (TE.encodeUtf8 (T.filter (not . isSpace) t)) of
Left _ -> return (mt, TextualData t)
Right b -> return (mt, BinaryData b)
instance ToJSON MimeBundle where
toJSON (MimeBundle m) =
let mimeBundleToValue (BinaryData bs) =
toJSON $ TE.decodeUtf8 . Base64.joinWith "\n" 76 . Base64.encode $ bs
mimeBundleToValue (JsonData v) = v
mimeBundleToValue (TextualData t) = toJSON (breakLines t)
in toJSON $ M.map mimeBundleToValue m
breakLines :: Text -> [Text]
breakLines t =
let (x, y) = T.break (=='\n') t
in case T.uncons y of
Nothing -> if T.null x then [] else [x]
Just (c, rest) -> (x <> T.singleton c) : breakLines rest