{-# LANGUAGE ApplicativeDo     #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards   #-}
{-|
  Reanimate can automatically synchronize animations to your voice if you have
  a transcript and an audio recording. This works with the help of Gentle
  (<https://lowerquality.com/gentle/>). Accuracy is not perfect but it is pretty
  close, and it is by far the easiest way of adding narration to an animation.
-}
module Reanimate.Voice
  ( Transcript(..)
  , TWord(..)
  , Phone(..)
  , findWord                -- :: Transcript -> [Text] -> Text -> TWord
  , findWords               -- :: Transcript -> [Text] -> Text -> [TWord]
  , loadTranscript          -- :: FilePath -> Transcript
  , fakeTranscript          -- :: Text -> Transcript
  , splitTranscript         -- :: Transcript -> SVG -> [(SVG, TWord)]
  , annotateWithTranscript  -- :: Transcript -> Scene s ()
  )
where

import           Control.Monad       (forM_)
import           Data.Aeson
import           Data.Char           (isAlphaNum, isSpace)
import           Data.List           (sortOn)
import           Data.Map            (Map)
import qualified Data.Map            as Map
import           Data.Maybe          (listToMaybe)
import           Data.Text           (Text)
import qualified Data.Text           as T
import qualified Data.Text.IO        as T
import           Reanimate.Animation (SVG, staticFrame)
import           Reanimate.Constants (defaultStrokeWidth, screenHeight, screenWidth)
import           Reanimate.LaTeX     (latex, latexChunks)
import           Reanimate.Misc      (withTempFile)
import           Reanimate.Scene     (Scene, play, waitUntil)
import           Reanimate.Svg       (mkGroup, scale, translate, withStrokeColor, withStrokeWidth)
import           System.Directory    (doesFileExist)
import           System.Exit         (ExitCode (ExitFailure, ExitSuccess))
import           System.FilePath     (replaceExtension)
import           System.IO.Unsafe    (unsafePerformIO)
import           System.Process      (rawSystem, showCommandForUser)

-- | Aligned transcript. Contains the transcript text as well as
--   timing data for each word.
data Transcript = Transcript
  { Transcript -> Text
transcriptText  :: Text
  , Transcript -> Map Text Int
transcriptKeys  :: Map Text Int
  , Transcript -> [TWord]
transcriptWords :: [TWord]
  } deriving (Int -> Transcript -> ShowS
[Transcript] -> ShowS
Transcript -> String
(Int -> Transcript -> ShowS)
-> (Transcript -> String)
-> ([Transcript] -> ShowS)
-> Show Transcript
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Transcript] -> ShowS
$cshowList :: [Transcript] -> ShowS
show :: Transcript -> String
$cshow :: Transcript -> String
showsPrec :: Int -> Transcript -> ShowS
$cshowsPrec :: Int -> Transcript -> ShowS
Show)

instance FromJSON Transcript where
  parseJSON :: Value -> Parser Transcript
parseJSON = String
-> (Object -> Parser Transcript) -> Value -> Parser Transcript
forall a. String -> (Object -> Parser a) -> Value -> Parser a
withObject String
"transcript" ((Object -> Parser Transcript) -> Value -> Parser Transcript)
-> (Object -> Parser Transcript) -> Value -> Parser Transcript
forall a b. (a -> b) -> a -> b
$ \Object
o ->
    Text -> Map Text Int -> [TWord] -> Transcript
Transcript (Text -> Map Text Int -> [TWord] -> Transcript)
-> Parser Text -> Parser (Map Text Int -> [TWord] -> Transcript)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
o Object -> Text -> Parser Text
forall a. FromJSON a => Object -> Text -> Parser a
.: Text
"transcript" Parser (Map Text Int -> [TWord] -> Transcript)
-> Parser (Map Text Int) -> Parser ([TWord] -> Transcript)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Map Text Int -> Parser (Map Text Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Map Text Int
forall k a. Map k a
Map.empty Parser ([TWord] -> Transcript)
-> Parser [TWord] -> Parser Transcript
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o Object -> Text -> Parser [TWord]
forall a. FromJSON a => Object -> Text -> Parser a
.: Text
"words"

-- | Spoken word. Includes information about when it was spoken,
--   its duration, and its phonemes.
data TWord = TWord
  { TWord -> Text
wordAligned     :: Text
  , TWord -> Text
wordCase        :: Text
  , TWord -> Double
wordStart       :: Double -- ^ Start of pronunciation in seconds
  , TWord -> Int
wordStartOffset :: Int    -- ^ Character index of word in transcript
  , TWord -> Double
wordEnd         :: Double -- ^ End of pronunciation in seconds
  , TWord -> Int
wordEndOffset   :: Int    -- ^ Last character index of word in transcript
  , TWord -> [Phone]
wordPhones      :: [Phone]
  , TWord -> Text
wordReference   :: Text   -- ^ The word being pronounced.
  } deriving (Int -> TWord -> ShowS
[TWord] -> ShowS
TWord -> String
(Int -> TWord -> ShowS)
-> (TWord -> String) -> ([TWord] -> ShowS) -> Show TWord
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TWord] -> ShowS
$cshowList :: [TWord] -> ShowS
show :: TWord -> String
$cshow :: TWord -> String
showsPrec :: Int -> TWord -> ShowS
$cshowsPrec :: Int -> TWord -> ShowS
Show)

instance FromJSON TWord where
  parseJSON :: Value -> Parser TWord
parseJSON = String -> (Object -> Parser TWord) -> Value -> Parser TWord
forall a. String -> (Object -> Parser a) -> Value -> Parser a
withObject String
"word" ((Object -> Parser TWord) -> Value -> Parser TWord)
-> (Object -> Parser TWord) -> Value -> Parser TWord
forall a b. (a -> b) -> a -> b
$ \Object
o ->
    Text
-> Text
-> Double
-> Int
-> Double
-> Int
-> [Phone]
-> Text
-> TWord
TWord
      (Text
 -> Text
 -> Double
 -> Int
 -> Double
 -> Int
 -> [Phone]
 -> Text
 -> TWord)
-> Parser Text
-> Parser
     (Text
      -> Double -> Int -> Double -> Int -> [Phone] -> Text -> TWord)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
o
      Object -> Text -> Parser (Maybe Text)
forall a. FromJSON a => Object -> Text -> Parser (Maybe a)
.:? Text
"alignedWord"
      Parser (Maybe Text) -> Text -> Parser Text
forall a. Parser (Maybe a) -> a -> Parser a
.!= Text
T.empty
      Parser
  (Text
   -> Double -> Int -> Double -> Int -> [Phone] -> Text -> TWord)
-> Parser Text
-> Parser
     (Double -> Int -> Double -> Int -> [Phone] -> Text -> TWord)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser Text
forall a. FromJSON a => Object -> Text -> Parser a
.:  Text
"case"
      Parser (Double -> Int -> Double -> Int -> [Phone] -> Text -> TWord)
-> Parser Double
-> Parser (Int -> Double -> Int -> [Phone] -> Text -> TWord)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser (Maybe Double)
forall a. FromJSON a => Object -> Text -> Parser (Maybe a)
.:? Text
"start"
      Parser (Maybe Double) -> Double -> Parser Double
forall a. Parser (Maybe a) -> a -> Parser a
.!= Double
0
      Parser (Int -> Double -> Int -> [Phone] -> Text -> TWord)
-> Parser Int -> Parser (Double -> Int -> [Phone] -> Text -> TWord)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser Int
forall a. FromJSON a => Object -> Text -> Parser a
.:  Text
"startOffset"
      Parser (Double -> Int -> [Phone] -> Text -> TWord)
-> Parser Double -> Parser (Int -> [Phone] -> Text -> TWord)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser (Maybe Double)
forall a. FromJSON a => Object -> Text -> Parser (Maybe a)
.:? Text
"end"
      Parser (Maybe Double) -> Double -> Parser Double
forall a. Parser (Maybe a) -> a -> Parser a
.!= Double
0
      Parser (Int -> [Phone] -> Text -> TWord)
-> Parser Int -> Parser ([Phone] -> Text -> TWord)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser Int
forall a. FromJSON a => Object -> Text -> Parser a
.:  Text
"endOffset"
      Parser ([Phone] -> Text -> TWord)
-> Parser [Phone] -> Parser (Text -> TWord)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser (Maybe [Phone])
forall a. FromJSON a => Object -> Text -> Parser (Maybe a)
.:? Text
"phones"
      Parser (Maybe [Phone]) -> [Phone] -> Parser [Phone]
forall a. Parser (Maybe a) -> a -> Parser a
.!= []
      Parser (Text -> TWord) -> Parser Text -> Parser TWord
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o
      Object -> Text -> Parser Text
forall a. FromJSON a => Object -> Text -> Parser a
.:  Text
"word"

-- | Phoneme type
data Phone = Phone
  { Phone -> Double
phoneDuration :: Double
  , Phone -> Text
phoneType     :: Text
  } deriving (Int -> Phone -> ShowS
[Phone] -> ShowS
Phone -> String
(Int -> Phone -> ShowS)
-> (Phone -> String) -> ([Phone] -> ShowS) -> Show Phone
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Phone] -> ShowS
$cshowList :: [Phone] -> ShowS
show :: Phone -> String
$cshow :: Phone -> String
showsPrec :: Int -> Phone -> ShowS
$cshowsPrec :: Int -> Phone -> ShowS
Show)

instance FromJSON Phone where
  parseJSON :: Value -> Parser Phone
parseJSON =
    String -> (Object -> Parser Phone) -> Value -> Parser Phone
forall a. String -> (Object -> Parser a) -> Value -> Parser a
withObject String
"phone" ((Object -> Parser Phone) -> Value -> Parser Phone)
-> (Object -> Parser Phone) -> Value -> Parser Phone
forall a b. (a -> b) -> a -> b
$ \Object
o -> Double -> Text -> Phone
Phone (Double -> Text -> Phone)
-> Parser Double -> Parser (Text -> Phone)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
o Object -> Text -> Parser Double
forall a. FromJSON a => Object -> Text -> Parser a
.: Text
"duration" Parser (Text -> Phone) -> Parser Text -> Parser Phone
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Object
o Object -> Text -> Parser Text
forall a. FromJSON a => Object -> Text -> Parser a
.: Text
"phone"

-- | Locate the first word that occurs after all the given keys.
--   An error is thrown if no such word exists. An error is thrown
--   if the keys do not exist in the transcript.
findWord :: Transcript -> [Text] -> Text -> TWord
findWord :: Transcript -> [Text] -> Text -> TWord
findWord Transcript
t [Text]
keys Text
w = case [TWord] -> Maybe TWord
forall a. [a] -> Maybe a
listToMaybe (Transcript -> [Text] -> Text -> [TWord]
findWords Transcript
t [Text]
keys Text
w) of
  Maybe TWord
Nothing    -> String -> TWord
forall a. HasCallStack => String -> a
error (String -> TWord) -> String -> TWord
forall a b. (a -> b) -> a -> b
$ String
"Word not in transcript: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ([Text], Text) -> String
forall a. Show a => a -> String
show ([Text]
keys, Text
w)
  Just TWord
tword -> TWord
tword

-- | Locate all words that occur after all the given keys.
--   May return an empty list. An error is thrown
--   if the keys do not exist in the transcript.
findWords :: Transcript -> [Text] -> Text -> [TWord]
findWords :: Transcript -> [Text] -> Text -> [TWord]
findWords Transcript
t [] Text
wd =
  [ TWord
tword | TWord
tword <- Transcript -> [TWord]
transcriptWords Transcript
t, TWord -> Text
wordReference TWord
tword Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
wd ]
findWords Transcript
t (Text
key : [Text]
keys) Text
wd =
  [ TWord
tword
  | TWord
tword <- Transcript -> [Text] -> Text -> [TWord]
findWords Transcript
t [Text]
keys Text
wd
  , TWord -> Int
wordStartOffset TWord
tword Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Text -> Map Text Int -> Int
forall k a. Ord k => a -> k -> Map k a -> a
Map.findWithDefault Int
forall a. a
badKey Text
key (Transcript -> Map Text Int
transcriptKeys Transcript
t)
  ]
  where badKey :: a
badKey = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> String -> a
forall a b. (a -> b) -> a -> b
$ String
"Missing transcript key: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show Text
key

-- | Loading a transcript does three things depending on which files are available
--   with the same basename as the input argument:
--   1. If a JSON file is available, it is parsed and returned.
--   2. If an audio file is available, reanimate tries to align it by calling out to
--      Gentle on localhost:8765/. If Gentle is not running, an error will be thrown.
--   3. If only the text transcript is available, a fake transcript is returned,
--      with timings roughly at 120 words per minute.
loadTranscript :: FilePath -> Transcript
loadTranscript :: String -> Transcript
loadTranscript String
path = IO Transcript -> Transcript
forall a. IO a -> a
unsafePerformIO (IO Transcript -> Transcript) -> IO Transcript -> Transcript
forall a b. (a -> b) -> a -> b
$ do
  Text
rawTranscript <- String -> IO Text
T.readFile String
path
  let keys :: Map Text Int
keys           = Text -> Map Text Int
parseTranscriptKeys Text
rawTranscript
      trimTranscript :: Text
trimTranscript = Map Text Int -> Text -> Text
cutoutKeys Map Text Int
keys Text
rawTranscript
  Bool
hasJSON    <- String -> IO Bool
doesFileExist String
jsonPath
  Transcript
transcript <- if Bool
hasJSON
    then do
      Maybe Transcript
mbT <- String -> IO (Maybe Transcript)
forall a. FromJSON a => String -> IO (Maybe a)
decodeFileStrict String
jsonPath
      case Maybe Transcript
mbT of
        Maybe Transcript
Nothing -> String -> IO Transcript
forall a. HasCallStack => String -> a
error String
"bad json"
        Just Transcript
t  -> Transcript -> IO Transcript
forall (f :: * -> *) a. Applicative f => a -> f a
pure Transcript
t
    else do
      Maybe String
hasAudio <- String -> [String] -> IO (Maybe String)
findWithExtension String
path [String]
audioExtensions
      case Maybe String
hasAudio of
        Maybe String
Nothing        -> Transcript -> IO Transcript
forall (m :: * -> *) a. Monad m => a -> m a
return (Transcript -> IO Transcript) -> Transcript -> IO Transcript
forall a b. (a -> b) -> a -> b
$ Text -> Transcript
fakeTranscript' Text
trimTranscript
        Just String
audioPath -> String -> (String -> IO Transcript) -> IO Transcript
forall a. String -> (String -> IO a) -> IO a
withTempFile String
"txt" ((String -> IO Transcript) -> IO Transcript)
-> (String -> IO Transcript) -> IO Transcript
forall a b. (a -> b) -> a -> b
$ \String
txtPath -> do
          String -> Text -> IO ()
T.writeFile String
txtPath Text
trimTranscript
          String -> String -> IO ()
runGentleForcedAligner String
audioPath String
txtPath
          Maybe Transcript
mbT <- String -> IO (Maybe Transcript)
forall a. FromJSON a => String -> IO (Maybe a)
decodeFileStrict String
jsonPath
          case Maybe Transcript
mbT of
            Maybe Transcript
Nothing -> String -> IO Transcript
forall a. HasCallStack => String -> a
error String
"bad json"
            Just Transcript
t  -> Transcript -> IO Transcript
forall (f :: * -> *) a. Applicative f => a -> f a
pure Transcript
t
  pure $ Transcript
transcript { transcriptKeys :: Map Text Int
transcriptKeys = Map Text Int -> Map Text Int
finalizeKeys Map Text Int
keys }
 where
  jsonPath :: String
jsonPath        = String -> ShowS
replaceExtension String
path String
"json"
  audioExtensions :: [String]
audioExtensions = [String
"mp3", String
"m4a", String
"flac"]

parseTranscriptKeys :: Text -> Map Text Int
parseTranscriptKeys :: Text -> Map Text Int
parseTranscriptKeys = Map Text Int -> Int -> Text -> Map Text Int
worker Map Text Int
forall k a. Map k a
Map.empty Int
0
 where
  worker :: Map Text Int -> Int -> Text -> Map Text Int
worker Map Text Int
keys Int
offset Text
txt = case Text -> Maybe (Char, Text)
T.uncons Text
txt of
    Maybe (Char, Text)
Nothing -> Map Text Int
keys
    Just (Char
'[', Text
cs) ->
      let key :: Text
key       = (Char -> Bool) -> Text -> Text
T.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
']') Text
cs
          newOffset :: Int
newOffset = Text -> Int
T.length Text
key Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2
      in  Map Text Int -> Int -> Text -> Map Text Int
worker (Text -> Int -> Map Text Int -> Map Text Int
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
key Int
offset Map Text Int
keys)
                 (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
newOffset)
                 (Int -> Text -> Text
T.drop Int
newOffset Text
txt)
    Just (Char
_, Text
cs) -> Map Text Int -> Int -> Text -> Map Text Int
worker Map Text Int
keys (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Text
cs

finalizeKeys :: Map Text Int -> Map Text Int
finalizeKeys :: Map Text Int -> Map Text Int
finalizeKeys = [(Text, Int)] -> Map Text Int
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Text, Int)] -> Map Text Int)
-> (Map Text Int -> [(Text, Int)]) -> Map Text Int -> Map Text Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [(Text, Int)] -> [(Text, Int)]
worker Int
0 ([(Text, Int)] -> [(Text, Int)])
-> (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Int) -> Int) -> [(Text, Int)] -> [(Text, Int)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (Text, Int) -> Int
forall a b. (a, b) -> b
snd ([(Text, Int)] -> [(Text, Int)])
-> (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
Map.toList
 where
  worker :: Int -> [(Text, Int)] -> [(Text, Int)]
worker Int
_offset [] = []
  worker Int
offset ((Text
key, Int
at) : [(Text, Int)]
rest) =
    (Text
key, Int
at Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
offset) (Text, Int) -> [(Text, Int)] -> [(Text, Int)]
forall a. a -> [a] -> [a]
: Int -> [(Text, Int)] -> [(Text, Int)]
worker (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
key Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) [(Text, Int)]
rest

cutoutKeys :: Map Text Int -> Text -> Text
cutoutKeys :: Map Text Int -> Text -> Text
cutoutKeys Map Text Int
keys = [Text] -> Text
T.concat ([Text] -> Text) -> (Text -> [Text]) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [(Text, Int)] -> Text -> [Text]
worker Int
0 (((Text, Int) -> Int) -> [(Text, Int)] -> [(Text, Int)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (Text, Int) -> Int
forall a b. (a, b) -> b
snd (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
Map.toList Map Text Int
keys))
 where
  worker :: Int -> [(Text, Int)] -> Text -> [Text]
worker Int
_offset [] Text
txt = [Text
txt]
  worker Int
offset ((Text
key, Int
at) : [(Text, Int)]
xs) Text
txt =
    let keyLen :: Int
keyLen          = Text -> Int
T.length Text
key Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2
        (Text
before, Text
after) = Int -> Text -> (Text, Text)
T.splitAt (Int
at Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
offset) Text
txt
    in  Text
before Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Int -> [(Text, Int)] -> Text -> [Text]
worker (Int
at Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
keyLen) [(Text, Int)]
xs (Int -> Text -> Text
T.drop Int
keyLen Text
after)

findWithExtension :: FilePath -> [String] -> IO (Maybe FilePath)
findWithExtension :: String -> [String] -> IO (Maybe String)
findWithExtension String
_path []       = Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing
findWithExtension String
path  (String
e : [String]
es) = do
  let newPath :: String
newPath = String -> ShowS
replaceExtension String
path String
e
  Bool
hasFile <- String -> IO Bool
doesFileExist String
newPath
  if Bool
hasFile then Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe String
forall a. a -> Maybe a
Just String
newPath) else String -> [String] -> IO (Maybe String)
findWithExtension String
path [String]
es

runGentleForcedAligner :: FilePath -> FilePath -> IO ()
runGentleForcedAligner :: String -> String -> IO ()
runGentleForcedAligner String
audioFile String
transcriptFile = do
  ExitCode
ret <- String -> [String] -> IO ExitCode
rawSystem String
prog [String]
args
  case ExitCode
ret of
    ExitCode
ExitSuccess -> () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    ExitFailure Int
e ->
      String -> IO ()
forall a. HasCallStack => String -> a
error
        (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$  String
"Gentle forced aligner failed with: "
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
e
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\nIs it running locally on port 8765?"
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\nCommand: "
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> [String] -> String
showCommandForUser String
prog [String]
args
 where
  prog :: String
prog = String
"curl"
  args :: [String]
args =
    [ String
"--silent"
    , String
"--form"
    , String
"audio=@" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
audioFile
    , String
"--form"
    , String
"transcript=@" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
transcriptFile
    , String
"--output"
    , String -> ShowS
replaceExtension String
audioFile String
"json"
    , String
"http://localhost:8765/transcriptions?async=false"
    ]

data Token = TokenWord Int Int Text | TokenComma | TokenPeriod | TokenParagraph
  deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)

lexText :: Text -> [Token]
lexText :: Text -> [Token]
lexText = Int -> Text -> [Token]
worker Int
0
 where
  worker :: Int -> Text -> [Token]
worker Int
offset Text
txt = case Text -> Maybe (Char, Text)
T.uncons Text
txt of
    Maybe (Char, Text)
Nothing -> []
    Just (Char
c, Text
cs)
      | Char -> Bool
isSpace Char
c
      -> let (Text
w, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') Text
txt
         in  if Text -> Int
T.length Text
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
3
               then Token
TokenParagraph Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Int -> Text -> [Token]
worker (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
w) Text
rest
               else Int -> Text -> [Token]
worker (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Text
cs
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.'
      -> Token
TokenPeriod Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Int -> Text -> [Token]
worker (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Text
cs
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
','
      -> Token
TokenComma Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Int -> Text -> [Token]
worker (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Text
cs
      | Char -> Bool
isWord Char
c
      -> let (Text
w, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isWord Text
txt
             newOffset :: Int
newOffset = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
w
         in  Int -> Int -> Text -> Token
TokenWord Int
offset Int
newOffset Text
w Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Int -> Text -> [Token]
worker Int
newOffset Text
rest
      | Bool
otherwise
      -> Int -> Text -> [Token]
worker (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Text
cs
  isWord :: Char -> Bool
isWord Char
c = Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'\'', Char
'-']

-- | Fake transcript timings at roughly 120 words per minute.
fakeTranscript :: Text -> Transcript
fakeTranscript :: Text -> Transcript
fakeTranscript Text
rawTranscript =
  let keys :: Map Text Int
keys = Text -> Map Text Int
parseTranscriptKeys Text
rawTranscript
      t :: Transcript
t    = Text -> Transcript
fakeTranscript' (Map Text Int -> Text -> Text
cutoutKeys Map Text Int
keys Text
rawTranscript)
  in  Transcript
t { transcriptKeys :: Map Text Int
transcriptKeys = Map Text Int -> Map Text Int
finalizeKeys Map Text Int
keys }

fakeTranscript' :: Text -> Transcript
fakeTranscript' :: Text -> Transcript
fakeTranscript' Text
input = Transcript :: Text -> Map Text Int -> [TWord] -> Transcript
Transcript { transcriptText :: Text
transcriptText  = Text
input
                                   , transcriptKeys :: Map Text Int
transcriptKeys  = Map Text Int
forall k a. Map k a
Map.empty
                                   , transcriptWords :: [TWord]
transcriptWords = Double -> [Token] -> [TWord]
worker Double
0 (Text -> [Token]
lexText Text
input)
                                   }
 where
  worker :: Double -> [Token] -> [TWord]
worker Double
_now []             = []
  worker Double
now  (Token
token : [Token]
rest) = case Token
token of
    TokenWord Int
start Int
end Text
w ->
      let dur :: Double
dur = Int -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
0.1
      in  TWord :: Text
-> Text
-> Double
-> Int
-> Double
-> Int
-> [Phone]
-> Text
-> TWord
TWord { wordAligned :: Text
wordAligned     = Text -> Text
T.toLower Text
w
                , wordCase :: Text
wordCase        = Text
"success"
                , wordStart :: Double
wordStart       = Double
now
                , wordStartOffset :: Int
wordStartOffset = Int
start
                , wordEnd :: Double
wordEnd         = Double
now Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
dur
                , wordEndOffset :: Int
wordEndOffset   = Int
end
                , wordPhones :: [Phone]
wordPhones      = []
                , wordReference :: Text
wordReference   = Text
w
                }
            TWord -> [TWord] -> [TWord]
forall a. a -> [a] -> [a]
: Double -> [Token] -> [TWord]
worker (Double
now Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
dur) [Token]
rest
    Token
TokenComma     -> Double -> [Token] -> [TWord]
worker (Double
now Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
commaPause) [Token]
rest
    Token
TokenPeriod    -> Double -> [Token] -> [TWord]
worker (Double
now Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
periodPause) [Token]
rest
    Token
TokenParagraph -> Double -> [Token] -> [TWord]
worker (Double
now Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
paragraphPause) [Token]
rest
  paragraphPause :: Double
paragraphPause = Double
0.5
  commaPause :: Double
commaPause     = Double
0.1
  periodPause :: Double
periodPause    = Double
0.2

-- | Convert the transcript text to an SVG image using LaTeX and associate
--   each word image with its timing information.
splitTranscript :: Transcript -> [(SVG, TWord)]
splitTranscript :: Transcript -> [(SVG, TWord)]
splitTranscript Transcript {[TWord]
Text
Map Text Int
transcriptWords :: [TWord]
transcriptKeys :: Map Text Int
transcriptText :: Text
transcriptWords :: Transcript -> [TWord]
transcriptKeys :: Transcript -> Map Text Int
transcriptText :: Transcript -> Text
..} =
  [ (SVG
svg, TWord
tword)
  | tword :: TWord
tword@TWord {Double
Int
[Phone]
Text
wordReference :: Text
wordPhones :: [Phone]
wordEndOffset :: Int
wordEnd :: Double
wordStartOffset :: Int
wordStart :: Double
wordCase :: Text
wordAligned :: Text
wordReference :: TWord -> Text
wordPhones :: TWord -> [Phone]
wordEndOffset :: TWord -> Int
wordEnd :: TWord -> Double
wordStartOffset :: TWord -> Int
wordStart :: TWord -> Double
wordCase :: TWord -> Text
wordAligned :: TWord -> Text
..} <- [TWord]
transcriptWords
  , let wordLength :: Int
wordLength  = Int
wordEndOffset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
wordStartOffset
        [SVG
_, SVG
svg, SVG
_] = [Text] -> [SVG]
forall (t :: * -> *). Traversable t => t Text -> t SVG
latexChunks
          [ Int -> Text -> Text
T.take Int
wordStartOffset Text
transcriptText
          , Int -> Text -> Text
T.take Int
wordLength (Int -> Text -> Text
T.drop Int
wordStartOffset Text
transcriptText)
          , Int -> Text -> Text
T.drop Int
wordEndOffset Text
transcriptText
          ]
  ]

-- | Helper function for rendering a transcript.
annotateWithTranscript :: Transcript -> Scene s ()
annotateWithTranscript :: Transcript -> Scene s ()
annotateWithTranscript Transcript
t = [TWord] -> (TWord -> Scene s ()) -> Scene s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (Transcript -> [TWord]
transcriptWords Transcript
t) ((TWord -> Scene s ()) -> Scene s ())
-> (TWord -> Scene s ()) -> Scene s ()
forall a b. (a -> b) -> a -> b
$ \TWord
tword -> do
  let svg :: SVG
svg = Double -> SVG -> SVG
scale Double
1 (SVG -> SVG) -> SVG -> SVG
forall a b. (a -> b) -> a -> b
$ Text -> SVG
latex (TWord -> Text
wordReference TWord
tword)
  Double -> Scene s ()
forall s. Double -> Scene s ()
waitUntil (TWord -> Double
wordStart TWord
tword)
  let dur :: Double
dur = TWord -> Double
wordEnd TWord
tword Double -> Double -> Double
forall a. Num a => a -> a -> a
- TWord -> Double
wordStart TWord
tword
  Animation -> Scene s ()
forall s. Animation -> Scene s ()
play (Animation -> Scene s ()) -> Animation -> Scene s ()
forall a b. (a -> b) -> a -> b
$ Double -> SVG -> Animation
staticFrame Double
dur (SVG -> Animation) -> SVG -> Animation
forall a b. (a -> b) -> a -> b
$ SVG -> SVG
position (SVG -> SVG) -> SVG -> SVG
forall a b. (a -> b) -> a -> b
$ SVG -> SVG
outline SVG
svg
 where
  position :: SVG -> SVG
position = Double -> Double -> SVG -> SVG
translate (-Double
forall a. Fractional a => a
screenWidth Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2) (-Double
forall a. Fractional a => a
screenHeight Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2)
  outline :: SVG -> SVG
outline SVG
txt = [SVG] -> SVG
mkGroup
    [ Double -> SVG -> SVG
withStrokeWidth (Double
defaultStrokeWidth Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
10) (SVG -> SVG) -> SVG -> SVG
forall a b. (a -> b) -> a -> b
$ String -> SVG -> SVG
withStrokeColor String
"white" SVG
txt
    , Double -> SVG -> SVG
withStrokeWidth Double
0 SVG
txt
    ]