{-# LANGUAGE DeriveDataTypeable          #-}
{-# LANGUAGE DeriveGeneric               #-}
{-# LANGUAGE FlexibleContexts            #-}
{-# LANGUAGE FlexibleInstances           #-}
{-# LANGUAGE MultiParamTypeClasses       #-}
{-# LANGUAGE OverloadedLists             #-}
{-# LANGUAGE OverloadedStrings           #-}
{-# LANGUAGE MultiWayIf                  #-}
{-# LANGUAGE PackageImports              #-}
{-# LANGUAGE RecordWildCards             #-}
{-# LANGUAGE TemplateHaskell             #-}
{-# LANGUAGE TypeFamilies                #-}
{-# Language QuasiQuotes                 #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

-- |
-- Module      :  Data.SAM.Version1_6.Read.Parser.Alignment.Base
-- Copyright   :  (c) Matthew Mosior 2023
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = WARNING
--
-- This module is considered __internal__.
--
-- The Package Versioning Policy __does not apply__.
--
-- The contents of this module may change __in any way whatsoever__
-- and __without any warning__ between minor versions of this package.
--
-- Authors importing this library are expected to track development
-- closely.
--
-- All credit goes to the author(s)/maintainer(s) of the
-- [containers](https://hackage.haskell.org/package/containers) library
-- for the above warning text.
--
-- = Description
--
-- This library enables the decoding/encoding of SAM, BAM and CRAM file formats.

module Data.SAM.Version1_6.Read.Parser.Alignment.Base ( -- * SAM_V1_6 parser - alignment section
                                                        parse_SAM_V1_6_Alignment
                                                      ) where

import Data.SAM.Version1_6.Alignment
import Data.SAM.Version1_6.Read.Error
import Data.SAM.Version1_6.Read.Parser.Alignment.AOPT
import Data.SAM.Version1_6.Read.Parser.Alignment.IOPT
import Data.SAM.Version1_6.Read.Parser.Alignment.FOPT
import Data.SAM.Version1_6.Read.Parser.Alignment.ZOPT
import Data.SAM.Version1_6.Read.Parser.Alignment.HOPT
import Data.SAM.Version1_6.Read.Parser.Alignment.BOPT

import           Data.Attoparsec.ByteString.Lazy   as DABL
import qualified Data.ByteString.Char8             as DBC8
import           Text.Regex.PCRE.Heavy

-- | Make a parser optional, return Nothing if there is no match.
maybeOption :: Parser a
            -> Parser (Maybe a)
maybeOption :: forall a. Parser a -> Parser (Maybe a)
maybeOption Parser a
p = Maybe a
-> Parser ByteString (Maybe a) -> Parser ByteString (Maybe a)
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Maybe a
forall a. Maybe a
Nothing (a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> Parser a -> Parser ByteString (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p)

-- | @"SAM_V1_6_Alignment"@ parser
-- Defines a parser for the alignment section of the SAM v1.6 file format.
-- See the [SAM v1.6](http://samtools.github.io/hts-specs/SAMv1.pdf) specification documentation.
parse_SAM_V1_6_Alignment :: Parser SAM_V1_6_Alignment
parse_SAM_V1_6_Alignment :: Parser SAM_V1_6_Alignment
parse_SAM_V1_6_Alignment = do
  ByteString
qname <- do ByteString
qnamep <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
              -- Parse QNAME field of alignment section.
              case (ByteString
qnamep ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[!-?A-~]{1,254}|\*|]) of
                Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_QNAME_Incorrect_Format
                Bool
True  -> -- QNAME is in the accepted format.
                         ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
qnamep
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
flag <- do ByteString
flagp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
             -- Parse FLAG field of alignment section.
             case (ByteString
flagp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[0-9]*|]) of
               Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_FLAG_Incorrect_Format
               Bool
True  -> -- FLAG is in the accepted format.
                        ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
flagp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
rname <- do ByteString
rnamep <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
              -- Parse RNAME field of alignment section.
              case (ByteString
rnamep ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|\*|[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*|]) of
                Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_RNAME_Incorrect_Format 
                Bool
True  -> -- RNAME is in the accepted format.
                         ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
rnamep
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
pos <- do ByteString
posp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
            -- Parse POS field of the alignment section.
            case (ByteString
posp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[0-9]*|]) of
              Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_POS_Incorrect_Format
              Bool
True  -> -- POS is in the accepted format.
                       ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
posp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
mapq <- do ByteString
mapqp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
             -- Parse MAPQ field of the alignment section.
             case (ByteString
mapqp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[0-9]*|]) of
               Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_MAPQ_Incorrect_Format
               Bool
True  -> -- MAPQ is in the accepted format.
                        ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
mapqp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
cigar <- do ByteString
cigarp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
              -- Parse CIGAR field of alignment section.
              case (ByteString
cigarp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|\*|([0-9]+[MIDNSHPX=])+|]) of
                Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_CIGAR_Incorrect_Format
                Bool
True  -> -- CIGAR is in the accepted format.
                         ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
cigarp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
rnext <- do ByteString
rnextp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
              -- Parse RNEXT field of the alignment section.
              case (ByteString
rnextp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|\*|=|[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*|]) of
                Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_RNEXT_Incorrect_Format
                Bool
True  -> -- RNEXT is in the accepted format.
                         ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
rnextp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
pnext <- do ByteString
pnextp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
              -- Parse PNEXT field of the alignment section.
              case (ByteString
pnextp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[0-9]*|]) of
                Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_PNEXT_Incorrect_Format
                Bool
True  -> -- PNEXT is in the accepted format.
                         ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
pnextp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
tlen <- do ByteString
tlenp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
             -- Parse TLEN field of the alignment section.
             case (ByteString
tlenp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[-]?[0-9]*|]) of
               Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_TLEN_Incorrect_Format 
               Bool
True  -> -- TLEN is in the accepted format.
                        ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
tlenp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
seq <- do ByteString
seqp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
            -- Parse SEQ field of the alignment section.
            case (ByteString
seqp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|\*|[A-Za-z=.]+|]) of
              Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_SEQ_Incorrect_Format 
              Bool
True  -> -- SEQ is in the accepted format.
                       ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
seqp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  ByteString
qual <- do ByteString
qualp <- (Word8 -> Bool) -> Parser ByteString ByteString
DABL.takeTill (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
09)
             -- Parse QUAL field of the alignment section.
             case (ByteString
qualp ByteString -> Regex -> Bool
forall a. ConvertibleStrings a ByteString => a -> Regex -> Bool
=~ [re|[!-~]+|]) of
               Bool
False -> String -> Parser ByteString ByteString
forall a. String -> Parser ByteString a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString ByteString)
-> String -> Parser ByteString ByteString
forall a b. (a -> b) -> a -> b
$ SAM_V1_6_Error -> String
forall a. Show a => a -> String
show SAM_V1_6_Error
SAM_V1_6_Error_Alignment_QUAL_Incorrect_Format
               Bool
True  -> -- QUAL is in the accepted format.
                        ByteString -> Parser ByteString ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
qualp
  Word8
_ <- Word8 -> Parser Word8
word8 Word8
09
  -- This parser assumes that the AOPT tag always appears first,
  -- followed by IOPT, FOPT, ZOPT, HOPT, and BOPT, if they exist,
  -- in that order.
  Maybe ByteString
aopt <- Parser ByteString ByteString -> Parser (Maybe ByteString)
forall a. Parser a -> Parser (Maybe a)
maybeOption Parser ByteString ByteString
parse_SAM_V1_6_Alignment_AOPT
  Word8
_    <- Word8 -> Parser Word8
word8 Word8
09
  Maybe Integer
iopt <- Parser Integer -> Parser (Maybe Integer)
forall a. Parser a -> Parser (Maybe a)
maybeOption Parser Integer
parse_SAM_V1_6_Alignment_IOPT
  Word8
_    <- Word8 -> Parser Word8
word8 Word8
09
  Maybe Float
fopt <- Parser Float -> Parser (Maybe Float)
forall a. Parser a -> Parser (Maybe a)
maybeOption Parser Float
parse_SAM_V1_6_Alignment_FOPT
  Word8
_    <- Word8 -> Parser Word8
word8 Word8
09
  Maybe ByteString
zopt <- Parser ByteString ByteString -> Parser (Maybe ByteString)
forall a. Parser a -> Parser (Maybe a)
maybeOption Parser ByteString ByteString
parse_SAM_V1_6_Alignment_ZOPT
  Word8
_    <- Word8 -> Parser Word8
word8 Word8
09
  Maybe (Seq Word8)
hopt <- Parser (Seq Word8) -> Parser (Maybe (Seq Word8))
forall a. Parser a -> Parser (Maybe a)
maybeOption Parser (Seq Word8)
parse_SAM_V1_6_Alignment_HOPT
  Word8
_    <- Word8 -> Parser Word8
word8 Word8
09
  Maybe SAM_V1_6_Alignment_BOPT
bopt <- Parser SAM_V1_6_Alignment_BOPT
-> Parser (Maybe SAM_V1_6_Alignment_BOPT)
forall a. Parser a -> Parser (Maybe a)
maybeOption Parser SAM_V1_6_Alignment_BOPT
parse_SAM_V1_6_Alignment_BOPT
  -- Return the parsed SAM_V1_6.
  SAM_V1_6_Alignment -> Parser SAM_V1_6_Alignment
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return SAM_V1_6_Alignment { sam_v1_6_alignment_qname :: ByteString
sam_v1_6_alignment_qname  = ByteString
qname
                            , sam_v1_6_alignment_flag :: Int
sam_v1_6_alignment_flag   = case (ByteString -> Maybe (Int, ByteString)
DBC8.readInt ByteString
flag) of
                                                            Maybe (Int, ByteString)
Nothing          -> (-Int
1)
                                                            Just (Int
flagint,ByteString
_) -> Int
flagint
                            , sam_v1_6_alignment_rname :: ByteString
sam_v1_6_alignment_rname = ByteString
rname
                            , sam_v1_6_alignment_pos :: Integer
sam_v1_6_alignment_pos   = case (ByteString -> Maybe (Integer, ByteString)
DBC8.readInteger ByteString
pos) of
                                                           Maybe (Integer, ByteString)
Nothing             -> Integer
0
                                                           Just (Integer
posinteger,ByteString
_) -> Integer
posinteger
                            , sam_v1_6_alignment_mapq :: Int
sam_v1_6_alignment_mapq  = case (ByteString -> Maybe (Int, ByteString)
DBC8.readInt ByteString
mapq) of
                                                           Maybe (Int, ByteString)
Nothing          -> Int
255
                                                           Just (Int
mapqint,ByteString
_) -> Int
mapqint
                            , sam_v1_6_alignment_cigar :: ByteString
sam_v1_6_alignment_cigar = ByteString
cigar
                            , sam_v1_6_alignment_rnext :: ByteString
sam_v1_6_alignment_rnext = ByteString
rnext
                            , sam_v1_6_alignment_pnext :: Integer
sam_v1_6_alignment_pnext = case (ByteString -> Maybe (Integer, ByteString)
DBC8.readInteger ByteString
pnext) of
                                                           Maybe (Integer, ByteString)
Nothing               -> Integer
0
                                                           Just (Integer
pnextinteger,ByteString
_) -> Integer
pnextinteger
                            , sam_v1_6_alignment_tlen :: Integer
sam_v1_6_alignment_tlen  = case (ByteString -> Maybe (Integer, ByteString)
DBC8.readInteger ByteString
tlen) of
                                                           Maybe (Integer, ByteString)
Nothing              -> Integer
0
                                                           Just (Integer
tleninteger,ByteString
_) -> Integer
tleninteger
                            , sam_v1_6_alignment_seq :: ByteString
sam_v1_6_alignment_seq   = ByteString
seq
                            , sam_v1_6_alignment_qual :: ByteString
sam_v1_6_alignment_qual  = ByteString
qual
                            , sam_v1_6_alignment_aopt :: Maybe ByteString
sam_v1_6_alignment_aopt  = Maybe ByteString
aopt
                            , sam_v1_6_alignment_iopt :: Maybe Integer
sam_v1_6_alignment_iopt  = Maybe Integer
iopt
                            , sam_v1_6_alignment_fopt :: Maybe Float
sam_v1_6_alignment_fopt  = Maybe Float
fopt
                            , sam_v1_6_alignment_zopt :: Maybe ByteString
sam_v1_6_alignment_zopt  = Maybe ByteString
zopt
                            , sam_v1_6_alignment_hopt :: Maybe (Seq Word8)
sam_v1_6_alignment_hopt  = Maybe (Seq Word8)
hopt
                            , sam_v1_6_alignment_bopt :: Maybe SAM_V1_6_Alignment_BOPT
sam_v1_6_alignment_bopt  = Maybe SAM_V1_6_Alignment_BOPT
bopt
                            }