module Bio.TaxonomyData where
import Prelude
import qualified Data.ByteString as B
import qualified Data.Aeson as A
import qualified Data.Vector as V
import Data.Graph.Inductive
import qualified Data.Text as T
import qualified Data.Text.Encoding
-- | Reduced taxon record carrying only an identifier, a scientific name,
-- the parent identifier and the rank.
--
-- This is the node label type of the graph serialised by
-- 'simpleTaxonJSONValue'.
data SimpleTaxon = SimpleTaxon
  { simpleTaxId :: Int
    -- ^ Identifier of this taxon.
  , simpleScientificName :: B.ByteString
    -- ^ Scientific name, kept as raw bytes.
  , simpleParentTaxId :: Int
    -- ^ Identifier of the parent taxon.
  , simpleRank :: Rank
    -- ^ Taxonomic rank of this taxon.
  }
  deriving (Show, Read, Eq)
-- | Taxon record used when comparing trees: a name, a rank and a list of
-- integers stored in 'inTree'.
data CompareTaxon = CompareTaxon
  { compareScientificName :: B.ByteString
    -- ^ Scientific name, kept as raw bytes.
  , compareRank :: Rank
    -- ^ Taxonomic rank of this taxon.
  , inTree :: [Int]
    -- ^ NOTE(review): presumably the indices of the trees in which this
    -- taxon occurs - confirm against the code that builds these values.
  }
  deriving (Show, Read, Eq)
-- | Full taxon record with names, rank, genetic codes, lineage and dates.
--
-- NOTE(review): the field set looks like it mirrors a taxon entry of the
-- NCBI Entrez taxonomy service - confirm against the parser that fills it.
data Taxon = Taxon
  { taxonTaxId          :: Int
    -- ^ Identifier of this taxon.
  , taxonScientificName :: String
    -- ^ Scientific name.
  , taxonParentTaxId    :: Int
    -- ^ Identifier of the parent taxon.
  , taxonRank           :: Rank
    -- ^ Taxonomic rank.
  , division            :: String
    -- ^ Division, stored as free text.
  , geneticCode         :: TaxGenCode
    -- ^ Genetic code record of the taxon.
  , mitoGeneticCode     :: TaxGenCode
    -- ^ Second genetic code record; presumably the mitochondrial one.
  , lineage             :: String
    -- ^ Lineage as a single string.
  , lineageEx           :: [LineageTaxon]
    -- ^ Lineage as a list of structured entries.
  , createDate          :: String
    -- ^ Creation date, stored unparsed.
  , updateDate          :: String
    -- ^ Update date, stored unparsed.
  , pubDate             :: String
    -- ^ Publication date, stored unparsed.
  }
  deriving (Show, Eq)
-- | A name entry made of two strings: a class code and a display name.
data TaxonName = TaxonName
  { classCDE :: String
    -- ^ Class code of the name (free text).
  , dispName :: String
    -- ^ Display form of the name.
  }
  deriving (Show, Eq)
-- | One entry of a structured lineage (see 'lineageEx' in 'Taxon').
data LineageTaxon = LineageTaxon
  { lineageTaxId        :: Int
    -- ^ Identifier of the lineage member.
  , lineageScienticName :: String
    -- ^ Scientific name of the lineage member. The field name is
    -- misspelled ("Scientic") but is part of the public interface.
  , lineageRank         :: Rank
    -- ^ Taxonomic rank of the lineage member.
  }
  deriving (Show, Eq)
-- | Container bundling the seven record lists that make up a taxonomy dump.
--
-- NOTE(review): each list presumably corresponds to one file of the NCBI
-- taxonomy dump archive - confirm against the parser module.
data NCBITaxDump = NCBITaxDump
  { taxCitations :: [TaxCitation]
    -- ^ Citation records.
  , taxDelNodes :: [TaxDelNode]
    -- ^ Deleted-node records.
  , taxDivisions :: [TaxDivision]
    -- ^ Division records.
  , taxGenCodes :: [TaxGenCode]
    -- ^ Genetic-code records.
  , taxMergedNodes :: [TaxMergedNode]
    -- ^ Merged-node records.
  , taxNames :: [TaxName]
    -- ^ Name records.
  , taxNodes :: [TaxNode]
    -- ^ Node records.
  }
  deriving (Show, Read, Eq)
-- | A citation record. Only 'citId' is mandatory; every other field is
-- wrapped in 'Maybe'.
data TaxCitation = TaxCitation
  { citId :: Int
    -- ^ Identifier of the citation.
  , citKey :: Maybe String
    -- ^ Citation key, if present.
  , pubmedId :: Maybe Int
    -- ^ PubMed identifier, if present.
  , medlineId :: Maybe Int
    -- ^ Medline identifier, if present.
  , url :: Maybe String
    -- ^ URL associated with the citation, if present.
  , text :: Maybe String
    -- ^ Free text of the citation, if present.
  , taxIdList :: Maybe [Int]
    -- ^ Taxon identifiers attached to the citation, if present.
  }
  deriving (Show, Read, Eq)
-- | Record for a deleted node; it holds nothing but the identifier.
data TaxDelNode = TaxDelNode
  { delTaxId :: Int
    -- ^ Identifier of the deleted node.
  }
  deriving (Show, Read, Eq)
-- | A division record: numeric identifier, code, name and optional comments.
data TaxDivision = TaxDivision
  { divisionId :: Int
    -- ^ Identifier of the division.
  , divisionCDE :: String
    -- ^ Code of the division (free text).
  , divisonName :: String
    -- ^ Name of the division. The field name is misspelled ("divison")
    -- but is part of the public interface.
  , divisionComments :: Maybe String
    -- ^ Comments, if present.
  }
  deriving (Show, Read, Eq)
-- | A genetic-code record.
data TaxGenCode = TaxGenCode
  { geneticCodeId :: Int
    -- ^ Identifier of the genetic code.
  , abbreviation :: Maybe String
    -- ^ Abbreviation, if present.
  , geneCodeName :: String
    -- ^ Name of the genetic code.
  , cde :: String
    -- ^ NOTE(review): presumably the translation table of this genetic
    -- code - confirm against the data source.
  , starts :: String
    -- ^ NOTE(review): presumably the start codons of this genetic code -
    -- confirm against the data source.
  }
  deriving (Show, Read, Eq)
-- | Record pairing an old identifier with a new one, describing a node that
-- has been merged into another.
data TaxMergedNode = TaxMergedNode
  { oldTaxId :: Int
    -- ^ Identifier before the merge.
  , newTaxId :: Int
    -- ^ Identifier after the merge.
  }
  deriving (Show, Read, Eq)
-- | A name record attached to a taxon identifier. All textual fields are
-- kept as raw bytes.
data TaxName = TaxName
  { nameTaxId :: Int
    -- ^ Identifier of the taxon this name belongs to.
  , nameTxt :: B.ByteString
    -- ^ The name itself.
  , uniqueName :: B.ByteString
    -- ^ Unique variant of the name.
  , nameClass :: B.ByteString
    -- ^ Class of the name.
  }
  deriving (Show, Read, Eq)
-- | Taxonomic ranks.
--
-- 'Norank' comes first, followed by the named ranks from 'Form' up to
-- 'Domain'. The derived 'Ord', 'Enum' and 'Bounded' instances follow this
-- constructor order, so reordering the constructors changes comparisons.
data Rank
  = Norank
  | Form
  | Variety
  | Infraspecies
  | Subspecies
  | Speciessubgroup
  | Species
  | Speciesgroup
  | Superspecies
  | Series
  | Section
  | Subgenus
  | Genus
  | Subtribe
  | Tribe
  | Supertribe
  | Subfamily
  | Family
  | Superfamily
  | Parvorder
  | Infraorder
  | Suborder
  | Order
  | Superorder
  | Magnorder
  | Cohort
  | Legion
  | Parvclass
  | Infraclass
  | Subclass
  | Class
  | Superclass
  | Microphylum
  | Infraphylum
  | Subphylum
  | Phylum
  | Superphylum
  | Infrakingdom
  | Subkingdom
  | Kingdom
  | Superkingdom
  | Domain
  deriving (Eq, Ord, Show, Bounded, Enum)

-- | Reads a rank from its lower-case name via 'readsRank'; the precedence
-- argument is ignored.
instance Read Rank where
  readsPrec _ = readsRank

-- | Map a lower-case rank name (for example @"genus"@ or @"species group"@)
-- to its 'Rank'.
--
-- The comparison is against the /whole/ input string and the returned
-- remainder is always empty, so this parser only suits reading a bare rank
-- name; it does not compose with parsers that expect leftover input.
-- Any unrecognised input yields 'Norank' rather than a parse failure.
readsRank :: String -> [(Rank, String)]
readsRank input =
  case lookup input rankNames of
    Just knownRank -> [(knownRank, "")]
    Nothing -> [(Norank, "")]
  where
    -- Keys must match the input exactly. The entries for "parvclass" and
    -- "magnorder" previously carried a trailing space and therefore never
    -- matched the real rank names.
    rankNames =
      [ ("domain", Domain)
      , ("superkingdom", Superkingdom)
      , ("kingdom", Kingdom)
      , ("subkingdom", Subkingdom)
      , ("infrakingdom", Infrakingdom)
      , ("superphylum", Superphylum)
      , ("phylum", Phylum)
      , ("subphylum", Subphylum)
      , ("infraphylum", Infraphylum)
      , ("microphylum", Microphylum)
      , ("superclass", Superclass)
      , ("class", Class)
      , ("subclass", Subclass)
      , ("infraclass", Infraclass)
      , ("parvclass", Parvclass)
      , ("legion", Legion)
      , ("cohort", Cohort)
      , ("magnorder", Magnorder)
      , ("superorder", Superorder)
      , ("order", Order)
      , ("suborder", Suborder)
      , ("infraorder", Infraorder)
      , ("parvorder", Parvorder)
      , ("superfamily", Superfamily)
      , ("family", Family)
      , ("subfamily", Subfamily)
      , ("supertribe", Supertribe)
      , ("tribe", Tribe)
      , ("subtribe", Subtribe)
      , ("genus", Genus)
      , ("subgenus", Subgenus)
      , ("section", Section)
      , ("series", Series)
      , ("superspecies", Superspecies)
      , ("species group", Speciesgroup)
      , ("species", Species)
      , ("species subgroup", Speciessubgroup)
      , ("subspecies", Subspecies)
      , ("infraspecies", Infraspecies)
      , ("varietas", Variety)
      , ("forma", Form)
      , ("no rank", Norank)
      ]
-- | A node record of the taxonomy: identifiers, rank, assorted codes and
-- boolean flags.
--
-- Note that the division and genetic-code identifiers are stored as
-- 'String', not 'Int'.
data TaxNode = TaxNode
  { taxId :: Int
    -- ^ Identifier of this node.
  , parentTaxId :: Int
    -- ^ Identifier of the parent node.
  , rank :: Rank
    -- ^ Taxonomic rank of this node.
  , emblCode :: Maybe String
    -- ^ EMBL code, if present.
  , nodeDivisionId :: String
    -- ^ Division identifier, kept as text.
  , inheritedDivFlag :: Bool
    -- ^ Flag associated with the division; presumably set when the
    -- division is inherited from the parent - confirm.
  , nodeGeneticCodeId :: String
    -- ^ Genetic-code identifier, kept as text.
  , inheritedGCFlag :: Bool
    -- ^ Flag associated with the genetic code; presumably set when it is
    -- inherited from the parent - confirm.
  , mitochondrialGeneticCodeId :: String
    -- ^ Mitochondrial genetic-code identifier, kept as text.
  , inheritedMGCFlag :: Bool
    -- ^ Flag associated with the mitochondrial genetic code; presumably
    -- set when it is inherited from the parent - confirm.
  , genBankHiddenFlag :: Bool
    -- ^ GenBank "hidden" flag.
  , hiddenSubtreeRootFlag :: Bool
    -- ^ "Hidden subtree root" flag.
  , nodeComments :: Maybe String
    -- ^ Comments, if present.
  }
  deriving (Show, Read, Eq)
-- | Reduced gene-to-accession record: a taxon identifier paired with one
-- accession string.
data SimpleGene2Accession = SimpleGene2Accession
  { simpleTaxIdEntry :: Int
    -- ^ Taxon identifier of the entry.
  , simpleGenomicNucleotideAccessionVersion :: String
    -- ^ Genomic nucleotide accession (with version), kept as text.
  }
  deriving (Show, Eq, Read)
-- | Full gene-to-accession record. Apart from the two numeric identifiers,
-- every column is kept as an unparsed 'String'.
--
-- NOTE(review): the field set presumably follows the columns of the NCBI
-- @gene2accession@ table - confirm against the parser that fills it.
data Gene2Accession = Gene2Accession
  { taxIdEntry                         :: Int
    -- ^ Taxon identifier of the entry.
  , geneID                             :: Int
    -- ^ Gene identifier.
  , status                             :: String
    -- ^ Status column.
  , rnaNucleotideAccessionVersion      :: String
    -- ^ RNA nucleotide accession (with version).
  , rnaNucleotideGi                    :: String
    -- ^ RNA nucleotide GI, kept as text.
  , proteinAccessionVersion            :: String
    -- ^ Protein accession (with version).
  , proteinGi                          :: String
    -- ^ Protein GI, kept as text.
  , genomicNucleotideAccessionVersion  :: String
    -- ^ Genomic nucleotide accession (with version).
  , genomicNucleotideGi                :: String
    -- ^ Genomic nucleotide GI, kept as text.
  , startPositionOnTheGenomicAccession :: String
    -- ^ Start position on the genomic accession, kept as text.
  , endPositionOnTheGenomicAccession   :: String
    -- ^ End position on the genomic accession, kept as text.
  , orientation                        :: String
    -- ^ Orientation column.
  , assembly                           :: String
    -- ^ Assembly column.
  , maturePeptideAccessionVersion      :: String
    -- ^ Mature peptide accession (with version).
  , maturePeptideGi                    :: String
    -- ^ Mature peptide GI, kept as text.
  }
  deriving (Show, Eq, Read)
-- | Serialises a taxonomy graph as the nested JSON tree produced by
-- 'simpleTaxonJSONValue'. The traversal always starts at node @1@.
instance A.ToJSON (Gr SimpleTaxon Double) where
  toJSON taxonomyGraph = simpleTaxonJSONValue taxonomyGraph rootNode
    where
      -- NOTE(review): node 1 is presumably the root of the taxonomy -
      -- confirm against the code that builds the graph.
      rootNode = 1
-- | Render the part of @inputGraph@ reachable from @node@ as a nested JSON
-- object of the form @{"name": ..., "children": [...]}@.
--
-- * @"name"@ is the scientific name stored in the node label, or the
--   literal @"notFound"@ when the node has no label in the graph.
-- * @"children"@ is an array holding the same structure for every
--   successor of @node@.
--
-- The recursion follows successor edges without tracking visited nodes, so
-- it does not terminate if a cycle is reachable from @node@.
simpleTaxonJSONValue :: Gr SimpleTaxon Double -> Node -> A.Value
simpleTaxonJSONValue inputGraph node =
  A.object
    [ T.pack "name" A..= nodeName
    , T.pack "children" A..= childValues
    ]
  where
    nodeName =
      maybe
        (T.pack "notFound")
        (decodeName . simpleScientificName)
        (lab inputGraph node)
    childValues =
      A.Array
        (V.fromList (map (simpleTaxonJSONValue inputGraph) (suc inputGraph node)))
    -- 'decodeUtf8' throws from pure code on malformed input. Decode
    -- strictly first and fall back to Latin-1, which accepts every byte
    -- sequence, so serialisation can never crash on a badly encoded name.
    decodeName rawName =
      either
        (const (Data.Text.Encoding.decodeLatin1 rawName))
        id
        (Data.Text.Encoding.decodeUtf8' rawName)