Copyright | © 2016-2017 George Steel and Peter Jurgec |
---|---|
License | GPL-2+ |
Maintainer | george.steel@gmail.com |
Safe Haskell | None |
Language | Haskell2010 |
Implementations of deterministic finite state transducers containing both a polymorphic DFST
functor and a typeclass for fast specialized types (with several implementations provided). Input alphabets are assumed to be finite rectangles inside Ix
types. Trandsuction functions are provided for both Monoid
and Semiring
output types. DFSTs may be created directly, generated from globs, or from smaller DFSTs using the product construction.
Optimized C functions for common output types (
, Sum
Int
Multicount
,
, and Expectation
Vec
) are included which use the Expectation
Double
PackedDFA
typeclass to convert to and from the generic type. The specialized types additionally support the following operations.
- Packing strings into a compact format (
PackedText
) - Transducing packed strings into integer counts and Multicounts
- Summing over all paths of expectation transducers
- Applying maxent weights to vector counts to get expectations
- fnArray :: (Ix i, IArray a e) => (i, i) -> (i -> e) -> a i e
- xbd :: (a, a) -> (b, b) -> ((a, b), (a, b))
- data DFST q sigma k = DFST {
- initialState :: q
- transitionMatrix :: Array (q, sigma) (q, k)
- finalWeights :: Array q k
- stateBounds :: (Ix q, Ix sigma) => DFST q sigma w -> (q, q)
- segBounds :: (Ix q, Ix sigma) => DFST q sigma k -> (sigma, sigma)
- transition :: (Ix q, Ix sigma) => DFST q sigma k -> q -> sigma -> (q, k)
- transduceM :: (Ix q, Ix sigma, Monoid k) => DFST q sigma k -> [sigma] -> k
- transduceR :: (Ix q, Ix sigma, Semiring k) => DFST q sigma k -> [sigma] -> k
- class PackedDFA pd k | pd -> k where
- pruneUnreachable :: forall q sigma k. (Ix q, Ix sigma) => DFST q sigma k -> DFST Int sigma k
- pruneAndPack :: forall q sigma pd k. (Ix q, Ix sigma, PackedDFA pd k) => DFST q sigma k -> pd sigma
- rawIntersection :: (Ix q1, Ix q2, Ix sigma) => (k1 -> k2 -> k3) -> DFST q1 sigma k1 -> DFST q2 sigma k2 -> DFST (q1, q2) sigma k3
- dfaProduct :: (Ix l1, Ix l2, Ix sigma) => (w1 -> w2 -> w3) -> DFST l1 sigma w1 -> DFST l2 sigma w2 -> DFST Int sigma w3
- nildfa :: (Ix sigma, Monoid k) => (sigma, sigma) -> DFST Int sigma k
- data PackedText sigma
- packSingleText :: Ix sigma => (sigma, sigma) -> [sigma] -> PackedText sigma
- packMultiText :: Ix sigma => (sigma, sigma) -> [([sigma], Int)] -> PackedText sigma
- data ShortDFST sigma
- transducePackedShort :: Ix sigma => ShortDFST sigma -> PackedText sigma -> Int
- data MulticountDFST sigma
- transducePackedMulti :: Ix sigma => MulticountDFST sigma -> PackedText sigma -> Multicount
- data ExpVecDFST sigma
- weightExpVec :: Ix sigma => MulticountDFST sigma -> Vec -> ExpVecDFST sigma
- expsByLengthVec :: Ix sigma => ExpVecDFST sigma -> Int -> Array Int (Expectation Vec)
- data ExpDoubleDFST sigma
- weightExpPartial :: Ix sigma => MulticountDFST sigma -> Vec -> Vec -> ExpDoubleDFST sigma
- expsByLengthDouble :: Ix sigma => ExpDoubleDFST sigma -> Int -> Array Int (Expectation Double)
- data GlobReps
- type SegSet sigma = UArray sigma Bool
- data ListGlob sigma = ListGlob !Bool !Bool [(GlobReps, SegSet sigma)]
- matchCounter :: forall sigma. Ix sigma => ListGlob sigma -> ShortDFST sigma
Documentation
fnArray :: (Ix i, IArray a e) => (i, i) -> (i -> e) -> a i e Source #
Create an array by caching a function over a rectangle. Depending on the array type used, this can be used to memoise or precompute.
xbd :: (a, a) -> (b, b) -> ((a, b), (a, b)) Source #
Turn a pair of interval tuples into an interval of pairs. Used to compute array bounds for a cartesian product.
Polymorphic DFSTs
Polymorphic type for deterministic finite state transducers.
For an efficient implementation, the set of states and input characters are both limited to be Ix
rectangles and their product is the array bounds.
This type is a functor over its output type (note that transduction is only possible into a Monoid
or Semiring
).
DFST | |
|
segBounds :: (Ix q, Ix sigma) => DFST q sigma k -> (sigma, sigma) Source #
boounds for accepted segments (characters)
transition :: (Ix q, Ix sigma) => DFST q sigma k -> q -> sigma -> (q, k) Source #
advance by one state and get weight output
transduceM :: (Ix q, Ix sigma, Monoid k) => DFST q sigma k -> [sigma] -> k Source #
Transduce a string of segments where and output the product of the weights (as a Monoid).
transduceR :: (Ix q, Ix sigma, Semiring k) => DFST q sigma k -> [sigma] -> k Source #
Transduce a string of segments where and output the product of the weights (as a Ring).
Specialized DFSTs
class PackedDFA pd k | pd -> k where Source #
Typeclass for converting speaiclized DFSTs to and from polymorphic ones. This is used by several optimized versions for various output types that can be manupulated by fast C functions.
numStates :: Ix sigma => pd sigma -> Int Source #
Number fo states in DFA
psegBounds :: Ix sigma => pd sigma -> (sigma, sigma) Source #
Bounds of input rectangle
unpackDFA :: Ix sigma => pd sigma -> DFST Int sigma k Source #
umpack
packDFA :: forall sigma. Ix sigma => Int16 -> Int16 -> (sigma, sigma) -> ((Int16, sigma) -> Int16) -> ((Int16, sigma) -> k) -> (Int16 -> k) -> pd sigma Source #
Pack a DFA from its component functions. Use pruneAndPack
to convert a polymorphic DFST
.
pruneUnreachable :: forall q sigma k. (Ix q, Ix sigma) => DFST q sigma k -> DFST Int sigma k Source #
Remove unreachable states and renumber as integers (starting from 1) using a mark-sweep algorithm. Result is polymorphic.
pruneAndPack :: forall q sigma pd k. (Ix q, Ix sigma, PackedDFA pd k) => DFST q sigma k -> pd sigma Source #
Prune unreachable states and pack into a specialized implementation
rawIntersection :: (Ix q1, Ix q2, Ix sigma) => (k1 -> k2 -> k3) -> DFST q1 sigma k1 -> DFST q2 sigma k2 -> DFST (q1, q2) sigma k3 Source #
Lifts a weight combining function into one that combines DFAs. Performs no pruning and new states are all pairs of old ones. For boolean weights, use (&&) for intersection and (||) for union.
dfaProduct :: (Ix l1, Ix l2, Ix sigma) => (w1 -> w2 -> w3) -> DFST l1 sigma w1 -> DFST l2 sigma w2 -> DFST Int sigma w3 Source #
Product construction with pruning.
nildfa :: (Ix sigma, Monoid k) => (sigma, sigma) -> DFST Int sigma k Source #
Given input bounds, Construct a DFST which always returns mempty for any string.
data PackedText sigma Source #
Structure holding text and word frequencies as a flat array of segment indices (in the input rectangle) for fast transduction.
packSingleText :: Ix sigma => (sigma, sigma) -> [sigma] -> PackedText sigma Source #
Pack a single string
packMultiText :: Ix sigma => (sigma, sigma) -> [([sigma], Int)] -> PackedText sigma Source #
Pack a list of string, fewquency pairs
Optimized DFST specialized to transduce integers
transducePackedShort :: Ix sigma => ShortDFST sigma -> PackedText sigma -> Int Source #
Fast transduction of integers. For multiple words, returns the sum of all transductions.
data MulticountDFST sigma Source #
Optimized DFST specialized to transduce into Multicount
mnd count multiple quantities in parallel.
PackedDFA MulticountDFST Multicount Source # | |
Show sigma => Show (MulticountDFST sigma) Source # | |
NFData sigma => NFData (MulticountDFST sigma) Source # | |
transducePackedMulti :: Ix sigma => MulticountDFST sigma -> PackedText sigma -> Multicount Source #
Fast transduction of Multicount
. For multiple words, returns the sum of all transductions.
data ExpVecDFST sigma Source #
Optimized DFST form calculating verctor expectations over the entire probability distribution defined by the DFA.
PackedDFA ExpVecDFST (Expectation Vec) Source # | |
Show sigma => Show (ExpVecDFST sigma) Source # | |
weightExpVec :: Ix sigma => MulticountDFST sigma -> Vec -> ExpVecDFST sigma Source #
Assign maxent weights to the counts in a Multicount to get expectations (which include probabilities).
expsByLengthVec :: Ix sigma => ExpVecDFST sigma -> Int -> Array Int (Expectation Vec) Source #
Get the total expectations over each length of string up to a maximum
data ExpDoubleDFST sigma Source #
Optimized DFST form calculating scalar expectations over the entire probability distribution defined by the DFA.
PackedDFA ExpDoubleDFST (Expectation Double) Source # | |
Show sigma => Show (ExpDoubleDFST sigma) Source # | |
weightExpPartial :: Ix sigma => MulticountDFST sigma -> Vec -> Vec -> ExpDoubleDFST sigma Source #
Assign maxent weights to the counts in a Multicount to and apply a covector to the resulting expectations.
expsByLengthDouble :: Ix sigma => ExpDoubleDFST sigma -> Int -> Array Int (Expectation Double) Source #
Get the total expectations over each length of string up to a maximum
Glob recognition
Type for Glob quantifiers
type SegSet sigma = UArray sigma Bool Source #
Fast reperesentation of a set of segments by its characteristic function over an enclosing rectangle of segments.
Glob of segment lists, nore generalized version of ngrams allowing for repeated classes as well as single ones. The two boolean parameters restrict the glob to match a prefixes or suffixes only.
matchCounter :: forall sigma. Ix sigma => ListGlob sigma -> ShortDFST sigma Source #
Create a DFST countign the violations of a ListGlob. Each SegSet
in the glob must have the same bounds and the glob must not be empty.