improve csv decoding
This commit is contained in:
parent
b8da6c0fab
commit
45de414367
@ -1,5 +1,6 @@
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
module Colonnade.Decoding where
|
||||
|
||||
import Colonnade.Internal (EitherWrap(..))
|
||||
@ -24,24 +25,47 @@ headless f = DecodingAp Headless f (DecodingPure id)
|
||||
headed :: content -> (content -> Either String a) -> Decoding Headed content a
|
||||
headed h f = DecodingAp (Headed h) f (DecodingPure id)
|
||||
|
||||
-- | This function uses 'unsafeIndex' to access
|
||||
-- elements of the 'Vector'.
|
||||
uncheckedRunWithRow ::
|
||||
Int
|
||||
-> Decoding (Indexed f) content a
|
||||
-> Vector content
|
||||
-> Either (DecodingRowError f content) a
|
||||
uncheckedRunWithRow i d v = mapLeft (DecodingRowError i . RowErrorDecode) (uncheckedRun d v)
|
||||
|
||||
-- | This function does not check to make sure that the indicies in
|
||||
-- the 'Decoding' are in the 'Vector'.
|
||||
uncheckedRun :: forall content a f.
|
||||
Vector content
|
||||
-> Decoding (Indexed f) content a
|
||||
-> Either (DecodingErrors f content) a
|
||||
uncheckedRun v = getEitherWrap . go
|
||||
Decoding (Indexed f) content a
|
||||
-> Vector content
|
||||
-> Either (DecodingCellErrors f content) a
|
||||
uncheckedRun dc v = getEitherWrap (go dc)
|
||||
where
|
||||
go :: forall b.
|
||||
Decoding (Indexed f) content b
|
||||
-> EitherWrap (DecodingErrors f content) b
|
||||
-> EitherWrap (DecodingCellErrors f content) b
|
||||
go (DecodingPure b) = EitherWrap (Right b)
|
||||
go (DecodingAp ixed@(Indexed ix h) decode apNext) =
|
||||
let rnext = go apNext
|
||||
content = Vector.unsafeIndex v ix
|
||||
rcurrent = mapLeft (DecodingErrors . Vector.singleton . DecodingError content ixed) (decode content)
|
||||
rcurrent = mapLeft (DecodingCellErrors . Vector.singleton . DecodingCellError content ixed) (decode content)
|
||||
in rnext <*> (EitherWrap rcurrent)
|
||||
|
||||
headlessToIndexed :: forall c a.
|
||||
Decoding Headless c a -> Decoding (Indexed Headless) c a
|
||||
headlessToIndexed = go 0 where
|
||||
go :: forall b. Int -> Decoding Headless c b -> Decoding (Indexed Headless) c b
|
||||
go !ix (DecodingPure a) = DecodingPure a
|
||||
go !ix (DecodingAp Headless decode apNext) =
|
||||
DecodingAp (Indexed ix Headless) decode (go (ix + 1) apNext)
|
||||
|
||||
length :: forall f c a. Decoding f c a -> Int
|
||||
length = go 0 where
|
||||
go :: forall b. Int -> Decoding f c b -> Int
|
||||
go !a (DecodingPure _) = a
|
||||
go !a (DecodingAp _ _ apNext) = go (a + 1) apNext
|
||||
|
||||
-- | Maps over a 'Decoding' that expects headers, converting these
|
||||
-- expected headers into the indices of the columns that they
|
||||
-- correspond to.
|
||||
|
||||
@ -9,8 +9,10 @@ module Colonnade.Types
|
||||
, Headless(..)
|
||||
, Indexed(..)
|
||||
, HeadingErrors(..)
|
||||
, DecodingError(..)
|
||||
, DecodingErrors(..)
|
||||
, DecodingCellError(..)
|
||||
, DecodingRowError(..)
|
||||
, DecodingCellErrors(..)
|
||||
, RowError(..)
|
||||
) where
|
||||
|
||||
import Data.Vector (Vector)
|
||||
@ -29,8 +31,8 @@ data Headless a = Headless
|
||||
deriving (Eq,Ord,Functor,Show,Read)
|
||||
|
||||
data Indexed f a = Indexed
|
||||
{ indexedIndex :: Int
|
||||
, indexedHeading :: f a
|
||||
{ indexedIndex :: !Int
|
||||
, indexedHeading :: !(f a)
|
||||
} deriving (Eq,Ord,Functor,Show,Read)
|
||||
|
||||
data HeadingErrors content = HeadingErrors
|
||||
@ -45,18 +47,31 @@ instance Monoid (HeadingErrors content) where
|
||||
mappend (HeadingErrors a1 b1) (HeadingErrors a2 b2) = HeadingErrors
|
||||
(a1 Vector.++ a2) (b1 Vector.++ b2)
|
||||
|
||||
data DecodingError f content = DecodingError
|
||||
{ decodingErrorContent :: content
|
||||
, decodingErrorHeader :: Indexed f content
|
||||
, decodingErrorMessage :: String
|
||||
data DecodingCellError f content = DecodingCellError
|
||||
{ decodingCellErrorContent :: !content
|
||||
, decodingCellErrorHeader :: !(Indexed f content)
|
||||
, decodingCellErrorMessage :: !String
|
||||
} deriving (Show,Read)
|
||||
|
||||
-- instance (Show (f content), Typeable content) => Exception (DecodingError f content)
|
||||
|
||||
newtype DecodingErrors f content = DecodingErrors
|
||||
{ getDecodingErrors :: Vector (DecodingError f content)
|
||||
newtype DecodingCellErrors f content = DecodingCellErrors
|
||||
{ getDecodingCellErrors :: Vector (DecodingCellError f content)
|
||||
} deriving (Monoid,Show,Read)
|
||||
|
||||
-- newtype ParseRowError = ParseRowError String
|
||||
|
||||
data DecodingRowError f content = DecodingRowError
|
||||
{ decodingRowErrorRow :: !Int
|
||||
, decodingRowErrorError :: !(RowError f content)
|
||||
}
|
||||
|
||||
data RowError f content
|
||||
= RowErrorParse !String -- ^ Error occurred parsing the document into cells
|
||||
| RowErrorDecode !(DecodingCellErrors f content) -- ^ Error decoding the content
|
||||
| RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
|
||||
| RowErrorHeading !(HeadingErrors content)
|
||||
|
||||
-- instance (Show (f content), Typeable content) => Exception (DecodingErrors f content)
|
||||
|
||||
instance Contravariant Headless where
|
||||
|
||||
@ -1,13 +1,17 @@
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
|
||||
module Siphon.Decoding where
|
||||
|
||||
import Siphon.Types
|
||||
import Colonnade.Types
|
||||
import Siphon.Internal (row,comma)
|
||||
import Data.Text (Text)
|
||||
import Data.ByteString (ByteString)
|
||||
import Pipes (yield,Pipe,Consumer',Producer,await)
|
||||
import Data.Vector (Vector)
|
||||
import qualified Data.Vector as Vector
|
||||
import qualified Colonnade.Decoding as Decoding
|
||||
import qualified Data.Attoparsec.ByteString as AttoByteString
|
||||
import qualified Data.ByteString.Char8 as ByteString
|
||||
import qualified Data.Attoparsec.Types as Atto
|
||||
@ -24,24 +28,96 @@ byteStringChar8 = SiphonDecoding
|
||||
-- -> Vector c
|
||||
-- -> Either DecodingErrors a
|
||||
|
||||
pipe :: Monad m
|
||||
=> SiphonDecoding c1 c2
|
||||
-> Atto.Parser c1 (WithEnd c2)
|
||||
-> Pipe c1 (Vector c2) m String
|
||||
pipe (SiphonDecoding parse isNull) p = go1 where
|
||||
go1 = do
|
||||
-- decodeVectorPipe ::
|
||||
-- Monad m
|
||||
-- => Decoding (Indexed f) c a
|
||||
-- -> Pipe (Vector c) a m ()
|
||||
-- decodeVectorPipe
|
||||
|
||||
mkParseError :: Int -> [String] -> String -> DecodingRowError f content
|
||||
mkParseError i ctxs msg = id
|
||||
$ DecodingRowError i
|
||||
$ RowErrorParse $ concat
|
||||
[ "Contexts: ["
|
||||
, concat ctxs
|
||||
, "], Error Message: ["
|
||||
, msg
|
||||
, "]"
|
||||
]
|
||||
|
||||
headlessPipe :: Monad m
|
||||
=> SiphonDecoding c1 c2
|
||||
-> Decoding Headless c2 a
|
||||
-> Pipe c1 a m (DecodingRowError Headless c2)
|
||||
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
|
||||
where
|
||||
indexedDecoding = Decoding.headlessToIndexed decoding
|
||||
requiredLength = Decoding.length indexedDecoding
|
||||
|
||||
headedPipe :: (Monad m, Eq c2)
|
||||
=> SiphonDecoding c1 c2
|
||||
-> Decoding Headed c2 a
|
||||
-> Pipe c1 a m (DecodingRowError Headed c2)
|
||||
headedPipe sd decoding = do
|
||||
(headers, mleftovers) <- consumeGeneral sd mkParseError
|
||||
case Decoding.headedToIndexed headers decoding of
|
||||
Left headingErrs -> return (DecodingRowError 0 (RowErrorHeading headingErrs))
|
||||
Right indexedDecoding ->
|
||||
let requiredLength = Decoding.length indexedDecoding
|
||||
in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers
|
||||
|
||||
|
||||
uncheckedPipe :: Monad m
|
||||
=> Int -- ^ expected length of each row
|
||||
-> Int -- ^ index of first row, usually zero or one
|
||||
-> SiphonDecoding c1 c2
|
||||
-> Decoding (Indexed f) c2 a
|
||||
-> Maybe c1
|
||||
-> Pipe c1 a m (DecodingRowError f c2)
|
||||
uncheckedPipe requiredLength ix sd d mleftovers =
|
||||
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
|
||||
where
|
||||
checkedRunWithRow rowIx v =
|
||||
let vlen = Vector.length v in
|
||||
if vlen /= requiredLength
|
||||
then Left $ DecodingRowError rowIx
|
||||
$ RowErrorSize requiredLength vlen
|
||||
else Decoding.uncheckedRunWithRow rowIx d v
|
||||
|
||||
consumeGeneral :: Monad m
|
||||
=> SiphonDecoding c1 c2
|
||||
-> (Int -> [String] -> String -> e)
|
||||
-> Consumer' c1 m (Vector c2, Maybe c1)
|
||||
consumeGeneral = error "ahh"
|
||||
|
||||
pipeGeneral :: Monad m
|
||||
=> Int -- ^ index of first row, usually zero or one
|
||||
-> SiphonDecoding c1 c2
|
||||
-> (Int -> [String] -> String -> e)
|
||||
-> (Int -> Vector c2 -> Either e a)
|
||||
-> Maybe c1 -- ^ leftovers that should be handled first
|
||||
-> Pipe c1 a m e
|
||||
pipeGeneral initIx (SiphonDecoding parse isNull) wrapParseError decodeRow mleftovers =
|
||||
case mleftovers of
|
||||
Nothing -> go1 initIx
|
||||
Just leftovers -> handleResult initIx (parse leftovers)
|
||||
where
|
||||
go1 !ix = do
|
||||
c1 <- awaitSkip isNull
|
||||
handleResult (parse c1)
|
||||
go2 c1 = handleResult (parse c1)
|
||||
go3 k = do
|
||||
handleResult ix (parse c1)
|
||||
go2 !ix c1 = handleResult ix (parse c1)
|
||||
go3 !ix k = do
|
||||
c1 <- awaitSkip isNull
|
||||
handleResult (k c1)
|
||||
handleResult r = case r of
|
||||
Atto.Fail _ _ _ -> error "ahh"
|
||||
handleResult ix (k c1)
|
||||
handleResult !ix r = case r of
|
||||
Atto.Fail _ ctxs msg -> return $ wrapParseError ix ctxs msg
|
||||
Atto.Done c1 v -> do
|
||||
yield v
|
||||
if isNull c1 then go1 else go2 c1
|
||||
Atto.Partial k -> go3 k
|
||||
case decodeRow ix v of
|
||||
Left err -> return err
|
||||
Right r -> do
|
||||
yield r
|
||||
if isNull c1 then go1 ix else go2 ix c1
|
||||
Atto.Partial k -> go3 ix k
|
||||
|
||||
awaitSkip :: Monad m
|
||||
=> (a -> Bool)
|
||||
|
||||
@ -18,10 +18,10 @@ data SiphonDecoding c1 c2 = SiphonDecoding
|
||||
, siphonDecodingNull :: c1 -> Bool
|
||||
}
|
||||
|
||||
data WithEnd c = WithEnd
|
||||
{ withEndEnded :: Bool
|
||||
, withEndContent :: c
|
||||
}
|
||||
-- data WithEnd c = WithEnd
|
||||
-- { withEndEnded :: !Bool
|
||||
-- , withEndContent :: !c
|
||||
-- }
|
||||
|
||||
-- data SiphonDecodingError
|
||||
-- { clarify
|
||||
|
||||
Loading…
Reference in New Issue
Block a user