mirror of
https://github.com/byteverse/colonnade.git
synced 2026-05-05 22:35:44 +02:00
fix problem in siphon
This commit is contained in:
parent
83e069d1b6
commit
45c961fdd1
@ -251,8 +251,11 @@ field !delim = do
|
|||||||
case mb of
|
case mb of
|
||||||
Just b
|
Just b
|
||||||
| b == doubleQuote -> do
|
| b == doubleQuote -> do
|
||||||
bs <- escapedField delim
|
(bs,tc) <- escapedField delim
|
||||||
return (CellResultData bs)
|
case tc of
|
||||||
|
TrailCharComma -> return (CellResultData bs)
|
||||||
|
TrailCharNewline -> return (CellResultNewline bs EndedNo)
|
||||||
|
TrailCharEnd -> return (CellResultNewline bs EndedYes)
|
||||||
| b == 10 || b == 13 -> do
|
| b == 10 || b == 13 -> do
|
||||||
_ <- eatNewlines
|
_ <- eatNewlines
|
||||||
isEnd <- A.atEnd
|
isEnd <- A.atEnd
|
||||||
@ -271,21 +274,31 @@ field !delim = do
|
|||||||
eatNewlines :: AL.Parser S.ByteString
|
eatNewlines :: AL.Parser S.ByteString
|
||||||
eatNewlines = A.takeWhile (\x -> x == 10 || x == 13)
|
eatNewlines = A.takeWhile (\x -> x == 10 || x == 13)
|
||||||
|
|
||||||
escapedField :: Word8 -> AL.Parser S.ByteString
|
escapedField :: Word8 -> AL.Parser (S.ByteString,TrailChar)
|
||||||
escapedField !delim = do
|
escapedField !delim = do
|
||||||
_ <- dquote
|
_ <- dquote
|
||||||
-- The scan state is 'True' if the previous character was a double
|
-- The scan state is 'True' if the previous character was a double
|
||||||
-- quote. We need to drop a trailing double quote left by scan.
|
-- quote. We need to drop a trailing double quote left by scan.
|
||||||
s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote
|
s <- S.init <$>
|
||||||
then Just (not s)
|
( A.scan False $ \s c ->
|
||||||
else if s then Nothing
|
if c == doubleQuote
|
||||||
else Just False)
|
then Just (not s)
|
||||||
A.option () (A.skip (== delim))
|
else if s
|
||||||
|
then Nothing
|
||||||
|
else Just False
|
||||||
|
)
|
||||||
|
mb <- A.peekWord8
|
||||||
|
trailChar <- case mb of
|
||||||
|
Just b
|
||||||
|
| b == comma -> A.anyWord8 >> return TrailCharComma
|
||||||
|
| b == newline || b == cr -> A.anyWord8 >> return TrailCharNewline
|
||||||
|
| otherwise -> fail "encountered double quote after escaped field"
|
||||||
|
Nothing -> return TrailCharEnd
|
||||||
if doubleQuote `S.elem` s
|
if doubleQuote `S.elem` s
|
||||||
then case Z.parse unescape s of
|
then case Z.parse unescape s of
|
||||||
Right r -> return r
|
Right r -> return (r,trailChar)
|
||||||
Left err -> fail err
|
Left err -> fail err
|
||||||
else return s
|
else return (s,trailChar)
|
||||||
|
|
||||||
data TrailChar = TrailCharNewline | TrailCharComma | TrailCharEnd
|
data TrailChar = TrailCharNewline | TrailCharComma | TrailCharEnd
|
||||||
|
|
||||||
@ -303,7 +316,7 @@ unescapedField !delim = do
|
|||||||
Just b
|
Just b
|
||||||
| b == comma -> A.anyWord8 >> return (bs,TrailCharComma)
|
| b == comma -> A.anyWord8 >> return (bs,TrailCharComma)
|
||||||
| b == newline || b == cr -> A.anyWord8 >> return (bs,TrailCharNewline)
|
| b == newline || b == cr -> A.anyWord8 >> return (bs,TrailCharNewline)
|
||||||
| otherwise -> fail "encounter double quote in unescaped field"
|
| otherwise -> fail "encountered double quote in unescaped field"
|
||||||
Nothing -> return (bs,TrailCharEnd)
|
Nothing -> return (bs,TrailCharEnd)
|
||||||
|
|
||||||
dquote :: AL.Parser Char
|
dquote :: AL.Parser Char
|
||||||
|
|||||||
@ -76,7 +76,17 @@ tests =
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
) @?= ([(244,'z',True)] :> Nothing)
|
) @?= ([(244,'z',True)] :> Nothing)
|
||||||
, testCase "Headed Decoding (escaped characters)"
|
, testCase "Headed Decoding (escaped characters, one big chunk)"
|
||||||
|
$ ( runIdentity . SMP.toList )
|
||||||
|
( S.decodeHeadedUtf8Csv decodingF
|
||||||
|
( SMP.yield $ BC8.pack $ concat
|
||||||
|
[ "name\n"
|
||||||
|
, "drew\n"
|
||||||
|
, "\"martin, drew\"\n"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
) @?= (["drew","martin, drew"] :> Nothing)
|
||||||
|
, testCase "Headed Decoding (escaped characters, character per chunk)"
|
||||||
$ ( runIdentity . SMP.toList )
|
$ ( runIdentity . SMP.toList )
|
||||||
( S.decodeHeadedUtf8Csv decodingF
|
( S.decodeHeadedUtf8Csv decodingF
|
||||||
( mapM_ (SMP.yield . BC8.singleton) $ concat
|
( mapM_ (SMP.yield . BC8.singleton) $ concat
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user