fix problem in siphon

This commit is contained in:
Andrew Martin 2017-07-20 22:15:17 -04:00
parent 83e069d1b6
commit 45c961fdd1
2 changed files with 37 additions and 14 deletions

View File

@ -251,8 +251,11 @@ field !delim = do
case mb of case mb of
Just b Just b
| b == doubleQuote -> do | b == doubleQuote -> do
bs <- escapedField delim (bs,tc) <- escapedField delim
return (CellResultData bs) case tc of
TrailCharComma -> return (CellResultData bs)
TrailCharNewline -> return (CellResultNewline bs EndedNo)
TrailCharEnd -> return (CellResultNewline bs EndedYes)
| b == 10 || b == 13 -> do | b == 10 || b == 13 -> do
_ <- eatNewlines _ <- eatNewlines
isEnd <- A.atEnd isEnd <- A.atEnd
@ -271,21 +274,31 @@ field !delim = do
-- | Take the leading run of bytes equal to 10 (LF) or 13 (CR) with
-- 'A.takeWhile' and return the bytes that were taken.
-- (Unchanged context: the old and new diff columns are identical here.)
eatNewlines :: AL.Parser S.ByteString eatNewlines :: AL.Parser S.ByteString
eatNewlines = A.takeWhile (\x -> x == 10 || x == 13) eatNewlines = A.takeWhile (\x -> x == 10 || x == 13)
-- | Parse a double-quoted field. Each line of this side-by-side diff shows the
-- old text followed by the new text. The old version returned only the field
-- contents; the new version returns the contents paired with a 'TrailChar'
-- describing what followed the closing quote.
--
-- NOTE(review): the new body never mentions @delim@. The old body skipped an
-- optional byte matching @delim@, while the new body compares against @comma@.
-- Confirm whether delimiters other than comma are meant to work here.
escapedField :: Word8 -> AL.Parser S.ByteString escapedField :: Word8 -> AL.Parser (S.ByteString,TrailChar)
escapedField !delim = do escapedField !delim = do
_ <- dquote _ <- dquote
-- The scan state is 'True' if the previous character was a double -- The scan state is 'True' if the previous character was a double
-- quote. We need to drop a trailing double quote left by scan. -- quote. We need to drop a trailing double quote left by scan.
-- NOTE(review): S.init is applied to whatever scan returns, with no check
-- that the result is non-empty (e.g. input ending right after the opening
-- quote) -- TODO confirm S.init's behaviour on an empty string.
s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote s <- S.init <$>
then Just (not s) ( A.scan False $ \s c ->
else if s then Nothing if c == doubleQuote
else Just False) then Just (not s)
A.option () (A.skip (== delim)) else if s
then Nothing
else Just False
)
-- New in this commit: look at the next byte without consuming it and
-- classify it as comma / newline-or-cr / no more input.
mb <- A.peekWord8
trailChar <- case mb of
Just b
| b == comma -> A.anyWord8 >> return TrailCharComma
-- NOTE(review): a single A.anyWord8 is run here, so presumably the second
-- byte of a CR LF pair is left for the caller -- confirm this is intended.
| b == newline || b == cr -> A.anyWord8 >> return TrailCharNewline
-- This branch is reached for every byte that is not comma, newline or cr.
-- NOTE(review): the scan above appears to stop only on a byte that is not a
-- double quote, so the message text may not match the actual byte -- verify.
| otherwise -> fail "encountered double quote after escaped field"
Nothing -> return TrailCharEnd
-- 'unescape' is only run when the contents contain a double quote;
-- otherwise the scanned bytes are returned unchanged.
if doubleQuote `S.elem` s if doubleQuote `S.elem` s
then case Z.parse unescape s of then case Z.parse unescape s of
Right r -> return r Right r -> return (r,trailChar)
Left err -> fail err Left err -> fail err
else return s else return (s,trailChar)
-- | What followed a parsed field, as produced by the field parsers in this
-- diff: 'TrailCharComma' when the next byte was a comma, 'TrailCharNewline'
-- when it was a newline or cr byte, and 'TrailCharEnd' when peeking found no
-- further byte.
data TrailChar = TrailCharNewline | TrailCharComma | TrailCharEnd data TrailChar = TrailCharNewline | TrailCharComma | TrailCharEnd
@ -303,7 +316,7 @@ unescapedField !delim = do
Just b Just b
| b == comma -> A.anyWord8 >> return (bs,TrailCharComma) | b == comma -> A.anyWord8 >> return (bs,TrailCharComma)
| b == newline || b == cr -> A.anyWord8 >> return (bs,TrailCharNewline) | b == newline || b == cr -> A.anyWord8 >> return (bs,TrailCharNewline)
| otherwise -> fail "encounter double quote in unescaped field" | otherwise -> fail "encountered double quote in unescaped field"
Nothing -> return (bs,TrailCharEnd) Nothing -> return (bs,TrailCharEnd)
dquote :: AL.Parser Char dquote :: AL.Parser Char

View File

@ -76,7 +76,17 @@ tests =
] ]
) )
) @?= ([(244,'z',True)] :> Nothing) ) @?= ([(244,'z',True)] :> Nothing)
, testCase "Headed Decoding (escaped characters)" , testCase "Headed Decoding (escaped characters, one big chunk)"
$ ( runIdentity . SMP.toList )
( S.decodeHeadedUtf8Csv decodingF
( SMP.yield $ BC8.pack $ concat
[ "name\n"
, "drew\n"
, "\"martin, drew\"\n"
]
)
) @?= (["drew","martin, drew"] :> Nothing)
, testCase "Headed Decoding (escaped characters, character per chunk)"
$ ( runIdentity . SMP.toList ) $ ( runIdentity . SMP.toList )
( S.decodeHeadedUtf8Csv decodingF ( S.decodeHeadedUtf8Csv decodingF
( mapM_ (SMP.yield . BC8.singleton) $ concat ( mapM_ (SMP.yield . BC8.singleton) $ concat