colonnade-old/colonnade/src/Colonnade/Decoding.hs

161 lines
6.6 KiB
Haskell

{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
module Colonnade.Decoding where
import Colonnade.Internal (EitherWrap(..),mapLeft)
import Colonnade.Types
import Data.Functor.Contravariant
import Data.Vector (Vector)
import qualified Data.Vector as Vector
import Data.Char (chr)
-- | Converts the content type of a 'Decolonnade'. The @'Contravariant' f@
-- constraint means that @f@ can be 'Headless' but not 'Headed'.
contramapContent :: forall c1 c2 f a. Contravariant f => (c2 -> c1) -> Decolonnade f c1 a -> Decolonnade f c2 a
contramapContent f = go
where
go :: forall b. Decolonnade f c1 b -> Decolonnade f c2 b
go (DecolonnadePure x) = DecolonnadePure x
go (DecolonnadeAp h decode apNext) =
DecolonnadeAp (contramap f h) (decode . f) (go apNext)
headless :: (content -> Either String a) -> Decolonnade Headless content a
headless f = DecolonnadeAp Headless f (DecolonnadePure id)
headed :: content -> (content -> Either String a) -> Decolonnade Headed content a
headed h f = DecolonnadeAp (Headed h) f (DecolonnadePure id)
indexed :: Int -> (content -> Either String a) -> Decolonnade (Indexed Headless) content a
indexed ix f = DecolonnadeAp (Indexed ix Headless) f (DecolonnadePure id)
maxIndex :: forall f c a. Decolonnade (Indexed f) c a -> Int
maxIndex = go 0 where
go :: forall b. Int -> Decolonnade (Indexed f) c b -> Int
go !ix (DecolonnadePure _) = ix
go !ix1 (DecolonnadeAp (Indexed ix2 _) decode apNext) =
go (max ix1 ix2) apNext
-- | This function uses 'unsafeIndex' to access
-- elements of the 'Vector'.
uncheckedRunWithRow ::
Int
-> Decolonnade (Indexed f) content a
-> Vector content
-> Either (DecolonnadeRowError f content) a
uncheckedRunWithRow i d v = mapLeft (DecolonnadeRowError i . RowErrorDecode) (uncheckedRun d v)
-- | This function does not check to make sure that the indicies in
-- the 'Decolonnade' are in the 'Vector'.
uncheckedRun :: forall content a f.
Decolonnade (Indexed f) content a
-> Vector content
-> Either (DecolonnadeCellErrors f content) a
uncheckedRun dc v = getEitherWrap (go dc)
where
go :: forall b.
Decolonnade (Indexed f) content b
-> EitherWrap (DecolonnadeCellErrors f content) b
go (DecolonnadePure b) = EitherWrap (Right b)
go (DecolonnadeAp ixed@(Indexed ix h) decode apNext) =
let rnext = go apNext
content = Vector.unsafeIndex v ix
rcurrent = mapLeft (DecolonnadeCellErrors . Vector.singleton . DecolonnadeCellError content ixed) (decode content)
in rnext <*> (EitherWrap rcurrent)
headlessToIndexed :: forall c a.
Decolonnade Headless c a -> Decolonnade (Indexed Headless) c a
headlessToIndexed = go 0 where
go :: forall b. Int -> Decolonnade Headless c b -> Decolonnade (Indexed Headless) c b
go !ix (DecolonnadePure a) = DecolonnadePure a
go !ix (DecolonnadeAp Headless decode apNext) =
DecolonnadeAp (Indexed ix Headless) decode (go (ix + 1) apNext)
length :: forall f c a. Decolonnade f c a -> Int
length = go 0 where
go :: forall b. Int -> Decolonnade f c b -> Int
go !a (DecolonnadePure _) = a
go !a (DecolonnadeAp _ _ apNext) = go (a + 1) apNext
-- | Maps over a 'Decolonnade' that expects headers, converting these
-- expected headers into the indices of the columns that they
-- correspond to.
headedToIndexed :: forall content a. Eq content
=> Vector content -- ^ Headers in the source document
-> Decolonnade Headed content a -- ^ Decolonnade that contains expected headers
-> Either (HeadingErrors content) (Decolonnade (Indexed Headed) content a)
headedToIndexed v = getEitherWrap . go
where
go :: forall b. Eq content
=> Decolonnade Headed content b
-> EitherWrap (HeadingErrors content) (Decolonnade (Indexed Headed) content b)
go (DecolonnadePure b) = EitherWrap (Right (DecolonnadePure b))
go (DecolonnadeAp hd@(Headed h) decode apNext) =
let rnext = go apNext
ixs = Vector.elemIndices h v
ixsLen = Vector.length ixs
rcurrent
| ixsLen == 1 = Right (Vector.unsafeIndex ixs 0)
| ixsLen == 0 = Left (HeadingErrors (Vector.singleton h) Vector.empty)
| otherwise = Left (HeadingErrors Vector.empty (Vector.singleton (h,ixsLen)))
in (\ix ap -> DecolonnadeAp (Indexed ix hd) decode ap)
<$> EitherWrap rcurrent
<*> rnext
-- | This adds one to the index because text editors consider
-- line number to be one-based, not zero-based.
prettyError :: (c -> String) -> DecolonnadeRowError f c -> String
prettyError toStr (DecolonnadeRowError ix e) = unlines
$ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.")
: ("Error Category: " ++ descr)
: map (" " ++) errDescrs
where (descr,errDescrs) = prettyRowError toStr e
prettyRowError :: (content -> String) -> RowError f content -> (String, [String])
prettyRowError toStr x = case x of
RowErrorParse err -> (,) "CSV Parsing"
[ "The line could not be parsed into cells correctly."
, "Original parser error: " ++ err
]
RowErrorSize reqLen actualLen -> (,) "Row Length"
[ "Expected the row to have exactly " ++ show reqLen ++ " cells."
, "The row only has " ++ show actualLen ++ " cells."
]
RowErrorMinSize reqLen actualLen -> (,) "Row Min Length"
[ "Expected the row to have at least " ++ show reqLen ++ " cells."
, "The row only has " ++ show actualLen ++ " cells."
]
RowErrorMalformed enc -> (,) "Text Decolonnade"
[ "Tried to decode the input as " ++ enc ++ " text"
, "There is a mistake in the encoding of the text."
]
RowErrorHeading errs -> (,) "Header" (prettyHeadingErrors toStr errs)
RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors toStr errs)
prettyCellErrors :: (c -> String) -> DecolonnadeCellErrors f c -> [String]
prettyCellErrors toStr (DecolonnadeCellErrors errs) = drop 1 $
flip concatMap errs $ \(DecolonnadeCellError content (Indexed ix _) msg) ->
let str = toStr content in
[ "-----------"
, "Column " ++ columnNumToLetters ix
, "Original parse error: " ++ msg
, "Cell Content Length: " ++ show (Prelude.length str)
, "Cell Content: " ++ if null str
then "[empty cell]"
else str
]
prettyHeadingErrors :: (c -> String) -> HeadingErrors c -> [String]
prettyHeadingErrors conv (HeadingErrors missing duplicates) = concat
[ concatMap (\h -> ["The header " ++ conv h ++ " was missing."]) missing
, concatMap (\(h,n) -> ["The header " ++ conv h ++ " occurred " ++ show n ++ " times."]) duplicates
]
columnNumToLetters :: Int -> String
columnNumToLetters i
| i >= 0 && i < 25 = [chr (i + 65)]
| otherwise = "Beyond Z. Fix this."