uni2work.workflows.visualiser/app/YamlParser.hs
2023-08-28 02:06:32 +02:00

237 lines
10 KiB
Haskell

-- SPDX-FileCopyrightText: 2023 David Mosbach <david.mosbach@campus.lmu.de>
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
{-# Language DuplicateRecordFields,
NoFieldSelectors,
OverloadedRecordDot #-}
module YamlParser where
import Prelude hiding (lookup)
import qualified Prelude as P
import Control.Monad.State.Lazy
import Data.Map.Lazy (Map, insert, lookup, empty, fromList, toList, (!))
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
import Data.Text.Lazy (toStrict)
import Debug.Trace (trace)
import Data.Maybe (fromJust, isNothing, isJust, fromMaybe)
import Data.Text (pack, unpack, Text)
import Data.YAML (decode1Strict, Node, Pos, Parser, parseEither)
import Data.YAML.Event hiding (Scalar)
import qualified Data.YAML.Event as Y
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as BS.L
activateTrace = False
showTrace :: EvPos -> a -> a
showTrace event action = if activateTrace
then trace (show (eEvent event) ++ " @" ++ show (posLine $ ePos event)) action
else action
data ParseState = PState {
rootNodes :: [YAMLNode],
anchors :: Map Text YAMLNode,
comments :: [Comment] -- YAML comment queue for the next node.
}
data AnchorData = NoAnchor | AnchorDef Text | AnchorAlias Text deriving (Show, Eq, Ord)
data MergeData = MergeData {keys :: [Text], anchor :: AnchorData} deriving (Show, Eq, Ord)
data YAMLNode =
Scalar {
bytes :: BS.ByteString,
{-tag :: Tag,-}
{-style :: Style,-}
comment :: [Comment], -- TODO every node preceded by a scalar preceded by a comment stores said comment
anchorData :: AnchorData,
pos :: Pos
} | Mapping {
content :: [(YAMLNode, YAMLNode)],
comment :: [Comment],
anchorData :: AnchorData,
mergeData :: [MergeData], -- keys of the maps merged into this mapping by "<<"
pos :: Pos
} | Sequence {
children :: [YAMLNode],
comment :: [Comment],
anchorData :: AnchorData,
pos :: Pos
} deriving (Show, Eq)
instance Ord YAMLNode where
(Scalar b1 _ _ _) <= (Scalar b2 _ _ _) = b1 <= b2
_ <= _ = undefined
type Comment = Text
parse :: EvStream -> State ParseState YAMLNode
parse ((Right (EvPos (DocumentEnd _) pos)):_) = get >>= \pState -> return $ if length pState.rootNodes == 1
then head pState.rootNodes
else Sequence pState.rootNodes [] NoAnchor pos
parse [] = get >>= \pState -> return $ if length pState.rootNodes == 1
then head pState.rootNodes
else Sequence pState.rootNodes [] NoAnchor undefined
parse ((Right (EvPos StreamStart _)):es) = parseComment es >>= parse
parse ((Right (EvPos (DocumentStart _) _)):es) = parse es
parse es = do
(root, es') <- parseNode es
pState <- get
when (isJust root) . put $ pState {rootNodes = fromJust root : pState.rootNodes}
parse es'
parseComment :: EvStream -> State ParseState EvStream
parseComment ((Right (EvPos (Y.Comment comment) _)):es) = do
pState <- get
put $ pState {comments = comment : pState.comments}
parseComment es
parseComment es = return es
parseNode :: EvStream -> State ParseState (Maybe YAMLNode, EvStream)
parseNode [] = trace "Unexpected eof" $ return (Nothing, [])
parseNode ((Left (p,s)):es) = trace ("Failed to parse: " ++ show s ++ " @ line " ++ show p.posLine) $ parseNode es
parseNode es@((Right (EvPos event pos)):es') = do
pState <- get
showTrace (EvPos event pos) $ case event of
Y.Comment _ -> parseComment es >>= parseNode
Y.SequenceStart anchor _ _ -> parseSequence es' anchor [] >>= \(seq, es'') -> return (Just seq, es'')
Y.MappingStart anchor _ _ -> parseMapping es' anchor [] [] >>= \(map, es'') -> return (Just map, es'')
Y.Scalar anchor _ _ text -> parseScalar anchor text pos >>= \scal -> return (Just scal, es')
Y.Alias anchor -> parseAlias anchor >>= \a -> return (Just a, es')
_ -> return (Nothing, es) -- error $ "Unexpected event: " ++ show event ++ " @" ++ show (posLine pos)
parseSequence :: EvStream -> Maybe Anchor -> [YAMLNode] -> State ParseState (YAMLNode, EvStream)
parseSequence ((Right (EvPos SequenceEnd pos)):es) anchor children = showTrace (EvPos SequenceEnd pos) $ do
pState <- get
let anchorData = maybe NoAnchor AnchorDef anchor
let seq = Sequence (reverse children) [] anchorData pos
let anchors = if isNothing anchor then pState.anchors else insert (fromJust anchor) seq pState.anchors
put $ pState {anchors = anchors}
return (seq, es)
parseSequence es anchor children = do
(child, es') <- parseNode es
case child of
Nothing -> parseSequence es' anchor children
Just c -> parseSequence es' anchor (c : children)
parseMapping :: EvStream -> Maybe Anchor -> [(YAMLNode, YAMLNode)] -> [MergeData] -> State ParseState (YAMLNode, EvStream)
parseMapping ((Right (EvPos MappingEnd pos)):es) anchor content mergeData = showTrace (EvPos MappingEnd pos) $ do
pState <- get
let anchorData = maybe NoAnchor AnchorDef anchor
let map = Mapping (reverse content) [] anchorData mergeData pos
let anchors = if isNothing anchor then pState.anchors else insert (fromJust anchor) map pState.anchors
put $ pState {anchors = anchors}
return (map, es)
parseMapping es anchor content mergeData = do
(maybeKey, es') <- parseNode es
case maybeKey of
Nothing -> parseMapping es' anchor content mergeData
Just key -> do
unless (isScalar key) . error $ "Key not a scalar: " ++ show key
(maybeVal, es'') <- parseNode es'
let val = fromJust maybeVal
if isMerge key then do
let (content', mergeKeys) = mergeMappings [] content val
let mergeData' = (MergeData mergeKeys key.anchorData) : mergeData
parseMapping es'' anchor content' mergeData'
else do
let content' = (key {comment = []}, val {comment = key.comment}) : content -- migrate comment to val to preserve it for the workflow data structure. alternative. don't use Data.Map for e.g. nodes and stages but a custom type and transfer it later.
parseMapping es'' anchor content' mergeData where
isScalar :: YAMLNode -> Bool
isScalar (Scalar {}) = True
isScalar _ = False
isMapping :: YAMLNode -> Bool
isMapping (Mapping {}) = True
isMapping _ = False
isSequence :: YAMLNode -> Bool
isSequence (Sequence {}) = True
isSequence _ = False
isMerge :: YAMLNode -> Bool
isMerge (Scalar b _ _ _) = unpack (decodeUtf8 b) == "<<"
mergeMappings :: [Text] -> [(YAMLNode, YAMLNode)] -> YAMLNode -> ([(YAMLNode, YAMLNode)], [Text])
mergeMappings mergeKeys content (Mapping [] _ _ _ _) = (content, mergeKeys)
mergeMappings mergeKeys content m@(Mapping (x@(key, _):xs) _ _ _ _)
| isJust $ P.lookup key content = mergeMappings mergeKeys content m {content = xs}
| otherwise = mergeMappings ((decodeUtf8 key.bytes) : mergeKeys) (x : content) m {content = xs}
mergeMappings mergeKeys content (Sequence [] _ _ _) = (content, mergeKeys)
mergeMappings mergeKeys content s@(Sequence (m@(Mapping {}):xs) _ _ _) = mergeMappings mergeKeys' content' s {children = xs} where
(content', mergeKeys') = mergeMappings mergeKeys content m
parseScalar :: Maybe Anchor -> Text -> Pos -> State ParseState YAMLNode
parseScalar anchor text pos = do
pState <- get
let comments = pState.comments
let anchorData = maybe NoAnchor AnchorDef anchor
let scal = Scalar (encodeUtf8 text) comments anchorData pos
let anchors = if isNothing anchor then pState.anchors else insert (fromJust anchor) scal pState.anchors
put $ pState {anchors = anchors, comments = []}
return scal
parseAlias :: Anchor -> State ParseState YAMLNode
parseAlias anchor = do
pState <- get
case lookup anchor pState.anchors of
Nothing -> error $ "Anchor '" ++ show anchor ++ "' not defined"
Just node -> return node {anchorData = AnchorAlias anchor}
class FromYAML' a where
fromYAML :: YAMLNode -> Parser a
instance FromYAML' a => FromYAML' (Maybe a) where
fromYAML y@(Scalar bs _ _ _)
| decodeUtf8 bs == pack "null" = pure Nothing
| otherwise = Just <$> fromYAML y
fromYAML y = Just <$> fromYAML y
instance (Ord k, FromYAML' k, FromYAML' v) => FromYAML' (Map k v) where
fromYAML (Mapping c _ _ _ _) = fromList <$> mapM (\(a,b) -> (,) <$> fromYAML a <*> fromYAML b) c
instance FromYAML' Text where
fromYAML (Scalar bs _ _ _) = pure $ decodeUtf8 bs
instance FromYAML' YAMLNode where
fromYAML = pure
instance FromYAML' v => FromYAML' [v] where
fromYAML (Sequence c _ _ _) = mapM fromYAML c
instance (FromYAML' a, FromYAML' b) => FromYAML' (a,b) where
fromYAML (Sequence [n1, n2] _ _ _) = (,) <$> fromYAML n1
<*> fromYAML n2
decodeWithComments1 :: FromYAML' v => BS.L.ByteString -> Either (Pos, String) v
decodeWithComments1 input = do
let events = parseEvents input
-- let mainEvents = validHead events
-- unless (isJust mainEvents) . error $ "Missing DocumentStart event"
let initState = PState [] empty []
let content = evalState (parse events) initState
parseEither . fromYAML $ content
where
validHead :: EvStream -> Maybe EvStream
validHead ((Right (EvPos StreamStart _)):(Right (EvPos (DocumentStart _) _)):es) = Just es
validHead _ = Nothing
decodeWithComments1Strict :: FromYAML' v => BS.ByteString -> Either (Pos, String) v
decodeWithComments1Strict = decodeWithComments1 . BS.L.fromChunks . (:[])
(<|) :: FromYAML' a => [(YAMLNode, YAMLNode)] -> Text -> Parser a
mapping <| key = maybe (fail $ "key " ++ show key ++ " not found") fromYAML (P.lookup key $ prep mapping) where
prep :: [(YAMLNode, YAMLNode)] -> [(Text, YAMLNode)]
prep mapping = reverse [(decodeUtf8 scalar.bytes, val) | (scalar, val) <- mapping]
(<|?) :: FromYAML' a => [(YAMLNode, YAMLNode)] -> Text -> Parser (Maybe a)
mapping <|? key = maybe (pure Nothing) fromYAML (P.lookup key $ prep mapping) where
prep :: [(YAMLNode, YAMLNode)] -> [(Text, YAMLNode)]
prep mapping = reverse [(decodeUtf8 scalar.bytes, val) | (scalar, val) <- mapping]