fix up css parsing
This commit is contained in:
parent
ae3b146712
commit
66545752c6
@ -59,7 +59,7 @@ If anyone knows of better sources or thinks a particular tag/attribute/value may
|
|||||||
|
|
||||||
style attribute
|
style attribute
|
||||||
----------------
|
----------------
|
||||||
style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved.
|
style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved. This code was again translated from sanitizer.py, but uses attopoarsec-text instead of regexes.
|
||||||
|
|
||||||
data attributes
|
data attributes
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|||||||
@ -21,6 +21,7 @@ import Network.URI ( parseURIReference, URI (..),
|
|||||||
import Codec.Binary.UTF8.String ( encodeString )
|
import Codec.Binary.UTF8.String ( encodeString )
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
import Data.Maybe (catMaybes)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -74,7 +75,7 @@ safeTags (t@(TagClose name):tags)
|
|||||||
| otherwise = safeTags tags
|
| otherwise = safeTags tags
|
||||||
safeTags (TagOpen name attributes:tags)
|
safeTags (TagOpen name attributes:tags)
|
||||||
| safeTagName name = TagOpen name
|
| safeTagName name = TagOpen name
|
||||||
(map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags
|
(catMaybes $ map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags
|
||||||
| otherwise = safeTags tags
|
| otherwise = safeTags tags
|
||||||
safeTags (t:tags) = t:safeTags tags
|
safeTags (t:tags) = t:safeTags tags
|
||||||
|
|
||||||
@ -85,9 +86,10 @@ safeAttribute :: (Text, Text) -> Bool
|
|||||||
safeAttribute (name, value) = name `member` sanitaryAttributes &&
|
safeAttribute (name, value) = name `member` sanitaryAttributes &&
|
||||||
(name `notMember` uri_attributes || sanitaryURI value)
|
(name `notMember` uri_attributes || sanitaryURI value)
|
||||||
|
|
||||||
sanitizeAttribute :: (Text, Text) -> (Text, Text)
|
sanitizeAttribute :: (Text, Text) -> Maybe (Text, Text)
|
||||||
sanitizeAttribute ("style", value) = ("style", sanitizeCSS value)
|
sanitizeAttribute ("style", value) =
|
||||||
sanitizeAttribute attrs = attrs
|
let css = sanitizeCSS value in if T.null css then Nothing else Just ("style", css)
|
||||||
|
sanitizeAttribute attr = Just attr
|
||||||
|
|
||||||
|
|
||||||
-- | Returns @True@ if the specified URI is not a potential security risk.
|
-- | Returns @True@ if the specified URI is not a potential security risk.
|
||||||
@ -174,7 +176,7 @@ acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
|
|||||||
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
|
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
|
||||||
"rules", "scope", "selected", "shape", "size", "span", "start",
|
"rules", "scope", "selected", "shape", "size", "span", "start",
|
||||||
"step",
|
"step",
|
||||||
-- "style", TODO: allow this with further filtering
|
"style", -- gets further filtering
|
||||||
"summary", "suppress", "tabindex", "target",
|
"summary", "suppress", "tabindex", "target",
|
||||||
"template", "title", "toppadding", "type", "unselectable", "usemap",
|
"template", "title", "toppadding", "type", "unselectable", "usemap",
|
||||||
"urn", "valign", "value", "variable", "volume", "vspace", "vrml",
|
"urn", "valign", "value", "variable", "volume", "vspace", "vrml",
|
||||||
|
|||||||
@ -1,5 +1,10 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings, CPP #-}
|
||||||
module Text.HTML.SanitizeXSS.Css (sanitizeCSS) where
|
module Text.HTML.SanitizeXSS.Css (
|
||||||
|
sanitizeCSS
|
||||||
|
#ifdef TEST
|
||||||
|
, allowedCssAttributeValue
|
||||||
|
#endif
|
||||||
|
) where
|
||||||
|
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
@ -8,23 +13,48 @@ import Data.Text.Lazy.Builder (toLazyText)
|
|||||||
import Data.Text.Lazy (toStrict)
|
import Data.Text.Lazy (toStrict)
|
||||||
import Data.Set (member, fromList, Set)
|
import Data.Set (member, fromList, Set)
|
||||||
import Data.Char (isDigit)
|
import Data.Char (isDigit)
|
||||||
import Control.Applicative ((<|>))
|
import Control.Applicative ((<|>), pure)
|
||||||
import Text.CSS.Render (renderAttrs)
|
import Text.CSS.Render (renderAttrs)
|
||||||
import Text.CSS.Parse (parseAttrs)
|
import Text.CSS.Parse (parseAttrs)
|
||||||
|
import Prelude hiding (takeWhile)
|
||||||
|
|
||||||
|
-- import FileLocation (debug, debugM)
|
||||||
|
|
||||||
|
|
||||||
-- this is a direct translation from sanitizer.py, except
|
-- this is a direct translation from sanitizer.py, except
|
||||||
-- sanitizer.py filters out url(), but this is redundant
|
-- sanitizer.py filters out url(), but this is redundant
|
||||||
sanitizeCSS :: Text -> Text
|
sanitizeCSS :: Text -> Text
|
||||||
sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $ parseAttributes
|
sanitizeCSS css = toStrict . toLazyText .
|
||||||
|
renderAttrs . filter isSanitaryAttr . filterUrl $ parseAttributes
|
||||||
where
|
where
|
||||||
|
filterUrl :: [(Text,Text)] -> [(Text,Text)]
|
||||||
|
filterUrl = map filterUrlAttribute
|
||||||
|
where
|
||||||
|
filterUrlAttribute :: (Text, Text) -> (Text, Text)
|
||||||
|
filterUrlAttribute (prop,value) =
|
||||||
|
case parseOnly rejectUrl value of
|
||||||
|
Left _ -> (prop,value)
|
||||||
|
Right noUrl -> filterUrlAttribute (prop, noUrl)
|
||||||
|
|
||||||
|
rejectUrl = do
|
||||||
|
pre <- manyTill anyChar (string "url")
|
||||||
|
skipMany space
|
||||||
|
_<-char '('
|
||||||
|
skipWhile (/= ')')
|
||||||
|
_<-char ')'
|
||||||
|
rest <- takeText
|
||||||
|
return $ T.append (T.pack pre) rest
|
||||||
|
|
||||||
|
|
||||||
parseAttributes = case parseAttrs css of
|
parseAttributes = case parseAttrs css of
|
||||||
Left _ -> []
|
Left _ -> []
|
||||||
Right as -> as
|
Right as -> as
|
||||||
|
|
||||||
|
isSanitaryAttr (_, "") = False
|
||||||
|
isSanitaryAttr ("",_) = False
|
||||||
isSanitaryAttr (prop, value)
|
isSanitaryAttr (prop, value)
|
||||||
| prop `member` allowed_css_properties = True
|
| prop `member` allowed_css_properties = True
|
||||||
| (T.takeWhile (/= '-') value) `member` allowed_css_unit_properties &&
|
| (T.takeWhile (/= '-') prop) `member` allowed_css_unit_properties &&
|
||||||
all allowedCssAttributeValue (T.words value) = True
|
all allowedCssAttributeValue (T.words value) = True
|
||||||
| prop `member` allowed_svg_properties = True
|
| prop `member` allowed_svg_properties = True
|
||||||
| otherwise = False
|
| otherwise = False
|
||||||
@ -32,40 +62,45 @@ sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $
|
|||||||
allowed_css_unit_properties :: Set Text
|
allowed_css_unit_properties :: Set Text
|
||||||
allowed_css_unit_properties = fromList ["background","border","margin","padding"]
|
allowed_css_unit_properties = fromList ["background","border","margin","padding"]
|
||||||
|
|
||||||
allowedCssAttributeValue :: Text -> Bool
|
allowedCssAttributeValue :: Text -> Bool
|
||||||
allowedCssAttributeValue val =
|
allowedCssAttributeValue val =
|
||||||
val `member` allowed_css_keywords ||
|
val `member` allowed_css_keywords ||
|
||||||
case parseOnly allowedCssAttributeParser val of
|
case parseOnly allowedCssAttributeParser val of
|
||||||
Left _ -> False
|
Left _ -> False
|
||||||
Right b -> b
|
Right b -> b
|
||||||
where
|
where
|
||||||
allowedCssAttributeParser = do
|
allowedCssAttributeParser = do
|
||||||
hex <|> rgb <|> cssUnit
|
rgb <|> hex <|> rgb <|> cssUnit
|
||||||
|
|
||||||
aToF = fromList "abcdef"
|
aToF = fromList "abcdef"
|
||||||
|
|
||||||
hex = do
|
hex = do
|
||||||
_ <- char '#'
|
_ <- char '#'
|
||||||
hx <- takeText
|
hx <- takeText
|
||||||
return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
|
return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
|
||||||
|
|
||||||
rgb = do
|
-- should have used sepBy (symbol ",")
|
||||||
_<- string "rgb("
|
rgb = do
|
||||||
skip isDigit >> try (skipWhile isDigit) >> try (skip (== '%'))
|
_<- string "rgb("
|
||||||
skip (== ',')
|
skipMany1 digit >> skipOk (== '%')
|
||||||
try (skipWhile isDigit) >> try (skip (== '%'))
|
skip (== ',')
|
||||||
skip (== ',')
|
skipMany digit >> skipOk (== '%')
|
||||||
try (skipWhile isDigit) >> try (skip (== '%'))
|
skip (== ',')
|
||||||
skip (== ',')
|
skipMany digit >> skipOk (== '%')
|
||||||
skip (== ')')
|
skip (== ')')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
cssUnit = do
|
cssUnit = do
|
||||||
try $ skip isDigit >> skip isDigit
|
skip isDigit
|
||||||
try $ skip (== '.')
|
skipOk isDigit
|
||||||
try $ skip isDigit >> skip isDigit
|
skipOk (== '.')
|
||||||
unit <- takeText
|
skipOk isDigit >> skipOk isDigit
|
||||||
return $ unit `member` allowed_css_attribute_value_units
|
skipSpace
|
||||||
|
unit <- takeText
|
||||||
|
return $ T.null unit || unit `member` allowed_css_attribute_value_units
|
||||||
|
|
||||||
|
skipOk :: (Char -> Bool) -> Parser ()
|
||||||
|
skipOk p = skip p <|> pure ()
|
||||||
|
|
||||||
allowed_css_attribute_value_units :: Set Text
|
allowed_css_attribute_value_units :: Set Text
|
||||||
allowed_css_attribute_value_units = fromList
|
allowed_css_attribute_value_units = fromList
|
||||||
|
|||||||
@ -21,7 +21,6 @@ library
|
|||||||
, css-text >= 0.1 && < 0.2
|
, css-text >= 0.1 && < 0.2
|
||||||
, text >= 0.11 && < 0.12
|
, text >= 0.11 && < 0.12
|
||||||
, attoparsec-text >= 0.8.5.1 && < 0.9
|
, attoparsec-text >= 0.8.5.1 && < 0.9
|
||||||
, file-location
|
|
||||||
|
|
||||||
|
|
||||||
exposed-modules: Text.HTML.SanitizeXSS
|
exposed-modules: Text.HTML.SanitizeXSS
|
||||||
@ -31,15 +30,16 @@ library
|
|||||||
test-suite test
|
test-suite test
|
||||||
type: exitcode-stdio-1.0
|
type: exitcode-stdio-1.0
|
||||||
main-is: test/main.hs
|
main-is: test/main.hs
|
||||||
|
cpp-options: -DTEST
|
||||||
build-depends: base >= 4 && < 5, containers
|
build-depends: base >= 4 && < 5, containers
|
||||||
, xss-sanitize >= 0.3
|
|
||||||
, tagsoup >= 0.11
|
, tagsoup >= 0.11
|
||||||
, utf8-string >= 0.3
|
, utf8-string >= 0.3
|
||||||
, network >= 2
|
, network >= 2
|
||||||
, css-text >= 0.1 && < 0.2
|
, css-text >= 0.1 && < 0.2
|
||||||
, text >= 0.11 && < 0.12
|
, text >= 0.11 && < 0.12
|
||||||
, attoparsec-text >= 0.8.5.1 && < 0.9
|
, attoparsec-text >= 0.8.5.1 && < 0.9
|
||||||
, file-location
|
, hspec
|
||||||
|
, HUnit >= 1.2
|
||||||
|
|
||||||
|
|
||||||
source-repository head
|
source-repository head
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user