fix up css parsing

This commit is contained in:
Greg Weber 2011-08-09 09:24:16 -07:00
parent ae3b146712
commit 66545752c6
4 changed files with 81 additions and 44 deletions

View File

@ -59,7 +59,7 @@ If anyone knows of better sources or thinks a particular tag/attribute/value may
style attribute style attribute
---------------- ----------------
style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved. style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved. This code was again translated from sanitizer.py, but uses attopoarsec-text instead of regexes.
data attributes data attributes
------------------------- -------------------------

View File

@ -21,6 +21,7 @@ import Network.URI ( parseURIReference, URI (..),
import Codec.Binary.UTF8.String ( encodeString ) import Codec.Binary.UTF8.String ( encodeString )
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe (catMaybes)
@ -74,7 +75,7 @@ safeTags (t@(TagClose name):tags)
| otherwise = safeTags tags | otherwise = safeTags tags
safeTags (TagOpen name attributes:tags) safeTags (TagOpen name attributes:tags)
| safeTagName name = TagOpen name | safeTagName name = TagOpen name
(map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags (catMaybes $ map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags
| otherwise = safeTags tags | otherwise = safeTags tags
safeTags (t:tags) = t:safeTags tags safeTags (t:tags) = t:safeTags tags
@ -85,9 +86,10 @@ safeAttribute :: (Text, Text) -> Bool
safeAttribute (name, value) = name `member` sanitaryAttributes && safeAttribute (name, value) = name `member` sanitaryAttributes &&
(name `notMember` uri_attributes || sanitaryURI value) (name `notMember` uri_attributes || sanitaryURI value)
sanitizeAttribute :: (Text, Text) -> (Text, Text) sanitizeAttribute :: (Text, Text) -> Maybe (Text, Text)
sanitizeAttribute ("style", value) = ("style", sanitizeCSS value) sanitizeAttribute ("style", value) =
sanitizeAttribute attrs = attrs let css = sanitizeCSS value in if T.null css then Nothing else Just ("style", css)
sanitizeAttribute attr = Just attr
-- | Returns @True@ if the specified URI is not a potential security risk. -- | Returns @True@ if the specified URI is not a potential security risk.
@ -174,7 +176,7 @@ acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
"replace", "required", "rev", "rightspacing", "rows", "rowspan", "replace", "required", "rev", "rightspacing", "rows", "rowspan",
"rules", "scope", "selected", "shape", "size", "span", "start", "rules", "scope", "selected", "shape", "size", "span", "start",
"step", "step",
-- "style", TODO: allow this with further filtering "style", -- gets further filtering
"summary", "suppress", "tabindex", "target", "summary", "suppress", "tabindex", "target",
"template", "title", "toppadding", "type", "unselectable", "usemap", "template", "title", "toppadding", "type", "unselectable", "usemap",
"urn", "valign", "value", "variable", "volume", "vspace", "vrml", "urn", "valign", "value", "variable", "volume", "vspace", "vrml",

View File

@ -1,5 +1,10 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings, CPP #-}
module Text.HTML.SanitizeXSS.Css (sanitizeCSS) where module Text.HTML.SanitizeXSS.Css (
sanitizeCSS
#ifdef TEST
, allowedCssAttributeValue
#endif
) where
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
@ -8,23 +13,48 @@ import Data.Text.Lazy.Builder (toLazyText)
import Data.Text.Lazy (toStrict) import Data.Text.Lazy (toStrict)
import Data.Set (member, fromList, Set) import Data.Set (member, fromList, Set)
import Data.Char (isDigit) import Data.Char (isDigit)
import Control.Applicative ((<|>)) import Control.Applicative ((<|>), pure)
import Text.CSS.Render (renderAttrs) import Text.CSS.Render (renderAttrs)
import Text.CSS.Parse (parseAttrs) import Text.CSS.Parse (parseAttrs)
import Prelude hiding (takeWhile)
-- import FileLocation (debug, debugM)
-- this is a direct translation from sanitizer.py, except -- this is a direct translation from sanitizer.py, except
-- sanitizer.py filters out url(), but this is redundant -- sanitizer.py filters out url(), but this is redundant
sanitizeCSS :: Text -> Text sanitizeCSS :: Text -> Text
sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $ parseAttributes sanitizeCSS css = toStrict . toLazyText .
renderAttrs . filter isSanitaryAttr . filterUrl $ parseAttributes
where where
filterUrl :: [(Text,Text)] -> [(Text,Text)]
filterUrl = map filterUrlAttribute
where
filterUrlAttribute :: (Text, Text) -> (Text, Text)
filterUrlAttribute (prop,value) =
case parseOnly rejectUrl value of
Left _ -> (prop,value)
Right noUrl -> filterUrlAttribute (prop, noUrl)
rejectUrl = do
pre <- manyTill anyChar (string "url")
skipMany space
_<-char '('
skipWhile (/= ')')
_<-char ')'
rest <- takeText
return $ T.append (T.pack pre) rest
parseAttributes = case parseAttrs css of parseAttributes = case parseAttrs css of
Left _ -> [] Left _ -> []
Right as -> as Right as -> as
isSanitaryAttr (_, "") = False
isSanitaryAttr ("",_) = False
isSanitaryAttr (prop, value) isSanitaryAttr (prop, value)
| prop `member` allowed_css_properties = True | prop `member` allowed_css_properties = True
| (T.takeWhile (/= '-') value) `member` allowed_css_unit_properties && | (T.takeWhile (/= '-') prop) `member` allowed_css_unit_properties &&
all allowedCssAttributeValue (T.words value) = True all allowedCssAttributeValue (T.words value) = True
| prop `member` allowed_svg_properties = True | prop `member` allowed_svg_properties = True
| otherwise = False | otherwise = False
@ -32,40 +62,45 @@ sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $
allowed_css_unit_properties :: Set Text allowed_css_unit_properties :: Set Text
allowed_css_unit_properties = fromList ["background","border","margin","padding"] allowed_css_unit_properties = fromList ["background","border","margin","padding"]
allowedCssAttributeValue :: Text -> Bool allowedCssAttributeValue :: Text -> Bool
allowedCssAttributeValue val = allowedCssAttributeValue val =
val `member` allowed_css_keywords || val `member` allowed_css_keywords ||
case parseOnly allowedCssAttributeParser val of case parseOnly allowedCssAttributeParser val of
Left _ -> False Left _ -> False
Right b -> b Right b -> b
where where
allowedCssAttributeParser = do allowedCssAttributeParser = do
hex <|> rgb <|> cssUnit rgb <|> hex <|> rgb <|> cssUnit
aToF = fromList "abcdef" aToF = fromList "abcdef"
hex = do hex = do
_ <- char '#' _ <- char '#'
hx <- takeText hx <- takeText
return $ T.all (\c -> isDigit c || (c `member` aToF)) hx return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
rgb = do -- should have used sepBy (symbol ",")
_<- string "rgb(" rgb = do
skip isDigit >> try (skipWhile isDigit) >> try (skip (== '%')) _<- string "rgb("
skip (== ',') skipMany1 digit >> skipOk (== '%')
try (skipWhile isDigit) >> try (skip (== '%')) skip (== ',')
skip (== ',') skipMany digit >> skipOk (== '%')
try (skipWhile isDigit) >> try (skip (== '%')) skip (== ',')
skip (== ',') skipMany digit >> skipOk (== '%')
skip (== ')') skip (== ')')
return True return True
cssUnit = do cssUnit = do
try $ skip isDigit >> skip isDigit skip isDigit
try $ skip (== '.') skipOk isDigit
try $ skip isDigit >> skip isDigit skipOk (== '.')
unit <- takeText skipOk isDigit >> skipOk isDigit
return $ unit `member` allowed_css_attribute_value_units skipSpace
unit <- takeText
return $ T.null unit || unit `member` allowed_css_attribute_value_units
skipOk :: (Char -> Bool) -> Parser ()
skipOk p = skip p <|> pure ()
allowed_css_attribute_value_units :: Set Text allowed_css_attribute_value_units :: Set Text
allowed_css_attribute_value_units = fromList allowed_css_attribute_value_units = fromList

View File

@ -21,7 +21,6 @@ library
, css-text >= 0.1 && < 0.2 , css-text >= 0.1 && < 0.2
, text >= 0.11 && < 0.12 , text >= 0.11 && < 0.12
, attoparsec-text >= 0.8.5.1 && < 0.9 , attoparsec-text >= 0.8.5.1 && < 0.9
, file-location
exposed-modules: Text.HTML.SanitizeXSS exposed-modules: Text.HTML.SanitizeXSS
@ -31,15 +30,16 @@ library
test-suite test test-suite test
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
main-is: test/main.hs main-is: test/main.hs
cpp-options: -DTEST
build-depends: base >= 4 && < 5, containers build-depends: base >= 4 && < 5, containers
, xss-sanitize >= 0.3
, tagsoup >= 0.11 , tagsoup >= 0.11
, utf8-string >= 0.3 , utf8-string >= 0.3
, network >= 2 , network >= 2
, css-text >= 0.1 && < 0.2 , css-text >= 0.1 && < 0.2
, text >= 0.11 && < 0.12 , text >= 0.11 && < 0.12
, attoparsec-text >= 0.8.5.1 && < 0.9 , attoparsec-text >= 0.8.5.1 && < 0.9
, file-location , hspec
, HUnit >= 1.2
source-repository head source-repository head