lower case them all. Goes against my philosophy of trying to not modify. But it is easier, and uppercase is completely deprecated.
264 lines
12 KiB
Haskell
264 lines
12 KiB
Haskell
module Text.HTML.SanitizeXSS
|
|
( sanitize
|
|
, sanitizeBalance
|
|
, sanitizeXSS
|
|
, filterTags
|
|
, safeTags
|
|
) where
|
|
|
|
import Text.HTML.TagSoup
|
|
|
|
import Data.Set (Set(), member, notMember, (\\), fromList)
|
|
import Data.Char ( toLower )
|
|
|
|
import Network.URI ( parseURIReference, URI (..),
|
|
isAllowedInURI, escapeURIString, uriScheme )
|
|
import Codec.Binary.UTF8.String ( encodeString )
|
|
|
|
import qualified Data.Map as Map
|
|
|
|
{-
|
|
import Debug.Trace
|
|
debug :: (Show a) => a -> a
|
|
debug a = trace ("DEBUG: " ++ show a) a
|
|
-}
|
|
|
|
|
|
-- | santize the html to prevent XSS attacks. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
|
|
sanitize :: String -> String
|
|
sanitize = sanitizeXSS
|
|
|
|
-- | alias of sanitize function
|
|
sanitizeXSS :: String -> String
|
|
sanitizeXSS = filterTags safeTags
|
|
|
|
-- | same as sanitize but makes sure there are no lone closing tags. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
|
|
sanitizeBalance :: String -> String
|
|
sanitizeBalance = filterTags (balance Map.empty . safeTags)
|
|
|
|
-- | insert custom tag filtering. Don't forget to compose your filter with safeTags!
|
|
filterTags :: ([Tag String] -> [Tag String]) -> String -> String
|
|
filterTags f = renderTagsOptions renderOptions {
|
|
optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
|
|
} . f . canonicalizeTags . parseTags
|
|
|
|
balance :: Map.Map String Int -> [Tag String] -> [Tag String]
|
|
balance m [] =
|
|
concatMap go $ Map.toList m
|
|
where
|
|
go (name, i)
|
|
| noClosing name = []
|
|
| otherwise = replicate i $ TagClose name
|
|
noClosing = flip elem ["br", "img"]
|
|
balance m (t@(TagClose name):tags) =
|
|
case Map.lookup name m of
|
|
Nothing -> TagOpen name [] : TagClose name : balance m tags
|
|
Just i ->
|
|
let m' = if i == 1
|
|
then Map.delete name m
|
|
else Map.insert name (i - 1) m
|
|
in t : balance m' tags
|
|
balance m (TagOpen name as : tags) =
|
|
TagOpen name as : balance m' tags
|
|
where
|
|
m' = case Map.lookup name m of
|
|
Nothing -> Map.insert name 1 m
|
|
Just i -> Map.insert name (i + 1) m
|
|
balance m (t:ts) = t : balance m ts
|
|
|
|
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
|
|
safeTags :: [Tag String] -> [Tag String]
|
|
safeTags [] = []
|
|
safeTags (t@(TagClose name):tags)
|
|
| safeTagName name = t : safeTags tags
|
|
| otherwise = safeTags tags
|
|
safeTags (TagOpen name attributes:tags)
|
|
| safeTagName name = TagOpen name (filter safeAttribute attributes) : safeTags tags
|
|
| otherwise = safeTags tags
|
|
safeTags (t:tags) = t:safeTags tags
|
|
|
|
safeTagName :: String -> Bool
|
|
safeTagName tagname = tagname `member` sanitaryTags
|
|
|
|
safeAttribute :: (String, String) -> Bool
|
|
safeAttribute (name, value) = name `member` sanitaryAttributes &&
|
|
(name `notMember` attrValIsUri || sanitaryURI value)
|
|
|
|
|
|
-- | Returns @True@ if the specified URI is not a potential security risk.
|
|
sanitaryURI :: String -> Bool
|
|
sanitaryURI u =
|
|
case parseURIReference (escapeURI u) of
|
|
Just p -> (null (uriScheme p)) ||
|
|
((map toLower $ init $ uriScheme p) `member` safeURISchemes)
|
|
Nothing -> False
|
|
|
|
|
|
-- | Escape unicode characters in a URI. Characters that are
|
|
-- already valid in a URI, including % and ?, are left alone.
|
|
escapeURI :: String -> String
|
|
escapeURI = escapeURIString isAllowedInURI . encodeString
|
|
|
|
safeURISchemes :: Set String
|
|
safeURISchemes = fromList acceptable_protocols
|
|
|
|
sanitaryTags :: Set String
|
|
sanitaryTags = fromList (acceptable_elements ++ mathml_elements ++ svg_elements)
|
|
\\ (fromList svg_allow_local_href) -- extra filtering not implemented
|
|
|
|
sanitaryAttributes :: Set String
|
|
sanitaryAttributes = fromList (acceptable_attributes ++ mathml_attributes ++ svg_attributes)
|
|
\\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented
|
|
|
|
attrValIsUri :: Set String
|
|
attrValIsUri = fromList ["href", "src", "cite", "action", "longdesc",
|
|
"xlink:href", "xml:base"]
|
|
|
|
acceptable_elements :: [String]
|
|
acceptable_elements = ["a", "abbr", "acronym", "address", "area",
|
|
"article", "aside", "audio", "b", "big", "blockquote", "br", "button",
|
|
"canvas", "caption", "center", "cite", "code", "col", "colgroup",
|
|
"command", "datagrid", "datalist", "dd", "del", "details", "dfn",
|
|
"dialog", "dir", "div", "dl", "dt", "em", "event-source", "fieldset",
|
|
"figure", "footer", "font", "form", "header", "h1", "h2", "h3", "h4",
|
|
"h5", "h6", "hr", "i", "img", "input", "ins", "keygen", "kbd",
|
|
"label", "legend", "li", "m", "map", "menu", "meter", "multicol",
|
|
"nav", "nextid", "ol", "output", "optgroup", "option", "p", "pre",
|
|
"progress", "q", "s", "samp", "section", "select", "small", "sound",
|
|
"source", "spacer", "span", "strike", "strong", "sub", "sup", "table",
|
|
"tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt",
|
|
"u", "ul", "var", "video"]
|
|
|
|
mathml_elements :: [String]
|
|
mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
|
|
"mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom",
|
|
"mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub",
|
|
"msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder",
|
|
"munderover", "none"]
|
|
|
|
-- this should include altGlyph I think
|
|
svg_elements :: [String]
|
|
svg_elements = ["a", "animate", "animateColor", "animateMotion",
|
|
"animateTransform", "clipPath", "circle", "defs", "desc", "ellipse",
|
|
"font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern",
|
|
"linearGradient", "line", "marker", "metadata", "missing-glyph",
|
|
"mpath", "path", "polygon", "polyline", "radialGradient", "rect",
|
|
"set", "stop", "svg", "switch", "text", "title", "tspan", "use"]
|
|
|
|
acceptable_attributes :: [String]
|
|
acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
|
|
"action", "align", "alt", "autocomplete", "autofocus", "axis",
|
|
"background", "balance", "bgcolor", "bgproperties", "border",
|
|
"bordercolor", "bordercolordark", "bordercolorlight", "bottompadding",
|
|
"cellpadding", "cellspacing", "ch", "challenge", "char", "charoff",
|
|
"choff", "charset", "checked", "cite", "class", "clear", "color",
|
|
"cols", "colspan", "compact", "contenteditable", "controls", "coords",
|
|
-- "data", TODO: allow this with further filtering
|
|
"datafld", "datapagesize", "datasrc", "datetime", "default",
|
|
"delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end",
|
|
"face", "for", "form", "frame", "galleryimg", "gutter", "headers",
|
|
"height", "hidefocus", "hidden", "high", "href", "hreflang", "hspace",
|
|
"icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing",
|
|
"lang", "list", "longdesc", "loop", "loopcount", "loopend",
|
|
"loopstart", "low", "lowsrc", "max", "maxlength", "media", "method",
|
|
"min", "multiple", "name", "nohref", "noshade", "nowrap", "open",
|
|
"optimum", "pattern", "ping", "point-size", "prompt", "pqg",
|
|
"radiogroup", "readonly", "rel", "repeat-max", "repeat-min",
|
|
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
|
|
"rules", "scope", "selected", "shape", "size", "span", "src", "start",
|
|
"step",
|
|
-- "style", TODO: allow this with further filtering
|
|
"summary", "suppress", "tabindex", "target",
|
|
"template", "title", "toppadding", "type", "unselectable", "usemap",
|
|
"urn", "valign", "value", "variable", "volume", "vspace", "vrml",
|
|
"width", "wrap", "xml:lang"]
|
|
|
|
acceptable_protocols :: [String]
|
|
acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc",
|
|
"mailto", "news", "gopher", "nntp", "telnet", "webcal",
|
|
"xmpp", "callto", "feed", "urn", "aim", "rsync", "tag",
|
|
"ssh", "sftp", "rtsp", "afs" ]
|
|
|
|
mathml_attributes :: [String]
|
|
mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
|
|
"columnalign", "columnlines", "columnspacing", "columnspan", "depth",
|
|
"display", "displaystyle", "equalcolumns", "equalrows", "fence",
|
|
"fontstyle", "fontweight", "frame", "height", "linethickness", "lspace",
|
|
"mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize",
|
|
"minsize", "other", "rowalign", "rowalign", "rowalign", "rowlines",
|
|
"rowspacing", "rowspan", "rspace", "scriptlevel", "selection",
|
|
"separator", "stretchy", "width", "width", "xlink:href", "xlink:show",
|
|
"xlink:type", "xmlns", "xmlns:xlink"]
|
|
|
|
svg_attributes :: [String]
|
|
svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
|
|
"arabic-form", "ascent", "attributeName", "attributeType",
|
|
"baseProfile", "bbox", "begin", "by", "calcMode", "cap-height",
|
|
"class", "clip-path", "color", "color-rendering", "content", "cx",
|
|
"cy", "d", "dx", "dy", "descent", "display", "dur", "end", "fill",
|
|
"fill-opacity", "fill-rule", "font-family", "font-size",
|
|
"font-stretch", "font-style", "font-variant", "font-weight", "from",
|
|
"fx", "fy", "g1", "g2", "glyph-name", "gradientUnits", "hanging",
|
|
"height", "horiz-adv-x", "horiz-origin-x", "id", "ideographic", "k",
|
|
"keyPoints", "keySplines", "keyTimes", "lang", "marker-end",
|
|
"marker-mid", "marker-start", "markerHeight", "markerUnits",
|
|
"markerWidth", "mathematical", "max", "min", "name", "offset",
|
|
"opacity", "orient", "origin", "overline-position",
|
|
"overline-thickness", "panose-1", "path", "pathLength", "points",
|
|
"preserveAspectRatio", "r", "refX", "refY", "repeatCount",
|
|
"repeatDur", "requiredExtensions", "requiredFeatures", "restart",
|
|
"rotate", "rx", "ry", "slope", "stemh", "stemv", "stop-color",
|
|
"stop-opacity", "strikethrough-position", "strikethrough-thickness",
|
|
"stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap",
|
|
"stroke-linejoin", "stroke-miterlimit", "stroke-opacity",
|
|
"stroke-width", "systemLanguage", "target", "text-anchor", "to",
|
|
"transform", "type", "u1", "u2", "underline-position",
|
|
"underline-thickness", "unicode", "unicode-range", "units-per-em",
|
|
"values", "version", "viewBox", "visibility", "width", "widths", "x",
|
|
"x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole",
|
|
"xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type",
|
|
"xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y",
|
|
"y1", "y2", "zoomAndPan"]
|
|
|
|
-- the values for these need to be escaped
|
|
svg_attr_val_allows_ref :: [String]
|
|
svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill",
|
|
"filter", "marker", "marker-start", "marker-mid", "marker-end",
|
|
"mask", "stroke"]
|
|
|
|
svg_allow_local_href :: [String]
|
|
svg_allow_local_href = ["altGlyph", "animate", "animateColor",
|
|
"animateMotion", "animateTransform", "cursor", "feImage", "filter",
|
|
"linearGradient", "pattern", "radialGradient", "textpath", "tref",
|
|
"set", "use"]
|
|
|
|
{- style value (css) filtering not implemented
|
|
-
|
|
- this is used for css filtering
|
|
allowed_svg_properties = fromList acceptable_svg_properties
|
|
acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule",
|
|
"stroke", "stroke-width", "stroke-linecap", "stroke-linejoin",
|
|
"stroke-opacity"]
|
|
|
|
|
|
allowed_css_properties = fromList acceptable_css_properties
|
|
allowed_css_keywords = fromList acceptable_css_keywords
|
|
acceptable_css_properties = ["azimuth", "background-color",
|
|
"border-bottom-color", "border-collapse", "border-color",
|
|
"border-left-color", "border-right-color", "border-top-color", "clear",
|
|
"color", "cursor", "direction", "display", "elevation", "float", "font",
|
|
"font-family", "font-size", "font-style", "font-variant", "font-weight",
|
|
"height", "letter-spacing", "line-height", "overflow", "pause",
|
|
"pause-after", "pause-before", "pitch", "pitch-range", "richness",
|
|
"speak", "speak-header", "speak-numeral", "speak-punctuation",
|
|
"speech-rate", "stress", "text-align", "text-decoration", "text-indent",
|
|
"unicode-bidi", "vertical-align", "voice-family", "volume",
|
|
"white-space", "width"]
|
|
acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue",
|
|
"bold", "both", "bottom", "brown", "center", "collapse", "dashed",
|
|
"dotted", "fuchsia", "gray", "green", "!important", "italic", "left",
|
|
"lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive",
|
|
"pointer", "purple", "red", "right", "solid", "silver", "teal", "top",
|
|
"transparent", "underline", "white", "yellow"]
|
|
-}
|