diff --git a/SanitizeXSS.hs b/Text/HTML/SanitizeXSS.hs similarity index 67% rename from SanitizeXSS.hs rename to Text/HTML/SanitizeXSS.hs index 63b19af..03b0460 100644 --- a/SanitizeXSS.hs +++ b/Text/HTML/SanitizeXSS.hs @@ -1,39 +1,42 @@ -module SanitizeXSS where +module Text.HTML.SanitizeXSS where -import Data.Set +import Data.Set (Set(..), member, notMember, fromList) import Network.URI ( parseURIReference, URI (..) ) import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, isLetter, isDigit ) -import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) +import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString, uriScheme ) import Codec.Binary.UTF8.String ( encodeString, decodeString ) +import Text.HTML.TagSoup sanitizeXSS :: String -> String -sanitizeXSS unsafeHtml = error +sanitizeXSS = renderTags . safeTags . parseTagsOptions parseOptions { optTagPosition = True } + where + safeTags :: [Tag String] -> [Tag String] + safeTags [] = [] + safeTags (t@(TagClose name):tags) | safeTagName name = t:(safeTags tags) + | otherwise = safeTags tags + safeTags (TagOpen name attributes:tags) + | safeTagName name = TagOpen name (filter safeAttribute attributes) : safeTags tags + | otherwise = safeTags tags + safeTags (t:tags) = t:safeTags tags -unSafeTag tag = tag `notElem` sanitaryTags -unSafeAttributes tag = (attr `notElem` sanitaryAttributes || - (attr `elem` ["href","src"] && unsanitaryURI val)) +safeTagName :: String -> Bool +safeTagName tagname = tagname `member` sanitaryTags --- | Returns @True@ if the specified URI is potentially a security risk. -unsanitaryURI :: String -> Bool -unsanitaryURI u = - let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:", - "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", - "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", - "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", - "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", - "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", - "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", - "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", - "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", - "snews:", "webcal:", "ymsgr:"] - in case parseURIReference (escapeURI u) of - -- uriScheme member of URI record data structure - Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes - Nothing -> True +safeAttribute :: (String, String) -> Bool +safeAttribute (name, value) = name `member` sanitaryAttributes && + (name `notElem` ["href","src"] || sanitaryURI value) + + +-- | Returns @True@ if the specified URI is not a potential security risk. +sanitaryURI :: String -> Bool +sanitaryURI u = + case parseURIReference (escapeURI u) of + Just p -> (map toLower $ uriScheme p) `member` safeURISchemes + Nothing -> False -- | Escape unicode characters in a URI. Characters that are @@ -49,9 +52,20 @@ unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . +safeURISchemes :: Set String +safeURISchemes = fromList [ "", "http:", "https:", "ftp:", "mailto:", "file:", + "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", + "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", + "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", + "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", + "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", + "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", + "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", + "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", + "snews:", "webcal:", "ymsgr:"] -sanitaryTags :: [[Char]] -sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", +sanitaryTags :: Set String +sanitaryTags = fromList ["a", "abbr", "acronym", "address", "area", "b", "big", "blockquote", "br", "button", "caption", "center", "cite", "code", "col", "colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt", "em", "fieldset", "font", @@ -63,8 +77,8 @@ sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", "td", "textarea", "tfoot", "th", "thead", "tr", "tt", "u", "ul", "var"] -sanitaryAttributes :: [[Char]] -sanitaryAttributes = ["abbr", "accept", "accept-charset", +sanitaryAttributes :: Set String +sanitaryAttributes = fromList ["abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", "axis", "border", "cellpadding", "cellspacing", "char", "charoff", "charset", "checked", "cite", "class", diff --git a/test.hs b/test.hs new file mode 100644 index 0000000..f116d2c --- /dev/null +++ b/test.hs @@ -0,0 +1,7 @@ +import Text.HTML.SanitizeXSS + +main = do + let test = " anchor " + let result = (sanitizeXSS test) + let expected = " anchor " + putStrLn $ if result == expected then "pass" else "failure parsing:" ++ (show test) ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show result)