Package web2py :: Package gluon :: Module sanitizer
[hide private]
[frames | no frames]

Source Code for Module web2py.gluon.sanitizer

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  4  """ 
  5  :: 
  7      # from 
  8      # Title: Cross-site scripting (XSS) defense 
  9      # Submitter: Josh Goldfoot (other recipes) 
 10      # Last Updated: 2006/08/05 
 11      # Version no: 1.0 
 13  """ 
 16  from htmllib import HTMLParser 
 17  from cgi import escape 
 18  from urlparse import urlparse 
 19  from formatter import AbstractFormatter 
 20  from htmlentitydefs import entitydefs 
 21  from xml.sax.saxutils import quoteattr 
 23  __all__ = ['sanitize'] 
def xssescape(text):
    """Return ``text`` with HTML-significant characters neutralised.

    ``&``, ``<``, ``>`` and the double quote are replaced by their entity
    forms via ``escape(..., quote=True)``; every colon is then rewritten
    as the numeric reference ``&#58;``.
    """
    entity_escaped = escape(text, quote=True)
    # The colon is replaced last: the entities produced above contain
    # no colon, so nothing gets escaped twice.
    return entity_escaped.replace(':', '&#58;')
30 31
class XssCleaner(HTMLParser):
    """Whitelist-based HTML cleaner.

    The parser callbacks rebuild the input into ``self.result``: tags
    listed in ``permitted_tags`` are re-emitted with only their allowed
    attributes, everything else is either escaped with ``xssescape`` or
    dropped (see ``strip_disallowed``).

    This module targets Python 2 (``htmllib``, ``urlparse``, ``unicode``).
    """

    def __init__(
        self,
        permitted_tags=[
            'a',
            'b',
            'blockquote',
            'br/',
            'i',
            'li',
            'ol',
            'ul',
            'p',
            'cite',
            'code',
            'pre',
            'img/',
            ],
        allowed_attributes={'a': ['href', 'title'], 'img': ['src', 'alt'],
                            'blockquote': ['type']},
        fmt=AbstractFormatter,
        strip_disallowed=False,
        ):
        """Configure the whitelist.

        @param permitted_tags: tag names to let through; a trailing ``/``
            (e.g. ``'br/'``) marks a tag that never gets a closing tag
        @param allowed_attributes: maps a tag name to the attribute names
            that may be kept on it
        @param fmt: passed unchanged to ``HTMLParser.__init__``
        @param strip_disallowed: if True, disallowed tags and the text
            following them are dropped instead of escaped

        The default containers are shared between calls; they are only
        read here (``permitted_tags`` is copied, ``allowed_attributes`` is
        stored by reference and never modified by this class).
        """

        HTMLParser.__init__(self, fmt)
        self.result = ''
        self.open_tags = []
        # endswith() rather than i[-1] so an empty tag name cannot raise
        # IndexError.
        self.permitted_tags = [i for i in permitted_tags
                               if not i.endswith('/')]
        self.requires_no_close = [i[:-1] for i in permitted_tags
                                  if i.endswith('/')]
        self.permitted_tags += self.requires_no_close
        self.allowed_attributes = allowed_attributes

        # The only schemes allowed in URLs (for href and src attributes).
        # Adding "javascript" or "vbscript" to this list would not be smart.

        self.allowed_schemes = ['http', 'https', 'ftp']

        # to strip or escape disallowed tags?
        self.strip_disallowed = strip_disallowed
        self.in_disallowed = False

    def handle_data(self, data):
        """Append escaped text, unless currently inside a stripped tag."""
        if data and not self.in_disallowed:
            self.result += xssescape(data)

    def handle_charref(self, ref):
        """Re-emit short, purely decimal character references; escape
        anything else."""
        if self.in_disallowed:
            return
        elif len(ref) < 7 and ref.isdigit():
            self.result += '&#%s;' % ref
        else:
            self.result += xssescape('&#%s' % ref)

    def handle_entityref(self, ref):
        """Re-emit entity names found in ``entitydefs``; escape unknown
        ones."""
        if self.in_disallowed:
            return
        elif ref in entitydefs:
            self.result += '&%s;' % ref
        else:
            self.result += xssescape('&%s' % ref)

    def handle_comment(self, comment):
        """Emit non-empty comments as escaped text, never as live
        markup."""
        if self.in_disallowed:
            return
        elif comment:
            self.result += xssescape('<!--%s-->' % comment)

    def handle_starttag(
        self,
        tag,
        method,
        attrs,
        ):
        """Emit a permitted start tag with its whitelisted attributes.

        @param tag: tag name
        @param method: unused here (``unknown_starttag`` passes None)
        @param attrs: sequence of ``(name, value)`` pairs

        Disallowed tags are escaped, or start a stripped region when
        ``strip_disallowed`` is set. ``<a>`` and ``<img>`` that end up
        with no attribute at all are dropped entirely.
        """
        if tag not in self.permitted_tags:
            if self.strip_disallowed:
                self.in_disallowed = True
            else:
                self.result += xssescape('<%s>' % tag)
        else:
            bt = '<' + tag
            if tag in self.allowed_attributes:
                attrs = dict(attrs)
                self.allowed_attributes_here = [
                    x for x in self.allowed_attributes[tag]
                    if x in attrs and len(attrs[x]) > 0]
                for attribute in self.allowed_attributes_here:
                    if attribute in ['href', 'src', 'background']:
                        if self.url_is_acceptable(attrs[attribute]):
                            # quoteattr() picks a safe quote character and
                            # escapes &, <, > and embedded quotes, so a URL
                            # containing '"' can no longer close the
                            # attribute and inject new ones.
                            bt += ' %s=%s' % (attribute,
                                              quoteattr(attrs[attribute]))
                    else:
                        bt += ' %s=%s' % (xssescape(attribute),
                                          quoteattr(attrs[attribute]))
            if bt == '<a' or bt == '<img':
                return
            if tag in self.requires_no_close:
                bt += ' /'
            bt += '>'
            self.result += bt
            self.open_tags.insert(0, tag)

    def handle_endtag(self, tag, attrs):
        """Emit a closing tag for a permitted tag that is currently open.

        @param tag: tag name
        @param attrs: unused here (``unknown_endtag`` passes None)

        A disallowed end tag is escaped, or ends the stripped region when
        ``strip_disallowed`` is set.
        """
        bracketed = '</%s>' % tag
        if tag not in self.permitted_tags:
            if self.strip_disallowed:
                self.in_disallowed = False
            else:
                self.result += xssescape(bracketed)
        elif tag in self.open_tags:
            self.result += bracketed
            self.open_tags.remove(tag)

    def unknown_starttag(self, tag, attributes):
        """Route unknown start tags through ``handle_starttag``."""
        self.handle_starttag(tag, None, attributes)

    def unknown_endtag(self, tag):
        """Route unknown end tags through ``handle_endtag``."""
        self.handle_endtag(tag, None)

    def url_is_acceptable(self, url):
        """
        Accepts relative and absolute urls

        A URL is accepted when its scheme is in ``allowed_schemes`` and
        its network location contains a dot, or when it has no scheme and
        its path starts with ``/``.
        """

        parsed = urlparse(url)
        return (parsed[0] in self.allowed_schemes and '.' in parsed[1]) \
            or (parsed[0] == '' and parsed[2].startswith('/'))

    def strip(self, rawstring, escape=True):
        """
        Returns the argument stripped of potentially harmful
        HTML or Javascript code

        @type escape: boolean
        @param escape: If True (default) it escapes the potentially harmful
        content, otherwise remove it

        ``unicode`` input is encoded to UTF-8 and cleaned like any other
        string. Other non-string objects are returned as ``str(rawstring)``
        without cleaning.
        """

        if isinstance(rawstring, unicode):
            # unicode is not a str subclass in Python 2; without this
            # branch it would be returned as-is, bypassing the cleaner.
            rawstring = rawstring.encode('utf-8')
        elif not isinstance(rawstring, str):
            return str(rawstring)
        for tag in self.requires_no_close:
            rawstring = rawstring.replace("<%s/>" % tag, "<%s />" % tag)
        if not escape:
            self.strip_disallowed = True
        # Start every run from a clean slate so that a reused instance
        # does not carry buffered input, open tags or a stripped region
        # over from the previous call.
        self.reset()
        self.result = ''
        self.open_tags = []
        self.in_disallowed = False
        self.feed(rawstring)
        # Close whatever the input left open.
        for endtag in self.open_tags:
            if endtag not in self.requires_no_close:
                self.result += '</%s>' % endtag
        return self.result

    def xtags(self):
        """
        Returns a printable string informing the user which tags are allowed
        """

        tg = ''
        for x in sorted(self.permitted_tags):
            tg += '<' + x
            if x in self.allowed_attributes:
                for y in self.allowed_attributes[x]:
                    tg += ' %s=""' % y
            tg += '> '
        return xssescape(tg.strip())
196 197
def sanitize(text, permitted_tags=[
    'a',
    'b',
    'blockquote',
    'br/',
    'i',
    'li',
    'ol',
    'ul',
    'p',
    'cite',
    'code',
    'pre',
    'img/',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'table', 'tr', 'td', 'div',
    ],
    allowed_attributes={
    'a': ['href', 'title'],
    'img': ['src', 'alt'],
    'blockquote': ['type'],
    'td': ['colspan'],
    },
    escape=True):
    """Clean ``text`` with a freshly built ``XssCleaner``.

    @param text: markup to clean; ``unicode`` is encoded to UTF-8 first,
        any other non-string object is returned as ``str(text)`` untouched
    @param permitted_tags: tag whitelist handed to ``XssCleaner``
    @param allowed_attributes: per-tag attribute whitelist handed to
        ``XssCleaner``
    @param escape: forwarded to ``XssCleaner.strip``
    @return: the value returned by ``XssCleaner.strip``

    The default containers are shared between calls and are not modified
    here.
    """
    if isinstance(text, unicode):
        # unicode is not a str subclass in Python 2; returning it through
        # str() would skip the cleaner (and fail on non-ASCII text).
        text = text.encode('utf-8')
    elif not isinstance(text, str):
        return str(text)
    cleaner = XssCleaner(permitted_tags=permitted_tags,
                         allowed_attributes=allowed_attributes)
    return cleaner.strip(text, escape)