web2py.gluon.sanitizer

33

def __init__(
    self,
    permitted_tags=[
        'a',
        'b',
        'blockquote',
        'br/',
        'i',
        'li',
        'ol',
        'ul',
        'p',
        'cite',
        'code',
        'pre',
        'img/',
        ],
    allowed_attributes={'a': ['href', 'title'], 'img': ['src', 'alt'],
                        'blockquote': ['type']},
    fmt=AbstractFormatter,
    strip_disallowed=False
    ):
    """
    Set up the tag/attribute whitelists and the output state.

    @param permitted_tags: tag names allowed through unescaped. A name
        ending in '/' (e.g. 'br/') is stored, without the slash, in
        requires_no_close as well as in permitted_tags.
    @param allowed_attributes: maps a tag name to the list of attribute
        names that may be kept on that tag.
    @param fmt: passed unchanged to HTMLParser.__init__.
    @param strip_disallowed: initial value of the flag that selects
        dropping disallowed tags instead of escaping them.
    """

    HTMLParser.__init__(self, fmt)
    self.result = ''
    self.open_tags = []

    # endswith() instead of tag[-1] so that an empty tag name does not
    # raise IndexError.
    self.requires_no_close = [tag[:-1] for tag in permitted_tags
                              if tag.endswith('/')]
    self.permitted_tags = [tag for tag in permitted_tags
                           if not tag.endswith('/')]
    self.permitted_tags += self.requires_no_close

    # The default dict is created once and shared by every call, so keep a
    # private copy: changing one instance's whitelist must not alter the
    # defaults seen by later instances.
    self.allowed_attributes = dict(
        (tag, list(names)) for (tag, names) in allowed_attributes.items())

    # The only schemes allowed in URLs (for href and src attributes).
    # Adding "javascript" or "vbscript" to this list would not be smart.

    self.allowed_schemes = ['http', 'https', 'ftp']

    # to strip or escape disallowed tags?
    self.strip_disallowed = strip_disallowed
    self.in_disallowed = False

74

def handle_data(self, data):
    """
    Append the escaped text to the result, unless the text is empty or the
    parser is currently inside a disallowed tag.
    """

    if not data or self.in_disallowed:
        return
    self.result += xssescape(data)

78

def handle_charref(self, ref):
    """
    Handle a numeric character reference.

    A purely decimal reference of fewer than seven characters is written
    back as '&#<ref>;'. Anything else is written as escaped text (no
    trailing ';' is added). Nothing is written while inside a disallowed
    tag.
    """

    if self.in_disallowed:
        return
    short_decimal = len(ref) < 7 and ref.isdigit()
    if short_decimal:
        emitted = '&#%s;' % ref
    else:
        emitted = xssescape('&#%s' % ref)
    self.result += emitted

86

def handle_entityref(self, ref):
    """
    Handle a named entity reference.

    A name found in entitydefs is written back as '&<ref>;'. An unknown
    name is written as escaped text (no trailing ';' is added). Nothing is
    written while inside a disallowed tag.
    """

    if self.in_disallowed:
        return
    if ref in entitydefs:
        emitted = '&%s;' % ref
    else:
        emitted = xssescape('&%s' % ref)
    self.result += emitted

94

def handle_comment(self, comment):
    """
    Write an HTML comment to the result as escaped text.

    The comment delimiters are put back around the text before escaping,
    so the comment becomes visible text rather than markup. Empty comments
    and comments inside a disallowed tag are dropped.
    """

    if self.in_disallowed:
        return
    elif comment:
        # The format string needs a %s placeholder: formatting an empty
        # string with an argument raises TypeError for every comment.
        self.result += xssescape('<!--%s-->' % comment)

100

def handle_starttag(
    self,
    tag,
    method,
    attrs,
    ):
    """
    Write an opening tag to the result, keeping only whitelisted parts.

    A tag that is not permitted is either escaped as text or, when
    strip_disallowed is set, switches the parser into the in_disallowed
    state. A permitted tag is rebuilt from scratch: only attributes listed
    for it in allowed_attributes and having a non-empty value are kept,
    and 'href', 'src' and 'background' values must also pass
    url_is_acceptable(). An 'a' or 'img' tag left without any attribute is
    dropped entirely.

    @param tag: tag name
    @param method: not used here
    @param attrs: sequence of (name, value) pairs
    """

    if tag not in self.permitted_tags:
        if self.strip_disallowed:
            self.in_disallowed = True
        else:
            self.result += xssescape('<%s>' % tag)
        return

    bt = '<' + tag
    if tag in self.allowed_attributes:
        attrs = dict(attrs)
        self.allowed_attributes_here = [
            name for name in self.allowed_attributes[tag]
            if name in attrs and attrs[name]]
        for attribute in self.allowed_attributes_here:
            if attribute in ['href', 'src', 'background']:
                if self.url_is_acceptable(attrs[attribute]):
                    # quoteattr() picks a safe quote character and escapes
                    # & < >, so a value containing '"' cannot end the
                    # attribute early and inject further attributes.
                    bt += ' %s=%s' % (attribute,
                                      quoteattr(attrs[attribute]))
            else:
                bt += ' %s=%s' % (xssescape(attribute),
                                  quoteattr(attrs[attribute]))

    # An anchor or image with nothing left on it is useless: drop it.
    if bt == '<a' or bt == '<img':
        return
    if tag in self.requires_no_close:
        bt += ' /'
    bt += '>'
    self.result += bt
    # Most recently opened tag goes first.
    self.open_tags.insert(0, tag)

134

def handle_endtag(self, tag, attrs):
    """
    Handle a closing tag.

    A permitted tag is written back only if it is currently recorded in
    open_tags, and one occurrence is then removed from that list. A tag
    that is not permitted is escaped as text, or, when strip_disallowed
    is set, clears the in_disallowed state instead.
    """

    closing = '</%s>' % tag
    if tag in self.permitted_tags:
        if tag in self.open_tags:
            self.result += closing
            self.open_tags.remove(tag)
        return
    if self.strip_disallowed:
        self.in_disallowed = False
    else:
        self.result += xssescape(closing)

145

def unknown_starttag(self, tag, attributes):
    """
    Route an opening tag through handle_starttag(), passing None as the
    method argument.
    """

    no_method = None
    self.handle_starttag(tag, no_method, attributes)

148

def unknown_endtag(self, tag):
    """
    Route a closing tag through handle_endtag(), passing None as the
    attrs argument.
    """

    no_attrs = None
    self.handle_endtag(tag, no_attrs)

151

def url_is_acceptable(self, url):
    """
    Tell whether a URL may be kept in an attribute.

    Accepted are absolute URLs whose scheme is in allowed_schemes and
    whose network location contains a dot, and scheme-less URLs whose
    path starts with '/'.
    """

    pieces = urlparse(url)
    scheme, netloc, path = pieces[0], pieces[1], pieces[2]
    if scheme in self.allowed_schemes and '.' in netloc:
        return True
    return scheme == '' and path.startswith('/')

160

def strip(self, rawstring, escape=True):
    """
    Returns the argument stripped of potentially harmful
    HTML or Javascript code

    Each call starts from a clean state, so one instance can be used for
    several inputs: tags left open by an earlier call are not closed
    again, and passing escape=False affects that call only.

    @type escape: boolean
    @param escape: If True (default) it escapes the potentially harmful
        content, otherwise remove it
    @return: the cleaned markup; tags still open at the end of the input
        are closed, except those listed in requires_no_close
    """

    # NOTE(review): anything that is not a str is returned via str()
    # without being parsed; confirm callers never pass markup as another
    # string type.
    if not isinstance(rawstring, str):
        return str(rawstring)

    # Normalise '<br/>' to '<br />' before parsing.
    for tag in self.requires_no_close:
        rawstring = rawstring.replace("<%s/>" % tag, "<%s />" % tag)

    # Reset per-call state so nothing leaks in from a previous call.
    self.result = ''
    self.open_tags = []
    self.in_disallowed = False

    configured = self.strip_disallowed
    if not escape:
        self.strip_disallowed = True
    try:
        self.feed(rawstring)
    finally:
        # escape=False is a per-call override, not a permanent switch.
        self.strip_disallowed = configured

    # open_tags holds the most recently opened tag first, so closing in
    # list order closes the innermost tag first.
    self.result += ''.join(['</%s>' % endtag for endtag in self.open_tags
                            if endtag not in self.requires_no_close])
    return self.result

182

def xtags(self):
    """
    Returns an escaped, printable summary of the permitted tags, in sorted
    order, each shown with its allowed attributes as empty name="" pairs
    """

    rendered = []
    for name in sorted(self.permitted_tags):
        attr_text = ''
        if name in self.allowed_attributes:
            attr_text = ''.join([' %s=""' % attr
                                 for attr in self.allowed_attributes[name]])
        rendered.append('<' + name + attr_text + '>')
    return xssescape(' '.join(rendered))

Source Code for Module web2py.gluon.sanitizer