1
2
3
4 """
5 This file is part of the web2py Web Framework
6 Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
7 License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
8 """
9
10 import re
11 import cgi
12
# Public API of this module: ``from <module> import *`` exposes only the
# ``highlight`` function.
__all__ = ['highlight']
14
15
class Highlighter(object):

    """
    Do syntax highlighting.

    The highlighter is a small regex-driven state machine.  For the current
    mode it tries every rule of ``all_styles[mode]`` in order at the current
    position; the first rule that matches is handed to the mode's tokenizer
    callback, which appends HTML to ``self.output`` and may return the name
    of the mode to switch to.  Text that matches no rule is emitted
    (HTML-escaped) one character at a time.
    """

    # Public mode name -> (key into ``all_styles``, tokens that are ignored
    # while scanning in that mode).
    _modes = {
        'WEB2PY': ('PYTHON', ()),
        'PYTHON': ('PYTHON', ('GOTOHTML', )),
        'CPP': ('C', ()),
        'C': ('C', ('CPPKEYWORD', )),
        'HTML_PLAIN': ('HTML', ('GOTOPYTHON', )),
        'HTML': ('HTML', ()),
    }

    def __init__(
        self,
        mode,
        link=None,
        styles=None,
    ):
        """
        Initialise highlighter:
            mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN),
                   case-insensitive
            link = base URL prepended to web2py names to build hyperlinks;
                   a trailing '/' is added when missing.  With no link the
                   names are only coloured.
            styles = optional dict mapping a token name to the CSS style
                   that overrides the built-in one for that token

        Raises SyntaxError when the mode is not one of the names above.
        """

        mode = mode.upper()
        if mode not in self._modes:
            raise SyntaxError('Unknown mode: %s' % mode)
        if link and link[-1] != '/':
            link = link + '/'
        self.link = link
        # Keep a reference to the caller's dict (it is only read here);
        # avoid a shared mutable default argument.
        if styles is None:
            styles = {}
        self.styles = styles
        self.output = []
        self.span_style = None
        # Delimiter of the multi-line string currently being scanned.
        self.strMultilineString = ''
        (self.mode, suppressed) = self._modes[mode]
        self.suppress_tokens = list(suppressed)

    @staticmethod
    def _escape(text):
        """
        Escape '&', '<' and '>' for inclusion in HTML (quotes are left
        untouched).  '&' must be replaced first so that the entities
        produced for '<' and '>' are not escaped a second time.
        """

        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        return text.replace('>', '&gt;')

    def c_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for C specific highlighting.

        Emits the escaped matched text in the requested style.  Never
        changes mode (implicitly returns None).
        """

        value = self._escape(match.group())
        self.change_style(token, style)
        self.output.append(value)

    def python_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """

        value = self._escape(match.group())
        if token == 'MULTILINESTRING':
            # Remember which delimiter opened the string so that only the
            # same delimiter can close it.
            self.change_style(token, style)
            self.output.append(value)
            self.strMultilineString = match.group(1)
            return 'PYTHONMultilineString'
        elif token == 'ENDMULTILINESTRING':
            if match.group(1) == self.strMultilineString:
                self.output.append(value)
                self.strMultilineString = ''
                return 'PYTHON'
            # A different triple quote inside the string: fall through and
            # emit it as ordinary styled text, staying in the string mode.
        if style and style[:5] == 'link:':
            # Style of the form 'link:<url>;<css>': emit a hyperlink, or a
            # plain span when no base URL is configured.
            self.change_style(None, None)
            (url, style) = style[5:].split(';', 1)
            if url == 'None' or url == '':
                self.output.append('<span style="%s">%s</span>'
                                   % (style, value))
            else:
                self.output.append('<a href="%s%s" style="%s">%s</a>'
                                   % (url, value, style, value))
        else:
            self.change_style(token, style)
            self.output.append(value)
        if token == 'GOTOHTML':
            return 'HTML'
        return None

    def html_tokenizer(
        self,
        token,
        match,
        style,
    ):
        """
        Callback for HTML specific highlighting.

        Returns 'PYTHON' after a '{{' token, otherwise None.
        """

        value = self._escape(match.group())
        self.change_style(token, style)
        self.output.append(value)
        if token == 'GOTOPYTHON':
            return 'PYTHON'
        return None

    # mode -> (tokenizer callback, ordered rules).  Each rule is
    # (token name, compiled regex, style).  Rules are tried in order and the
    # first match wins, so more specific patterns must come first.  A style
    # of None means "unstyled", 'Keep' means "stay inside the current span",
    # and a 'link:' prefix is handled by python_tokenizer.
    all_styles = {
        'C': (c_tokenizer, (
            ('COMMENT', re.compile(r'//.*\r?\n'),
             'color: green; font-style: italic'),
            ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
             'color: green; font-style: italic'),
            ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
             re.DOTALL), 'color: magenta; font-style: italic'),
            ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
             'color: red'),
            ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
             + r'signed|unsigned|float|double|'
             + r'goto|break|return|continue|asm|'
             + r'case|default|if|else|switch|while|for|do|'
             + r'struct|union|enum|typedef|'
             + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('CPPKEYWORD',
             re.compile(r'(class|private|protected|public|template|new|delete|'
             + r'this|friend|using|inline|export|bool|throw|try|catch|'
             + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
             'color: blue; font-weight: bold'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
             'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHON': (python_tokenizer, (
            ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
            ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
             'font-weight: bold'),
            ('NUMBER',
             re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'
                        ), 'color: red'),
            ('KEYWORD',
             re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
             + r'print|raise|return|try|except|global|assert|lambda|'
             + r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
             + r'from|True|False)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('WEB2PY',
             re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])'
                        ), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
            # The lookahead keeps identifiers that merely start with these
            # words (e.g. ``selfish``, ``Nonexistent``) from being split.
            ('MAGIC', re.compile(r'(self|None)(?![a-zA-Z0-9_])'),
             'color:#185369; font-weight: bold'),
            ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
             'color: #FF9966'),
            ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'
                                  ), 'color: #FF9966'),
            ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
             None),
            ('COMMENT', re.compile(r'\#.*\r?\n'),
             'color: green; font-style: italic'),
            ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
        )),
        'PYTHONMultilineString': (python_tokenizer,
                                  (('ENDMULTILINESTRING',
                                    re.compile(r'.*?("""|\'\'\')',
                                               re.DOTALL), 'color: darkred'), )),
        'HTML': (html_tokenizer, (
            ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
            ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
             'color: green; font-style: italic'),
            ('XMLCRAP', re.compile(r'<![^>]*>'),
             'color: blue; font-style: italic'),
            ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE
                                  | re.DOTALL), 'color: black'),
            ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
             'color: darkred; font-weight: bold'),
            ('ENDTAG', re.compile(r'/?>'),
             'color: darkred; font-weight: bold'),
        )),
    }

    def highlight(self, data):
        """
        Syntax highlight some code in the mode chosen at construction time.
        Returns html version of code (tabs expanded to 4 columns).

        Output accumulates in ``self.output``, so an instance is meant to
        highlight a single piece of text.
        """

        pos = 0
        mode = self.mode
        while pos < len(data):
            (tokenizer, rules) = Highlighter.all_styles[mode]
            for (token, pattern, style) in rules:
                if token in self.suppress_tokens:
                    continue
                match = pattern.match(data, pos)
                if not match:
                    continue
                if style:
                    # Fill in the %(link)s placeholder of link styles.
                    style = style % dict(link=self.link)
                # The tokenizers are stored as plain functions, hence the
                # explicit ``self``.
                new_mode = tokenizer(self, token, match, style)
                if new_mode is not None:
                    mode = new_mode
                # Always advance, even on an empty match, to avoid looping
                # forever.
                pos += max(1, len(match.group()))
                break
            else:
                # No rule matched: emit the character unstyled.  It must be
                # escaped like tokenized text, otherwise a stray '<' or '&'
                # would be interpreted as markup by the browser.
                self.change_style(None, None)
                self.output.append(self._escape(data[pos]))
                pos += 1
        self.change_style(None, None)
        return ''.join(self.output).expandtabs(4)

    def change_style(self, token, style):
        """
        Generate output to change from existing style to another style only.

        A style registered for ``token`` in ``self.styles`` overrides the
        given one.  The special style 'Keep' leaves the currently open span
        untouched; None closes the current span without opening a new one.
        """

        if token in self.styles:
            style = self.styles[token]
        if self.span_style != style:
            if style != 'Keep':
                if self.span_style is not None:
                    self.output.append('</span>')
                if style is not None:
                    self.output.append('<span style="%s">' % style)
                self.span_style = style
249
250
def highlight(
    code,
    language,
    link='/examples/globals/vars/',
    counter=1,
    styles=None,
    highlight_line=None,
    attributes=None,
):
    """
    Render ``code`` as an HTML table with a line-number column and a code
    column.

    code: source text to render.
    language: 'PYTHON', 'C', 'CPP', 'HTML' or 'WEB2PY' (case-insensitive)
        to get syntax highlighting; any other value (or None) only
        HTML-escapes the text.
    link: base URL handed to the highlighter for hyperlinked names.
    counter: number of the first line; a string is repeated (escaped) on
        every line instead of a number; None leaves the column empty.
    styles: optional dict of CSS overrides.  The keys 'CODE', 'LINENUMBERS'
        and 'LINEHIGHLIGHT' are used here; the dict is also passed on to the
        highlighter.
    highlight_line: line to emphasise.  With a numeric, non-zero counter it
        is expressed in the displayed numbering; otherwise it is used
        directly as a 0-based index.  Values outside the text are ignored.
    attributes: optional dict of attributes for the <table> tag.  Only keys
        starting with '_' are used (the underscore is dropped): a None
        value yields a bare attribute, a truthy value yields key="value".

    Returns the HTML markup as a string.
    """

    # ``cgi.escape`` no longer exists on Python 3.8+; ``html.escape`` with
    # quote=False is its equivalent.  Python 2 has no ``html`` module.
    try:
        from html import escape as _escape
    except ImportError:
        from cgi import escape as _escape

    if styles is None:
        styles = {}
    if attributes is None:
        attributes = {}

    if 'CODE' in styles:
        code_style = styles['CODE']
    else:
        code_style = """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        overflow: auto;
        white-space: pre !important;\n"""
    if 'LINENUMBERS' in styles:
        linenumbers_style = styles['LINENUMBERS']
    else:
        linenumbers_style = """
        font-size: 11px;
        font-family: Bitstream Vera Sans Mono,monospace;
        background-color: transparent;
        margin: 0;
        padding: 5px;
        border: none;
        color: #A0A0A0;\n"""
    if 'LINEHIGHLIGHT' in styles:
        linehighlight_style = styles['LINEHIGHLIGHT']
    else:
        linehighlight_style = "background-color: #EBDDE2;"

    if language and language.upper() in ('PYTHON', 'C', 'CPP', 'HTML',
                                         'WEB2PY'):
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = _escape(code, quote=False)
    lines = code.split('\n')

    if counter is None:
        linenumbers = [''] * len(lines)
    elif isinstance(counter, str):
        linenumbers = [_escape(counter, quote=False)] * len(lines)
    else:
        linenumbers = [str(i + counter) + '.' for i in range(len(lines))]

    if highlight_line:
        if counter and not isinstance(counter, str):
            lineno = highlight_line - counter
        else:
            lineno = highlight_line
        # The lower bound matters: a negative index would otherwise
        # highlight a line counted from the end of the text.
        if 0 <= lineno < len(lines):
            lines[lineno] = '<div style="%s">%s</div>' \
                % (linehighlight_style, lines[lineno])
            linenumbers[lineno] = '<div style="%s">%s</div>' \
                % (linehighlight_style, linenumbers[lineno])

    code = '<br/>'.join(lines)
    numbers = '<br/>'.join(linenumbers)

    # Bare attributes first, then key="value" pairs; double quotes inside a
    # value are replaced so they cannot terminate the attribute.
    items = list(attributes.items())
    bare = [key[1:].lower() for (key, value) in items
            if key[:1] == '_' and value is None]
    valued = ['%s="%s"' % (key[1:].lower(), str(value).replace('"', "'"))
              for (key, value) in items if key[:1] == '_' and value]
    fa = ' '.join(bare + valued)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr valign="top"><td style="width:40px; text-align: right;"><pre style="%s">%s</pre></td><td><pre style="%s">%s</pre></td></tr></table>'\
        % (fa, linenumbers_style, numbers, code_style, code)
325
326
if __name__ == '__main__':
    # Command-line use: <script> <source-file> <language>
    # Prints the highlighted file wrapped in a minimal HTML page.
    import sys

    # ``with`` guarantees the file is closed even if reading fails.
    with open(sys.argv[1]) as argfp:
        data = argfp.read()
    # Parenthesised single-argument form works as both the Python 2
    # statement and the Python 3 function.
    print('<html><body>' + highlight(data, sys.argv[2])
          + '</body></html>')
334