Package web2py :: Package gluon :: Module decoder
[hide private]
[frames] | no frames]

Source Code for Module web2py.gluon.decoder

 1  import codecs, encodings 
 2   
 3  """Caller will hand this library a buffer and ask it to either convert 
 4  it or auto-detect the type. 
 5   
 6  Based on http://code.activestate.com/recipes/52257/ 
 7   
 8  Licensed under the PSF License 
 9  """ 
10   
11  # None represents a potentially variable byte. "##" in the XML spec... 
# Leading-byte signatures (XML 1.0 spec, appendix on autodetection of
# character encodings) mapped to the Python codec used to bootstrap reading
# of the XML declaration.  None marks a byte whose value may vary.
autodetect_dict = {
    (0x00, 0x00, 0xFE, 0xFF): "utf_32_be",  # UCS-4 byte order mark
    (0xFF, 0xFE, 0x00, 0x00): "utf_32_le",  # UCS-4 byte order mark
    (0xFE, 0xFF, None, None): "utf_16_be",  # UTF-16 byte order mark
    (0xFF, 0xFE, None, None): "utf_16_le",  # UTF-16 byte order mark
    (0x00, 0x3C, 0x00, 0x3F): "utf_16_be",  # "<?" without a BOM
    (0x3C, 0x00, 0x3F, 0x00): "utf_16_le",  # "<?" without a BOM
    (0x3C, 0x3F, 0x78, 0x6D): "utf_8",      # "<?xm" in an ASCII superset
    (0x4C, 0x6F, 0xA7, 0x94): "EBCDIC",     # "<?xm" in some EBCDIC flavour
}


def autoDetectXMLEncoding(buffer):
    """Guess the character encoding of an XML document held in *buffer*.

    The first four bytes are matched against ``autodetect_dict``; the
    resulting codec (UTF-8 when nothing matches, as the XML spec mandates)
    is then used to read the XML declaration, whose ``encoding`` attribute
    takes precedence when present and well formed.

    buffer -- byte string with the start of the document.  Fewer than four
              bytes are accepted; the signature lookup is simply skipped.

    Returns the encoding name as a string.  A name is always returned
    (never None), but it might not have an installed decoder (e.g.
    "EBCDIC", or whatever the document itself declares).
    """
    import re  # local import: the module header only imports codecs/encodings

    encoding = "utf_8"  # according to the XML spec, this is the default

    if len(buffer) >= 4:
        # bytearray yields integers on both Python 2 str and Python 3 bytes.
        byte1, byte2, byte3, byte4 = bytearray(buffer[:4])
        guess = autodetect_dict.get((byte1, byte2, byte3, byte4))
        if not guess:
            # Retry with the two potentially variable bytes wildcarded.
            guess = autodetect_dict.get((byte1, byte2, None, None))
        if guess:
            encoding = guess

    try:
        bootstrap_decode = codecs.lookup(encoding)[1]
    except LookupError:
        # No codec to read the declaration with; report the guess as is.
        return encoding

    # "replace" keeps bytes that are invalid in the bootstrap codec (for
    # instance Latin-1 content in a document first assumed to be UTF-8)
    # from aborting detection before the declaration has been read.
    text = bootstrap_decode(buffer, "replace")[0]

    # A byte order mark decodes to U+FEFF and would hide the declaration.
    text = text.lstrip(u"\ufeff")

    # Tie the closing quote to the opening one so that a later attribute
    # using the other quote style (e.g. standalone="yes") is not mistaken
    # for the encoding value.  The name must match the spec's EncName.
    declaration = re.match(
        r"""<\?xml(?=\s)[^>]*?\sencoding\s*=\s*(["'])"""
        r"""([A-Za-z][A-Za-z0-9._-]*)\1""",
        text)
    if declaration:
        encoding = declaration.group(2)

    return encoding
71
def decoder(buffer):
    """Return *buffer* re-encoded as UTF-8.

    The source encoding is whatever ``autoDetectXMLEncoding`` reports for
    the buffer; the bytes are decoded with it and then encoded as UTF-8.
    """
    source_encoding = autoDetectXMLEncoding(buffer)
    text = buffer.decode(source_encoding)
    return text.encode('utf8')
75