Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Speed up mimetype_from_content() by using a 256-byte array. <br>Mark VT and Ctrl-Z as text bytes, not binary. <br>Decrease maximum UTF-16 line length to 2731. <br>Check for FFFF in addition to 0 in UTF-16/binary detection. |
|---|---|
| Downloads: | Tarball | ZIP archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA1: |
d804902f2333e4198223063c27cbbc17 |
| User & Date: | jan.nijtmans 2012-11-02 08:31:20.275 |
Context
|
2012-11-02
| ||
| 17:22 | Adjustments to looks_like_utf16 to handle wchar_t being missing or not 2 bytes. ... (check-in: 7d881d8280 user: mistachkin tags: trunk) | |
| 10:55 | Generate warning when to-be-committed file contains invalid UTF-8 ... (check-in: 4e86b06a9f user: jan.nijtmans tags: improve_commit_warning) | |
| 08:31 | Speed up mimetype_from_content() by using a 256-byte array. <br>Mark VT and Ctrl-Z as text bytes, not binary. <br>Decrease maximum UTF-16 line length to 2731. <br>Check for FFFF in addition to 0 in UTF-16/binary detection. ... (check-in: d804902f23 user: jan.nijtmans tags: trunk) | |
| 03:30 | Add the new moderation permissions to the list maintained by the JSON code. ... (check-in: 1cc7e8ce29 user: mistachkin tags: trunk) | |
Changes
Changes to src/diff.c.
| ︙ | ︙ | |||
219 220 221 222 223 224 225 |
if( j>LENGTH_MASK ){
return 0; /* Very long line -> binary */
}
return result; /* No problems seen -> not binary */
}
/*
| | | > < | > | 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
if( j>LENGTH_MASK ){
return 0; /* Very long line -> binary */
}
return result; /* No problems seen -> not binary */
}
/*
** Longest line, counted in UTF-16 characters, that is still accepted as
** text.  Converting UTF-16 to UTF-8 can grow the data by 50%, and the
** converted line must not exceed LENGTH_MASK bytes (the size of the line
** buffer used by the diff engine), hence the division by three.
*/
#define UTF16_LENGTH_MASK (LENGTH_MASK/3)
/*
** Carriage-return and line-feed as they appear in the UTF-16be and
** UTF-16le encodings.  The LE forms are the BE forms shifted left by
** half the bit width of wchar_t, i.e. by sizeof(wchar_t)*4 bits.
**
** NOTE(review): that shift is equivalent to swapping the bytes of a
** 16-bit value only when wchar_t is 2 bytes wide -- confirm the intended
** behavior on platforms where wchar_t is wider.
**
** UTF16_FFFF is the value -1 converted to wchar_t.
*/
#define UTF16BE_CR ((wchar_t)'\r')
#define UTF16BE_LF ((wchar_t)'\n')
#define UTF16LE_CR (UTF16BE_CR<<(sizeof(wchar_t)*4))
#define UTF16LE_LF (UTF16BE_LF<<(sizeof(wchar_t)*4))
#define UTF16_FFFF ((wchar_t)(-1))
/*
** This function attempts to scan each logical line within the blob to
** determine the type of content it appears to contain. Possible return
** values are:
**
** (1) -- The content appears to consist entirely of text, with lines
|
| ︙ | ︙ | |||
269 270 271 272 273 274 275 |
if( n==0 ) return result; /* Empty file -> text */
if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
c = *z;
if( c==0 ) return 0; /* NUL character in a file -> binary */
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
while( (n-=2)>0 ){
c = *++z; ++j;
| | | 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 |
if( n==0 ) return result; /* Empty file -> text */
if( n%2 ) return 0; /* Odd number of bytes -> binary (or UTF-8) */
c = *z;
if( c==0 ) return 0; /* NUL character in a file -> binary */
j = ((c!=UTF16BE_LF) && (c!=UTF16LE_LF));
while( (n-=2)>0 ){
c = *++z; ++j;
if( c==0 || c==UTF16_FFFF ) return 0; /* NUL/FFFF character in a file -> binary */
if( c==UTF16BE_LF || c==UTF16LE_LF ){
int c2 = z[-1];
if( c2==UTF16BE_CR || c2==UTF16LE_CR ){
result = -1; /* Contains CR/NL, continue */
}
if( j>UTF16_LENGTH_MASK ){
return 0; /* Very long line -> binary */
|
| ︙ | ︙ |
Changes to src/doc.c.
| ︙ | ︙ | |||
33 34 35 36 37 38 39 |
** For any other binary type, return "unknown/unknown".
*/
const char *mimetype_from_content(Blob *pBlob){
int i;
int n;
const unsigned char *x;
| | | | | | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
** For any other binary type, return "unknown/unknown".
*/
const char *mimetype_from_content(Blob *pBlob){
int i;
int n;
const unsigned char *x;
static const char isBinary[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1
};
/* A table of mimetypes based on file content prefixes
*/
static const struct {
const char *zPrefix; /* The file prefix */
int size; /* Length of the prefix */
const char *zMimetype; /* The corresponding mimetype */
} aMime[] = {
{ "GIF87a", 6, "image/gif" },
{ "GIF89a", 6, "image/gif" },
{ "\211PNG\r\n\032\n", 8, "image/png" },
{ "\377\332\377", 3, "image/jpeg" },
{ "\377\330\377", 3, "image/jpeg" },
};
x = (const unsigned char*)blob_buffer(pBlob);
n = blob_size(pBlob);
for(i=0; i<n; i++){
unsigned char c = x[i];
if( isBinary[c] ){
break;
}
}
if( i>=n ){
return 0; /* Plain text */
}
for(i=0; i<sizeof(aMime)/sizeof(aMime[0]); i++){
|
| ︙ | ︙ |