Fossil

Changes On Branch invalid-utf8
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch invalid-utf8 Excluding Merge-Ins

This is equivalent to a diff from 6728a8bd08 to 81eeb6f553

2014-07-10
07:36
When committing a (non-binary) file which contains bytes forming an invalid UTF-8 stream, add the possibility to convert it to a valid UTF-8 stream ('c') if you like. check-in: 45f5184e2a user: jan.nijtmans tags: trunk
2014-07-08
15:35
Add "Hide/Show Files" button to Parents and children/Ascendants and Descendants /timeline pages. Add "20 Entries"/"200 Entries" buttons to Ascendants and Descendants /timeline page. check-in: df3ada575c user: jan.nijtmans tags: trunk
10:48
First attempt at making fossil work on VxWorks. Based on feedback by Andy Ling. check-in: 18ae9fddb8 user: jan.nijtmans tags: vxworks
2014-07-07
20:59
Update 'config.guess' and 'config.sub' from upstream per request on mailing list by Joe Prostko. Please review for trunk. Closed-Leaf check-in: c84d28d795 user: mistachkin tags: pending-review
20:21
typo (found by s.beal) check-in: b4a53ba45f user: bch tags: trunk
04:52
Bring in latest fixes. check-in: 088e961a2b user: andybradford tags: cluster-changes
2014-07-06
07:05
Allow repository paths of up to 4096 bytes instead of just 512 bytes. This change needs a minor change in SQLite, allowing the maximum path length no longer to be hardcoded, but configurable at compile-time. See: [http://www.sqlite.org/src/info/c060923a54] check-in: 7f64b35032 user: jan.nijtmans tags: longpath
2014-07-04
10:11
Next step in "invalid-utf8" handling: If a source file contains invalid UTF-8 byte sequences, most likely the real encoding is either ISO-8859-1 or CP1252 (note that CP1252 is a superset of ISO-8859-1). Therefore, after providing a warning, we can now offer the option ('c') to convert it to valid UTF-8, just like we provide such option for UTF-16 and eol-handling as well. Closed-Leaf check-in: 81eeb6f553 user: jan.nijtmans tags: invalid-utf8
2014-07-01
12:40
Update the built-in SQLite to the latest 3.8.6 alpha version from upstream. check-in: 6728a8bd08 user: drh tags: trunk
2014-06-30
12:41
Make "plink" the default ssh client on Windows, no matter what win32 compiler fossil was compiled with. check-in: e6d7b35a24 user: jan.nijtmans tags: trunk

Changes to src/blob.c.

1002
1003
1004
1005
1006
1007
1008























































1009
1010
1011
1012
1013
1014
1015
  for(i=j=0; z[i]; i++){
    if( z[i]!='\r' ) z[j++] = z[i];
    else if( z[i+1]!='\n' ) z[j++] = '\n';
  }
  z[j] = 0;
  p->nUsed = j;
}
























































/*
** Shell-escape the given string.  Append the result to a blob.
*/
void shell_escape(Blob *pBlob, const char *zIn){
  int n = blob_size(pBlob);
  int k = strlen(zIn);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
  for(i=j=0; z[i]; i++){
    if( z[i]!='\r' ) z[j++] = z[i];
    else if( z[i+1]!='\n' ) z[j++] = '\n';
  }
  z[j] = 0;
  p->nUsed = j;
}

/*
** Lookup table used when converting a blob from cp1252 to utf-8.
** Because cp1252 is a superset of iso8859-1, the conversion is
** useful on UNIX as well.
**
** The table has 32 entries, one for each byte value 0x80..0x9F, and
** is indexed by the low five bits of the byte.  Each entry holds the
** code point that the byte is translated to.
*/

static const unsigned short cp1252[32] = {
  0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
  0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
};

/*
** Rewrite the content of blob p in place, turning every byte >=0x80
** into a multi-byte utf-8 sequence.  Bytes 0x80..0x9F are first mapped
** through the cp1252[] table; bytes 0xA0..0xFF are encoded using their
** own value as the code point.  Bytes below 0x80 are copied unchanged.
**
** The work is done in two passes: the first pass counts how many extra
** bytes the result needs, the second pass fills the (possibly enlarged)
** buffer from the end towards the beginning so that no unread input
** byte is overwritten.
*/
void blob_cp1252_to_utf8(Blob *p){
  unsigned char *zBuf = (unsigned char *)p->aData;
  int nOld = p->nUsed;   /* Number of input bytes */
  int nExtra = 0;        /* Additional bytes needed by the output */
  int iSrc;              /* Read position */
  int iDst;              /* Write position */

  /* Pass 1: every byte >=0x80 grows by one byte, or by two bytes when
  ** its table translation needs a 3-byte utf-8 sequence. */
  for(iSrc=0; iSrc<nOld; iSrc++){
    unsigned char c = zBuf[iSrc];
    if( c<0x80 ) continue;
    nExtra += ( c<0xa0 && cp1252[c&0x1f]>=0x800 ) ? 2 : 1;
  }

  iDst = nOld + nExtra;
  if( iDst>=p->nAlloc ){
    /* Not enough room for the result plus terminator.  The data
    ** pointer is re-read because the resize may move the buffer. */
    blob_resize(p, iDst);
    zBuf = (unsigned char *)p->aData;
  }
  p->nUsed = iDst;
  zBuf[iDst] = 0;

  /* Pass 2: iSrc is left just past the last input byte by pass 1.
  ** Walk backwards; once the write position meets the read position
  ** the remaining prefix needs no expansion and is already in place. */
  while( iDst>iSrc ){
    unsigned int sym = zBuf[--iSrc];
    if( sym<0x80 ){
      zBuf[--iDst] = (unsigned char)sym;
      continue;
    }
    if( sym<0xa0 ){
      sym = cp1252[sym&0x1f];
    }
    /* Sequences are emitted last byte first. */
    zBuf[--iDst] = 0x80 | (sym&0x3f);
    if( sym>=0x800 ){
      zBuf[--iDst] = 0x80 | ((sym>>6)&0x3f);
      zBuf[--iDst] = 0xe0 | (sym>>12);
    }else{
      zBuf[--iDst] = 0xc0 | (sym>>6);
    }
  }
}

/*
** Shell-escape the given string.  Append the result to a blob.
*/
void shell_escape(Blob *pBlob, const char *zIn){
  int n = blob_size(pBlob);
  int k = strlen(zIn);

Changes to src/checkin.c.

1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
      }
      zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
    }else if( fHasInvalidUtf8 ){
      if( encodingOk ){
        return 0; /* We don't want encoding warnings for this file. */
      }
      zWarning = "invalid UTF-8";
      zConvert = ""; /* Possible conversion to UTF-8 not yet implemented. */
      zDisable = "\"encoding-glob\" setting";
    }else if( fHasAnyCr ){
      if( crnlOk ){
        return 0; /* We don't want CR/NL warnings for this file. */
      }
      if( fHasLoneCrOnly ){
        zWarning = "CR line endings";







<







1294
1295
1296
1297
1298
1299
1300

1301
1302
1303
1304
1305
1306
1307
      }
      zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
    }else if( fHasInvalidUtf8 ){
      if( encodingOk ){
        return 0; /* We don't want encoding warnings for this file. */
      }
      zWarning = "invalid UTF-8";

      zDisable = "\"encoding-glob\" setting";
    }else if( fHasAnyCr ){
      if( crnlOk ){
        return 0; /* We don't want CR/NL warnings for this file. */
      }
      if( fHasLoneCrOnly ){
        zWarning = "CR line endings";
1339
1340
1341
1342
1343
1344
1345


1346
1347
1348
1349
1350
1351
1352
        fossil_warning("cannot open %s for writing", zFilename);
      }else{
        if( fUnicode ) {
          int bomSize;
          const unsigned char *bom = get_utf8_bom(&bomSize);
          fwrite(bom, 1, bomSize, f);
          blob_to_utf8_no_bom(p, 0);


        }
        if( fHasAnyCr ){
          blob_to_lf_only(p);
        }
        fwrite(blob_buffer(p), 1, blob_size(p), f);
        fclose(f);
      }







>
>







1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
        fossil_warning("cannot open %s for writing", zFilename);
      }else{
        if( fUnicode ) {
          int bomSize;
          const unsigned char *bom = get_utf8_bom(&bomSize);
          fwrite(bom, 1, bomSize, f);
          blob_to_utf8_no_bom(p, 0);
        }else if( fHasInvalidUtf8 ){
          blob_cp1252_to_utf8(p);
        }
        if( fHasAnyCr ){
          blob_to_lf_only(p);
        }
        fwrite(blob_buffer(p), 1, blob_size(p), f);
        fclose(f);
      }