Fossil

Check-in [79f7eb2fc5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow looks_like_utf8/16 to specify when the loop should stop, in stead of leaving it unspecified. Minor comment/doc fixes.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 79f7eb2fc5c906cbc3fb3df8bc25fb49e7242ed2
User & Date: jan.nijtmans 2013-03-27 09:45:47.911
Context
2013-03-27
20:53
Update the built-in SQLite sources to the latest 3.7.16.1 beta for the purpose of testing SQLite. check-in: 2e9be37f50 user: drh tags: trunk
09:45
Allow looks_like_utf8/16 to specify when the loop should stop, in stead of leaving it unspecified. Minor comment/doc fixes. check-in: 79f7eb2fc5 user: jan.nijtmans tags: trunk
2013-03-26
11:12
Enable fossil to distinguish correctly CR/CRNL/mixed line endings. check-in: f89e2eccbb user: jan.nijtmans tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/checkin.c.
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
    for(ii=2; ii<g.argc; ii++){
      int iId;
      file_tree_name(g.argv[ii], &b, 1);
      iId = db_int(-1, "SELECT id FROM vfile WHERE pathname=%Q", blob_str(&b));
      if( iId<0 ){
        fossil_warning("fossil knows nothing about: %s", g.argv[ii]);
        result = 1;
      } else {
        g.aCommitFile[jj++] = iId;
      }
      blob_reset(&b);
    }
    g.aCommitFile[jj] = 0;
  }
  return result;







|







633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
    for(ii=2; ii<g.argc; ii++){
      int iId;
      file_tree_name(g.argv[ii], &b, 1);
      iId = db_int(-1, "SELECT id FROM vfile WHERE pathname=%Q", blob_str(&b));
      if( iId<0 ){
        fossil_warning("fossil knows nothing about: %s", g.argv[ii]);
        result = 1;
      }else{
        g.aCommitFile[jj++] = iId;
      }
      blob_reset(&b);
    }
    g.aCommitFile[jj] = 0;
  }
  return result;
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
  char *zMsg;             /* Warning message */
  Blob fname;             /* Relative pathname of the file */
  static int allOk = 0;   /* Set to true to disable this routine */

  if( allOk ) return 0;
  fUnicode = could_be_utf16(p, &bReverse);
  if( fUnicode ){
    lookFlags = looks_like_utf16(p, bReverse);
  }else{
    lookFlags = looks_like_utf8(p);
  }
  if( lookFlags&(LOOK_BINARY|LOOK_LONG|LOOK_CR) || fUnicode ){
    const char *zWarning;
    const char *zDisable;
    const char *zConvert = "c=convert/";
    Blob ans;
    char cReply;







|

|







913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
  char *zMsg;             /* Warning message */
  Blob fname;             /* Relative pathname of the file */
  static int allOk = 0;   /* Set to true to disable this routine */

  if( allOk ) return 0;
  fUnicode = could_be_utf16(p, &bReverse);
  if( fUnicode ){
    lookFlags = looks_like_utf16(p, bReverse, LOOK_NUL);
  }else{
    lookFlags = looks_like_utf8(p, LOOK_NUL);
  }
  if( lookFlags&(LOOK_BINARY|LOOK_LONG|LOOK_CR) || fUnicode ){
    const char *zWarning;
    const char *zDisable;
    const char *zConvert = "c=convert/";
    Blob ans;
    char cReply;
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
        zWarning = "long lines";
        zConvert = ""; /* We cannot convert binary files. */
      }else{
        zWarning = "binary data";
        zConvert = ""; /* We cannot convert binary files. */
      }
      zDisable = "\"binary-glob\" setting";
    }else if( lookFlags&(LOOK_CR) && fUnicode ){
      if( crnlOk && encodingOk ){
        return 0; /* We don't want CR/NL and Unicode warnings for this file. */
      }
      if( (lookFlags&LOOK_EOL) == LOOK_LONE_CR ){
        zWarning = "CR line endings and Unicode";
      }else if( (lookFlags&LOOK_EOL) == LOOK_CRLF ){
        zWarning = "CR/NL line endings and Unicode";







|







938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
        zWarning = "long lines";
        zConvert = ""; /* We cannot convert binary files. */
      }else{
        zWarning = "binary data";
        zConvert = ""; /* We cannot convert binary files. */
      }
      zDisable = "\"binary-glob\" setting";
    }else if( (lookFlags&LOOK_CR) && fUnicode ){
      if( crnlOk && encodingOk ){
        return 0; /* We don't want CR/NL and Unicode warnings for this file. */
      }
      if( (lookFlags&LOOK_EOL) == LOOK_LONE_CR ){
        zWarning = "CR line endings and Unicode";
      }else if( (lookFlags&LOOK_EOL) == LOOK_CRLF ){
        zWarning = "CR/NL line endings and Unicode";
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
    db_multi_exec("UPDATE vfile SET mrid=%d, rid=%d WHERE id=%d", nrid,nrid,id);
    db_multi_exec("INSERT OR IGNORE INTO unsent VALUES(%d)", nrid);
  }
  db_finalize(&q);
  if( nConflict && !allowConflict ){
    fossil_fatal("abort due to unresolved merge conflicts; "
                 "use --allow-conflict to override");
  } else if( abortCommit ){
    fossil_fatal("one or more files were converted on your request; "
                 "please re-test before committing");
  }

  /* Create the new manifest */
  if( blob_size(&comment)==0 ){
    blob_append(&comment, "(no comment)", -1);







|







1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
    db_multi_exec("UPDATE vfile SET mrid=%d, rid=%d WHERE id=%d", nrid,nrid,id);
    db_multi_exec("INSERT OR IGNORE INTO unsent VALUES(%d)", nrid);
  }
  db_finalize(&q);
  if( nConflict && !allowConflict ){
    fossil_fatal("abort due to unresolved merge conflicts; "
                 "use --allow-conflict to override");
  }else if( abortCommit ){
    fossil_fatal("one or more files were converted on your request; "
                 "please re-test before committing");
  }

  /* Create the new manifest */
  if( blob_size(&comment)==0 ){
    blob_append(&comment, "(no comment)", -1);
Changes to src/db.c.
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
**                     TRUE for unix and FALSE for windows and mac.
**
**    clearsign        When enabled, fossil will attempt to sign all commits
**                     with gpg.  When disabled (the default), commits will
**                     be unsigned.  Default: off
**
**    crnl-glob        A comma or newline-separated list of GLOB patterns for
**     (versionable)   text files in which it is ok to have CR+NL line endings.
**                     Set to "*" to disable CR+NL checking.
**
**    default-perms    Permissions given automatically to new users.  For more
**                     information on permissions see Users page in Server
**                     Administration of the HTTP UI. Default: u.
**
**    diff-binary      If TRUE (the default), permit files that may be binary
**                     or that match the "binary-glob" setting to be used with







|
|







2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
**                     TRUE for unix and FALSE for windows and mac.
**
**    clearsign        When enabled, fossil will attempt to sign all commits
**                     with gpg.  When disabled (the default), commits will
**                     be unsigned.  Default: off
**
**    crnl-glob        A comma or newline-separated list of GLOB patterns for
**     (versionable)   text files in which it is ok to have CR, CR+NL or mixed
**                     line endings. Set to "*" to disable CR+NL checking.
**
**    default-perms    Permissions given automatically to new users.  For more
**                     information on permissions see Users page in Server
**                     Administration of the HTTP UI. Default: u.
**
**    diff-binary      If TRUE (the default), permit files that may be binary
**                     or that match the "binary-glob" setting to be used with
Changes to src/diff.c.
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#define DIFF_TOO_MANY_CHANGES_HTML \
    "<p class='generalError'>More than 10,000 changes</p>\n"

/*
** This macro is designed to return non-zero if the specified blob contains
** data that MAY be binary in nature; otherwise, zero will be returned.
*/
#define looks_like_binary(blob) ((looks_like_utf8(blob)&LOOK_BINARY)!=LOOK_NONE)

/*
** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
** to convey status information about the blob content.
*/
#define LOOK_NONE    ((int)0x00000000) /* Nothing special was found. */
#define LOOK_NUL     ((int)0x00000001) /* One or more NUL chars were found. */







|







59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#define DIFF_TOO_MANY_CHANGES_HTML \
    "<p class='generalError'>More than 10,000 changes</p>\n"

/*
** This macro is designed to return non-zero if the specified blob contains
** data that MAY be binary in nature; otherwise, zero will be returned.
*/
#define looks_like_binary(blob) ((looks_like_utf8(blob, LOOK_BINARY)&LOOK_BINARY)!=LOOK_NONE)

/*
** Output flags for the looks_like_utf8() and looks_like_utf16() routines used
** to convey status information about the blob content.
*/
#define LOOK_NONE    ((int)0x00000000) /* Nothing special was found. */
#define LOOK_NUL     ((int)0x00000001) /* One or more NUL chars were found. */
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
** UTF-8.  It assumes that all code points are the same size.  It does not
** validate any code points.  It makes no attempt to detect if any [invalid]
** switches between UTF-8 and other encodings occur.
**
** The only code points that this function cares about are the NUL character,
** carriage-return, and line-feed.
**
** Whether or not this function examines the entire contents of the blob is
** officially unspecified.
**
************************************ WARNING **********************************
*/
int looks_like_utf8(const Blob *pContent){
  const char *z = blob_buffer(pContent);
  unsigned int n = blob_size(pContent);
  int j, c, flags = LOOK_NONE;  /* Assume UTF-8 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  c = *z;
  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=1 || z[1]!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( --n>0 ){
    int c2 = c;
    c = *++z; ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){
      flags |= LOOK_LF;
      if( c2=='\r' ){







|
|



|
















|







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
** UTF-8.  It assumes that all code points are the same size.  It does not
** validate any code points.  It makes no attempt to detect if any [invalid]
** switches between UTF-8 and other encodings occur.
**
** The only code points that this function cares about are the NUL character,
** carriage-return, and line-feed.
**
** This function examines the contents of the blob until one of the flags
** specified in "stopFlags" is set.
**
************************************ WARNING **********************************
*/
int looks_like_utf8(const Blob *pContent, int stopFlags){
  const char *z = blob_buffer(pContent);
  unsigned int n = blob_size(pContent);
  int j, c, flags = LOOK_NONE;  /* Assume UTF-8 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  c = *z;
  if( c==0 ){
    flags |= LOOK_NUL;  /* NUL character in a file -> binary */
  }else if( c=='\r' ){
    flags |= LOOK_CR;
    if( n<=1 || z[1]!='\n' ){
      flags |= LOOK_LONE_CR;  /* More chars, next char is not LF */
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( !(flags&stopFlags) && --n>0 ){
    int c2 = c;
    c = *++z; ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
    }else if( c=='\n' ){
      flags |= LOOK_LF;
      if( c2=='\r' ){
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
** UTF-16.  It assumes that all code points are the same size.  It does not
** validate any code points.  It makes no attempt to detect if any [invalid]
** switches between the UTF-16be and UTF-16le encodings occur.
**
** The only code points that this function cares about are the NUL character,
** carriage-return, and line-feed.
**
** Whether or not this function examines the entire contents of the blob is
** officially unspecified.
**
************************************ WARNING **********************************
*/
int looks_like_utf16(const Blob *pContent, int bReverse){
  const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
  unsigned int n = blob_size(pContent);
  int j, c, flags = LOOK_NONE;  /* Assume UTF-16 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  if( n%sizeof(WCHAR_T) ){
    flags |= LOOK_ODD;  /* Odd number of bytes -> binary (UTF-8?) */







|
|



|







332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
** UTF-16.  It assumes that all code points are the same size.  It does not
** validate any code points.  It makes no attempt to detect if any [invalid]
** switches between the UTF-16be and UTF-16le encodings occur.
**
** The only code points that this function cares about are the NUL character,
** carriage-return, and line-feed.
**
** This function examines the contents of the blob until one of the flags
** specified in "stopFlags" is set.
**
************************************ WARNING **********************************
*/
int looks_like_utf16(const Blob *pContent, int bReverse, int stopFlags){
  const WCHAR_T *z = (WCHAR_T *)blob_buffer(pContent);
  unsigned int n = blob_size(pContent);
  int j, c, flags = LOOK_NONE;  /* Assume UTF-16 text, prove otherwise */

  if( n==0 ) return flags;  /* Empty file -> text */
  if( n%sizeof(WCHAR_T) ){
    flags |= LOOK_ODD;  /* Odd number of bytes -> binary (UTF-8?) */
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( 1 ){
    int c2 = c;
    n -= sizeof(WCHAR_T);
    if( n<sizeof(WCHAR_T) ) break;
    c = *++z;
    if( bReverse ){
      c = UTF16_SWAP(c);
    }
    ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */







|







364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
    }
  }
  j = (c!='\n');
  if( !j ) flags |= (LOOK_LF | LOOK_LONE_LF);  /* Found LF as first char */
  while( 1 ){
    int c2 = c;
    n -= sizeof(WCHAR_T);
    if( (flags&stopFlags) || n<sizeof(WCHAR_T) ) break;
    c = *++z;
    if( bReverse ){
      c = UTF16_SWAP(c);
    }
    ++j;
    if( c==0 ){
      flags |= LOOK_NUL;  /* NUL character in a file -> binary */
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
  int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
  int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
  if( g.argc!=3 ) usage("FILENAME");
  blob_read_from_file(&blob, g.argv[2]);
  fUtf8 = starts_with_utf8_bom(&blob, 0);
  fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
  fUnicode = could_be_utf16(&blob, &bRevUnicode);
  lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode) :
                         looks_like_utf8(&blob);
  fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
  fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
  fossil_print("Starts with UTF-16 BOM: %s\n",
               fUtf16?(bRevUtf16?"reversed":"yes"):"no");
  fossil_print("Looks like UTF-%s: %s\n",fUnicode?"16":"8",
               (lookFlags&LOOK_BINARY)?"no":"yes");
  fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no");







|
|







2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
  int bRevUtf16 = 0; /* non-zero -> UTF-16 byte order reversed */
  int bRevUnicode = 0; /* non-zero -> UTF-16 byte order reversed */
  if( g.argc!=3 ) usage("FILENAME");
  blob_read_from_file(&blob, g.argv[2]);
  fUtf8 = starts_with_utf8_bom(&blob, 0);
  fUtf16 = starts_with_utf16_bom(&blob, 0, &bRevUtf16);
  fUnicode = could_be_utf16(&blob, &bRevUnicode);
  lookFlags = fUnicode ? looks_like_utf16(&blob, bRevUnicode, 0) :
                         looks_like_utf8(&blob, 0);
  fossil_print("File \"%s\" has %d bytes.\n",g.argv[2],blob_size(&blob));
  fossil_print("Starts with UTF-8 BOM: %s\n",fUtf8?"yes":"no");
  fossil_print("Starts with UTF-16 BOM: %s\n",
               fUtf16?(bRevUtf16?"reversed":"yes"):"no");
  fossil_print("Looks like UTF-%s: %s\n",fUnicode?"16":"8",
               (lookFlags&LOOK_BINARY)?"no":"yes");
  fossil_print("Has flag LOOK_NUL: %s\n",(lookFlags&LOOK_NUL)?"yes":"no");
Changes to src/utf8.c.
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
        *wUnicode = '\\';
      }
      ++wUnicode;
    }
    nByte = cygwin_conv_path(CCP_WIN_W_TO_POSIX, zUnicode, NULL, 0);
    zPath = fossil_malloc(nByte);
    cygwin_conv_path(CCP_WIN_W_TO_POSIX, zUnicode, zPath, nByte);
  } else {
    zPath = fossil_strdup(zUtf8);
    zUtf8 = p = zPath;
    while( (*p = *zUtf8++) != 0){
      if (*p++ == '\\' ) {
        p[-1] = '/';
      }
    }
  }
  return zPath;
#elif defined(__APPLE__) && !defined(WITHOUT_ICONV)
  return fossil_strdup(zUtf8);







|



|







198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
        *wUnicode = '\\';
      }
      ++wUnicode;
    }
    nByte = cygwin_conv_path(CCP_WIN_W_TO_POSIX, zUnicode, NULL, 0);
    zPath = fossil_malloc(nByte);
    cygwin_conv_path(CCP_WIN_W_TO_POSIX, zUnicode, zPath, nByte);
  }else{
    zPath = fossil_strdup(zUtf8);
    zUtf8 = p = zPath;
    while( (*p = *zUtf8++) != 0){
      if( *p++ == '\\' ) {
        p[-1] = '/';
      }
    }
  }
  return zPath;
#elif defined(__APPLE__) && !defined(WITHOUT_ICONV)
  return fossil_strdup(zUtf8);
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
    return 0;
  }
  nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar);
  /* Split WriteConsoleW call into multiple chunks, if necessary. See:
   * <https://connect.microsoft.com/VisualStudio/feedback/details/635230> */
  while( written < nChar ){
    int size = nChar-written;
    if (size > 26000) size = 26000;
    WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode+written,
        size, &dummy, 0);
    written += size;
  }
  free(zUnicode);
  return nChar;
#else
  return -1;  /* No-op on unix */
#endif
}







|










260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
    return 0;
  }
  nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, nByte, zUnicode, nChar);
  /* Split WriteConsoleW call into multiple chunks, if necessary. See:
   * <https://connect.microsoft.com/VisualStudio/feedback/details/635230> */
  while( written < nChar ){
    int size = nChar-written;
    if( size > 26000 ) size = 26000;
    WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE - toStdErr), zUnicode+written,
        size, &dummy, 0);
    written += size;
  }
  free(zUnicode);
  return nChar;
#else
  return -1;  /* No-op on unix */
#endif
}
Changes to www/changes.wiki.
1
2
3



4
5
6
7
8
9
10
11
12
13
14
15
<title>Change Log</title>

<h2>Changes For Version 1.26 (as yet unreleased)</h2>



  *  Enhancements to /timeline.rss, adding more flags for filtering
     results, including the ability to subscribe to changes made
     to individual tickets. For example: [/timeline.rss?y=t&tkt=12fceeec82].
  *  JSON API: added the 'status' command to report local checkout status.


<h2>Changes For Version 1.25 (2013-02-16)</h2>
  *  Enhancements to ticket processing. There are now two tables: TICKET and
     TICKETCHNG. There is one row in TICKETCHNG for each ticket artifact.
     Fields from ticket artifacts go into either or both of TICKET and
     TICKETCHNG, whichever contain matching column names. Default ticket 
     edit and viewing scripts are updated to use TICKETCHNG. The TH1



>
>
>




<







1
2
3
4
5
6
7
8
9
10

11
12
13
14
15
16
17
<title>Change Log</title>

<h2>Changes For Version 1.26 (as yet unreleased)</h2>
  *  Cygwin: Fossil now understands win32 absolute paths starting with a drive
     letter everywhere. The default value of the "case-sensitive" setting is
     now FALSE.
  *  Enhancements to /timeline.rss, adding more flags for filtering
     results, including the ability to subscribe to changes made
     to individual tickets. For example: [/timeline.rss?y=t&tkt=12fceeec82].
  *  JSON API: added the 'status' command to report local checkout status.


<h2>Changes For Version 1.25 (2013-02-16)</h2>
  *  Enhancements to ticket processing. There are now two tables: TICKET and
     TICKETCHNG. There is one row in TICKETCHNG for each ticket artifact.
     Fields from ticket artifacts go into either or both of TICKET and
     TICKETCHNG, whichever contain matching column names. Default ticket 
     edit and viewing scripts are updated to use TICKETCHNG. The TH1