Fossil

Check-in [6182584217]
Login

Check-in [6182584217]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 618258421767778c41b643302f73e82954946b89
User & Date: mistachkin 2012-11-01 03:44:44.902
References
2012-11-01
10:20
Restore Style fix, which got lost by [618258421767778c] ... (check-in: ef6c243ed9 user: jan.nijtmans tags: trunk)
Context
2012-11-01
07:40
dont check for same BOM twice ... (check-in: 8c32e6f0dd user: jan.nijtmans tags: trunk)
03:44
Merge commit warning and looks_like_text() enhancements to trunk. Further changes based on these will occur on a branch. ... (check-in: 6182584217 user: mistachkin tags: trunk)
2012-10-31
19:48
Add the wiki-moderator and ticket-moderator permissions. Not yet used. ... (check-in: fc0bffd995 user: drh tags: trunk)
2012-10-30
02:17
Refactor commit warning functionality. Break out UTF-16 BOM detection into a new function. Style and comment fixes. ... (Closed-Leaf check-in: d57f0a9361 user: mistachkin tags: commitWarning)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/checkin.c.
885
886
887
888
889
890
891

892
893
894
895
896
897

898
899
900
901
902
903


904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
/*
** Examine the content of a file that is about to be committed.  When
** looks_like_text() reports a negative code for it (CR/NL line endings
** or a UTF-16 byte-order-mark), ask the user whether the commit should
** go ahead.
**
** Answering "a" suppresses every later warning for the remainder of
** the process, "y" accepts this one file, and any other answer aborts
** the commit through fossil_fatal().  CR/NL warnings are skipped
** entirely when crnlOk is true.
*/
static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
  static int allOk = 0;   /* Non-zero once the user has answered "all" */
  int eType;              /* Classification from looks_like_text() */
  const char *zWarning;   /* Short description of what was detected */
  char *zMsg;             /* Prompt text shown to the user */
  Blob fname;             /* Relative pathname of the file */
  Blob ans;               /* Raw reply typed by the user */
  char cReply;            /* First character of that reply */

  if( allOk ) return;
  eType = looks_like_text(p);
  if( eType>=0 ) return;  /* Nothing to complain about */

  /* Of the negative codes, the odd one (-1) means CR/NL line endings
  ** and the even one (-2) means a UTF-16 byte-order-mark. */
  if( (eType&1)==0 ){
    zWarning = "Unicode";
  }else if( crnlOk ){
    return; /* We don't want CR/NL warnings for this file. */
  }else{
    zWarning = "CR/NL line endings";
  }

  file_relative_name(zFilename, &fname, 0);
  blob_zero(&ans);
  zMsg = mprintf(
       "%s contains %s.  commit anyhow (a=all/y/N)? ",
       blob_str(&fname), zWarning);
  prompt_user(zMsg, &ans);
  fossil_free(zMsg);

  cReply = blob_str(&ans)[0];
  switch( cReply ){
    case 'a':
    case 'A':
      allOk = 1;
      break;
    case 'y':
    case 'Y':
      break;
    default:
      fossil_fatal("Abandoning commit due to %s in %s",
                   zWarning, blob_str(&fname));
      break;
  }
  blob_reset(&ans);
  blob_reset(&fname);
}

/*







>






>
|
|



|
>
>










|
|







|







885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
/*
** Examine the content of a file that is about to be committed.  If it
** has CR/NL line endings (looks_like_text() returns -1) and/or starts
** with a UTF-16 byte-order-mark, ask the user whether the commit should
** go ahead.
**
** Answering "a" suppresses every later warning for the remainder of
** the process, "y" accepts this one file, and any other answer aborts
** the commit through fossil_fatal().  When crnlOk is true, a file whose
** only issue is CR/NL line endings produces no warning.
*/
static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
  static int allOk = 0;   /* Non-zero once the user has answered "all" */
  int hasCrNl;            /* True if looks_like_text() reported CR/NL */
  int hasBom;             /* Result of starts_with_utf16_bom() */
  const char *zWarning;   /* Short description of what was detected */
  char *zMsg;             /* Prompt text shown to the user */
  Blob fname;             /* Relative pathname of the file */
  Blob ans;               /* Raw reply typed by the user */
  char cReply;            /* First character of that reply */

  if( allOk ) return;
  hasCrNl = looks_like_text(p)==-1;
  hasBom = starts_with_utf16_bom(p);
  if( !hasCrNl && !hasBom ) return;  /* Nothing to complain about */

  if( !hasBom ){
    /* Only CR/NL line endings were found. */
    if( crnlOk ){
      return; /* We don't want CR/NL warnings for this file. */
    }
    zWarning = "CR/NL line endings";
  }else if( hasCrNl ){
    zWarning = "Unicode and CR/NL line endings";
  }else{
    zWarning = "Unicode";
  }

  file_relative_name(zFilename, &fname, 0);
  blob_zero(&ans);
  zMsg = mprintf(
       "%s contains %s; commit anyhow (a=all/y/N)?",
       blob_str(&fname), zWarning);
  prompt_user(zMsg, &ans);
  fossil_free(zMsg);

  cReply = blob_str(&ans)[0];
  switch( cReply ){
    case 'a':
    case 'A':
      allOk = 1;
      break;
    case 'y':
    case 'Y':
      break;
    default:
      fossil_fatal("Abandoning commit due to %s in %s",
                   zWarning, blob_str(&fname));
      break;
  }
  blob_reset(&ans);
  blob_reset(&fname);
}

/*
Changes to src/diff.c.
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
*/
#define DIFF_CANNOT_COMPUTE_BINARY \
    "cannot compute difference between binary files\n"

#define DIFF_CANNOT_COMPUTE_SYMLINK \
    "cannot compute difference between symlink and regular file\n"

/*
** True when looks_like_text() returns an even code for the blob, that
** is 0 (binary) or -2 (UTF-16 byte-order-mark).  The odd codes 1 and
** -1 denote text and make this macro false.
*/
#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
#endif /* INTERFACE */

/*
** Maximum length of a line in a text file.  (8192)
*/
#define LENGTH_MASK_SZ  13
#define LENGTH_MASK     ((1<<LENGTH_MASK_SZ)-1)







|







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
*/
#define DIFF_CANNOT_COMPUTE_BINARY \
    "cannot compute difference between binary files\n"

#define DIFF_CANNOT_COMPUTE_SYMLINK \
    "cannot compute difference between symlink and regular file\n"

/*
** True when looks_like_text() returns 0 for the blob, i.e. the content
** has an embedded NUL byte or an extremely long line.
*/
#define looks_like_binary(blob) (looks_like_text((blob)) == 0)
#endif /* INTERFACE */

/*
** Maximum length of a line in a text file.  (8192)
*/
#define LENGTH_MASK_SZ  13
#define LENGTH_MASK     ((1<<LENGTH_MASK_SZ)-1)
177
178
179
180
181
182
183
184


185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229


















230
231
232
233
234
235
236
** values are:
**
**  (1) -- The content appears to consist entirely of text, with lines
**         delimited by line-feed characters; however, the encoding may
**         not be UTF-8.
**
**  (0) -- The content appears to be binary because it contains embedded
**         NUL (\000) characters or an extremely long line.


**
** (-1) -- The content appears to consist entirely of text, with lines
**         delimited by carriage-return, line-feed pairs; however, the
**         encoding may not be UTF-8.
**
** (-2) -- The content appears to consist entirely of text, in the
**         UTF-16 (BE or LE) encoding.
*/
int looks_like_text(const Blob *pContent){
  const char *zIn = blob_buffer(pContent);     /* Read cursor */
  unsigned int nByte = blob_size(pContent);    /* Bytes not yet consumed */
  int nLine;       /* Bytes counted so far on the current line */
  int ch;          /* Byte under the cursor */
  int rc = 1;      /* Result so far: text with no CR/NL seen */

  if( nByte==0 ) return rc;   /* Empty content counts as text */
  ch = *zIn;
  if( ch==0 ) return 0;       /* Leading NUL byte -> binary */

  /* A UTF-16 byte-order-mark in either byte order ends the scan. */
  if( nByte>1 ){
    int ch2 = zIn[1];
    if( (ch==(char)0xff && ch2==(char)0xfe)
     || (ch==(char)0xfe && ch2==(char)0xff) ){
      return -2;
    }
  }

  /* Walk the remaining bytes.  The line counter includes the newline
  ** that terminates each line. */
  nLine = (ch=='\n') ? 0 : 1;
  for(nByte--; nByte>0; nByte--){
    ch = *++zIn;
    nLine++;
    if( ch==0 ) return 0;     /* Embedded NUL byte -> binary */
    if( ch!='\n' ) continue;
    if( zIn[-1]=='\r' ){
      rc = -1;                /* CR/NL pair seen; keep scanning */
    }
    if( nLine>LENGTH_MASK ){
      return 0;               /* Very long line -> binary */
    }
    nLine = 0;
  }

  /* The final line may lack a terminating newline; check it too. */
  return nLine>LENGTH_MASK ? 0 : rc;
}



















/*
** Compare two DLine elements.  Returns non-zero when their h fields
** are equal and the first (h & LENGTH_MASK) bytes of their z buffers
** match; returns zero otherwise.
*/
static int same_dline(DLine *pA, DLine *pB){
  if( pA->h!=pB->h ) return 0;   /* Differing h: skip the byte compare */
  return memcmp(pA->z, pB->z, pA->h & LENGTH_MASK)==0;
}







|
>
>





<
<












<
<
<
<
<
<
<



















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







177
178
179
180
181
182
183
184
185
186
187
188
189
190
191


192
193
194
195
196
197
198
199
200
201
202
203







204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
** values are:
**
**  (1) -- The content appears to consist entirely of text, with lines
**         delimited by line-feed characters; however, the encoding may
**         not be UTF-8.
**
**  (0) -- The content appears to be binary because it contains embedded
**         NUL (\000) characters or an extremely long line.  Since this
**         function does not understand UTF-16, it may falsely consider
**         UTF-16 text to be binary.
**
** (-1) -- The content appears to consist entirely of text, with lines
**         delimited by carriage-return, line-feed pairs; however, the
**         encoding may not be UTF-8.
**


*/
int looks_like_text(const Blob *pContent){
  const char *zIn = blob_buffer(pContent);     /* Read cursor */
  unsigned int nByte = blob_size(pContent);    /* Bytes not yet consumed */
  int nLine;       /* Bytes counted so far on the current line */
  int ch;          /* Byte under the cursor */
  int rc = 1;      /* Result so far: text with no CR/NL seen */

  if( nByte==0 ) return rc;   /* Empty content counts as text */
  ch = *zIn;
  if( ch==0 ) return 0;       /* Leading NUL byte -> binary */

  /* Walk the remaining bytes.  The line counter includes the newline
  ** that terminates each line. */
  nLine = (ch=='\n') ? 0 : 1;
  for(nByte--; nByte>0; nByte--){
    ch = *++zIn;
    nLine++;
    if( ch==0 ) return 0;     /* Embedded NUL byte -> binary */
    if( ch!='\n' ) continue;
    if( zIn[-1]=='\r' ){
      rc = -1;                /* CR/NL pair seen; keep scanning */
    }
    if( nLine>LENGTH_MASK ){
      return 0;               /* Very long line -> binary */
    }
    nLine = 0;
  }

  /* The final line may lack a terminating newline; check it too. */
  return nLine>LENGTH_MASK ? 0 : rc;
}

/*
** This function returns non-zero if the blob starts with a UTF-16le
** (bytes 0xFF 0xFE) or UTF-16be (bytes 0xFE 0xFF) byte-order-mark
** (BOM).  Content shorter than two bytes never matches.
*/
int starts_with_utf16_bom(const Blob *pContent){
  /* View the content as unsigned bytes so that the comparisons below
  ** do not depend on whether plain char is signed. */
  const unsigned char *z = (const unsigned char *)blob_buffer(pContent);

  if( blob_size(pContent)<2 ) return 0;
  if( z[0]==0xff && z[1]==0xfe ){
    return 1;  /* UTF-16le BOM */
  }
  if( z[0]==0xfe && z[1]==0xff ){
    return 1;  /* UTF-16be BOM */
  }
  return 0;
}

/*
** Compare two DLine elements.  Returns non-zero when their h fields
** are equal and the first (h & LENGTH_MASK) bytes of their z buffers
** match; returns zero otherwise.
*/
static int same_dline(DLine *pA, DLine *pB){
  if( pA->h!=pB->h ) return 0;   /* Differing h: skip the byte compare */
  return memcmp(pA->z, pB->z, pA->h & LENGTH_MASK)==0;
}