Check-in [4d456c9fd1]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further fine-tuning of the check for valid UTF8 characters in filenames.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 4d456c9fd17bd289fd38b9b09d4e19a2c1af3183
User & Date: drh 2013-01-23 13:15:52.388
Context
2013-01-23
13:24
Add the max-download-time server option that limits the amount of real-time that the server will spend preparing an xfer protocol reply. check-in: 769c90a230 user: drh tags: trunk
13:15
Further fine-tuning of the check for valid UTF8 characters in filenames. check-in: 4d456c9fd1 user: drh tags: trunk
13:09
Only run ANALYZE if the --analyze flag is provided to "fossil rebuild" or "fossil all rebuild". check-in: 3104348ec5 user: drh tags: trunk
2013-01-21
13:12
Oops, make it work correct now. Closed-Leaf check-in: 7dabede3b3 user: jan.nijtmans tags: disallow-invalid-utf8-in-filenames
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/file.c.
488
489
490
491
492
493
494
495




496
497
498
499
500
501
502
503
504
505
506
507



508



509
510
511
512
513
514
515

516



517
518
519
520

521



522
523
524
525
526
527
528









529
530
531
532
533
534
535
**     *  Does not contain any of these characters in the path: "\"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
**
** Invalid UTF8 characters result in a false return if bStrictUtf8 is
** true.  If bStrictUtf8 is false, invalid UTF8 characters are silently
** ignored.




*/
int file_is_simple_pathname(const char *z, int bStrictUtf8){
  int i;
  char c = z[0];
  char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){
    if( c & maskNonAscii ){



      if( (c & 0xf0) == 0xf0 ) {



        /* Unicode characters > U+FFFF are not supported.
         * Windows XP and earlier cannot handle them.
         */
        return 0;
      }
      if( (c & 0xf0) == 0xe0 ) {
        /* This is a 3-byte UTF-8 character */

        if ( (c & 0xfe) == 0xee ){



          /* Range U+E000 - U+FFFF (Starting with 0xee or 0xef in UTF-8 ) */
          if ( !(c & 1) || ((z[i+1] & 0xff) < 0xa4) ){
            /* Unicode character in the range U+E000 - U+F8FF are for
             * private use, they shouldn't occur in filenames.  */

            return 0;



          }
        }else if( ((c & 0xff) == 0xed) && ((z[i+1] & 0xe0) == 0xa0) ){
          /* Unicode character in the range U+D800 - U+DFFF are for
           * surrogate pairs, they shouldn't occur in filenames. */
          return 0;
        }
      }









    }else if( c=='\\' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;
      if( z[i+1]=='.' ){
        if( z[i+2]=='/' || z[i+2]==0 ) return 0;







|
>
>
>
>



|






|

>
>
>
|
>
>
>
|
|
|
|
|
<

>
|
>
>
>
|
|
|
<
>

>
>
>

|
<
|



>
>
>
>
>
>
>
>
>







488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523

524
525
526
527
528
529
530
531
532

533
534
535
536
537
538
539

540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
**     *  Does not contain any of these characters in the path: "\"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
**
** Invalid UTF8 characters result in a false return if bStrictUtf8 is
** true.  If bStrictUtf8 is false, invalid UTF8 characters are silently
** ignored. See http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences
** and http://en.wikipedia.org/wiki/Unicode (for the noncharacters)
**
** The bStrictUtf8 flag is true for new inputs, but is false when parsing
** legacy manifests, for backwards compatibility.
*/
int file_is_simple_pathname(const char *z, int bStrictUtf8){
  int i;
  unsigned char c = (unsigned char) z[0];
  char maskNonAscii = bStrictUtf8 ? 0x80 : 0x00;
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=(unsigned char)z[i])!=0; i++){
    if( c & maskNonAscii ){
      if( c<0xc2 ){
        /* Invalid 1-byte UTF-8 sequence, or 2-byte overlong form. */
        return 0;
      }else if( (c&0xe0)==0xe0 ){
        /* 3-byte or more */
        int unicode;
        if( c&0x10 ){
          /* Unicode characters > U+FFFF are not supported.
           * Windows XP and earlier cannot handle them.
           */
          return 0;
        }

        /* This is a 3-byte UTF-8 character */
        unicode = ((c&0x0f)<<12) + ((z[i+1]&0x3f)<<6) + (z[i+2]&0x3f);
        if( unicode <= 0x07ff ){
          /* overlong form */
          return 0;
        }else if( unicode>=0xe000 ){
          /* U+E000..U+FFFF */
          if( (unicode<=0xf8ff) || (unicode>=0xfffe) ){
            /* U+E000..U+F8FF are for private use.

             * U+FFFE..U+FFFF are noncharacters. */
            return 0;
          } else if( (unicode>=0xfdd0) && (unicode<=0xfdef) ){
            /* U+FDD0..U+FDEF are noncharacters. */
            return 0;
          }
        }else if( (unicode>=0xD800) && (unicode<=0xDFFF) ){

          /* U+D800..U+DFFF are for surrogate pairs. */
          return 0;
        }
      }
      do{
        if( (z[i+1]&0xc0)!=0x80 ){
          /* Invalid continuation byte (multi-byte UTF-8) */
          return 0;
        }
        /* The hi-bits of c are used to keep track of the number of expected
         * continuation-bytes, so we don't need a separate counter. */
        c<<=1; ++i;
      }while( c>=0xc0 );
    }else if( c=='\\' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;
      if( z[i+1]=='.' ){
        if( z[i+2]=='/' || z[i+2]==0 ) return 0;
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
#if defined(_WIN32)
  for(i=0; i<n; i++){
    if( z[i]=='\\' ) z[i] = '/';
  }
#endif

  /* Removing trailing "/" characters */
  if ( !slash ){
    while( n>1 && z[n-1]=='/' ){ n--; }
  }

  /* Remove duplicate '/' characters.  Except, two // at the beginning
  ** of a pathname is allowed since this is important on windows. */
  for(i=j=1; i<n; i++){
    z[j++] = z[i];







|







600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
#if defined(_WIN32)
  for(i=0; i<n; i++){
    if( z[i]=='\\' ) z[i] = '/';
  }
#endif

  /* Removing trailing "/" characters */
  if( !slash ){
    while( n>1 && z[n-1]=='/' ){ n--; }
  }

  /* Remove duplicate '/' characters.  Except, two // at the beginning
  ** of a pathname is allowed since this is important on windows. */
  for(i=j=1; i<n; i++){
    z[j++] = z[i];
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
    if( zPath[i]==0 ){
      blob_reset(pOut);
      if( zPwd[i]==0 ){
        blob_append(pOut, ".", 1);
      }else{
        blob_append(pOut, "..", 2);
        for(j=i+1; zPwd[j]; j++){
          if( zPwd[j]=='/' ) {
            blob_append(pOut, "/..", 3);
          }
        }
      }
      return;
    }
    if( zPwd[i]==0 && zPath[i]=='/' ){
      memcpy(&tmp, pOut, sizeof(tmp));
      blob_set(pOut, "./");
      blob_append(pOut, &zPath[i+1], -1);
      blob_reset(&tmp);
      return;
    }
    while( zPath[i-1]!='/' ){ i--; }
    blob_set(&tmp, "../");
    for(j=i; zPwd[j]; j++){
      if( zPwd[j]=='/' ) {
        blob_append(&tmp, "../", 3);
      }
    }
    blob_append(&tmp, &zPath[i], -1);
    blob_reset(pOut);
    memcpy(pOut, &tmp, sizeof(tmp));
  }







|
















|







857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
    if( zPath[i]==0 ){
      blob_reset(pOut);
      if( zPwd[i]==0 ){
        blob_append(pOut, ".", 1);
      }else{
        blob_append(pOut, "..", 2);
        for(j=i+1; zPwd[j]; j++){
          if( zPwd[j]=='/' ){
            blob_append(pOut, "/..", 3);
          }
        }
      }
      return;
    }
    if( zPwd[i]==0 && zPath[i]=='/' ){
      memcpy(&tmp, pOut, sizeof(tmp));
      blob_set(pOut, "./");
      blob_append(pOut, &zPath[i+1], -1);
      blob_reset(&tmp);
      return;
    }
    while( zPath[i-1]!='/' ){ i--; }
    blob_set(&tmp, "../");
    for(j=i; zPwd[j]; j++){
      if( zPwd[j]=='/' ){
        blob_append(&tmp, "../", 3);
      }
    }
    blob_append(&tmp, &zPath[i], -1);
    blob_reset(pOut);
    memcpy(pOut, &tmp, sizeof(tmp));
  }