Fossil

Changes On Branch auto-toc
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch auto-toc Excluding Merge-Ins

This is equivalent to a diff from 83f03e91c4 to 77cbe291af

2020-09-15
20:10
Bug fixes in Pandoc identifier generation. Now closed. See [branch/auto-toc|branch wiki] for discussion. ... (Closed-Leaf check-in: 77cbe291af user: drh tags: auto-toc)
19:50
Automatic table-of-contents generated for Markdown if there is a tag of the form: <!--markdown: toc=N --> where N is an integer that is the deepest level of content that will be added to the index. The TOC is inserted in place of the magic HTML comment. ... (check-in: 6142e11d20 user: drh tags: auto-toc)
19:23
Remove some end-of-line whitespace and fix some very minor comment typos and capitalization errors ... (check-in: 0537925523 user: andygoth tags: trunk)
18:20
Enable automatic paragraph numbering in Markdown using a special HTML comment: "<!--markdown paragraph-numbers=on -->" ... (check-in: d9a70a1df9 user: drh tags: auto-toc)
16:40
fossil.pikchr.addSrcView() now tags each processed SVG element to avoid potentially processing the same one multiple times. Added fossil.pikchr support to /doc, /wiki, and /wikiedit/fileedit previews. This is harmless if there are no pikchrs or JS is disabled. ... (check-in: 83f03e91c4 user: stephan tags: trunk)
15:43
Fix the markdown converter so that it recognizes HTML comments and passes them through into the raw_html_tag callback. ... (check-in: 881f86645a user: drh tags: trunk)

Changes to src/blob.c.

650
651
652
653
654
655
656






















































657
658
659
660
661
662
663
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  pFrom->iCursor = i;
  while( i<n && !fossil_isspace(aData[i]) ){ i++; }
  blob_extract(pFrom, i-pFrom->iCursor, pTo);
  while( i<n && fossil_isspace(aData[i]) ){ i++; }
  pFrom->iCursor = i;
  return pTo->nUsed;
}

/*
** Extract a single token of one of the forms:
**
**       ="TEXT"
**       ='TEXT'
**       =TEXT
**
** The leading = is present if and only if skipEq is true.
**
** TEXT is the part that is extracted. There can be whitespace on
** either side of the text.  The TEXT ends at the matching delimiter,
** or at whitespace or "=" if there is no delimiter.
**
** Return true if an argument is found.   Return zero and leave
** the cursor unchanged if there is no argument.  "No argument" includes:
**
**    *  The required "=" is missing (skipEq only).
**    *  Only whitespace remains.
**    *  A quoted TEXT has no closing delimiter.
**    *  An opening quote has fewer than two characters after it.
**    *  An undelimited TEXT would be empty (the next character is "=").
**       Reporting success for an empty, zero-progress token would let
**       a caller that loops on this function spin forever.
**
** On success, the cursor of pFrom is left pointing at the first
** non-whitespace character past the end of the argument.
**
** pTo will be an ephemeral blob.  If pFrom changes, it might alter
** pTo as well.
*/
int blob_argument_token(Blob *pFrom, Blob *pTo, int skipEq){
  char *aData = pFrom->aData;
  int n = pFrom->nUsed;
  int i = pFrom->iCursor;
  int iStart;
  char cDelim;
  while( i<n && fossil_isspace(aData[i]) ){ i++; }
  if( skipEq ){
    if( i>=n || aData[i]!='=' ) return 0;
    i++;
    while( i<n && fossil_isspace(aData[i]) ){ i++; }
  }
  if( i>=n ) return 0;
  cDelim = aData[i];
  if( cDelim=='\'' || cDelim=='"' ){
    if( i>=n-2 ) return 0;
    iStart = ++i;
    while( i<n && aData[i]!=cDelim ){ i++; }
    /* Unterminated quote: fail before touching pFrom->iCursor */
    if( i>=n ) return 0;
    /* The cursor is positioned on the token start before extracting */
    pFrom->iCursor = iStart;
    blob_extract(pFrom, i-iStart, pTo);
    i++;  /* Step over the closing delimiter */
  }else{
    iStart = i;
    while( i<n && !fossil_isspace(aData[i]) && aData[i]!='=' ){ i++; }
    /* Stray "=": there is no token here and the cursor must not move */
    if( i==iStart ) return 0;
    pFrom->iCursor = iStart;
    blob_extract(pFrom, i-iStart, pTo);
  }
  while( i<n && fossil_isspace(aData[i]) ){ i++; }
  pFrom->iCursor = i;
  return 1;
}

/*
** Extract a single SQL token from pFrom and use it to initialize pTo.
** Return the number of bytes in the token.  If no token is found,
** return 0.
**
** An SQL token consists of one or more non-space characters.  If the

Changes to src/markdown.c.

370
371
372
373
374
375
376
377

378
379
380
381
382
383
384
370
371
372
373
374
375
376

377
378
379
380
381
382
383
384







-
+








  /* begins with a '<' optionally followed by '/', followed by letter */
  if( data[0]!='<' ) return 0;
  i = (data[1]=='/') ? 2 : 1;
  if( (data[i]<'a' || data[i]>'z') &&  (data[i]<'A' || data[i]>'Z') ){
    if( data[1]=='!' && size>=7 && data[2]=='-' && data[3]=='-' ){
      for(i=6; i<size && (data[i]!='>'||data[i-1]!='-'|| data[i-2]!='-');i++){}
      if( i<size ) return i;
      if( i<size ) return i+1;
    }
    return 0;
  }

  /* scheme test */
  *autolink = MKDA_NOT_AUTOLINK;
  if( size>6

Changes to src/markdown_html.c.

27
28
29
30
31
32
33













34
35
36
37
38
39
40







41
42






























































































43
44
45
46
47
48
49
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162







+
+
+
+
+
+
+
+
+
+
+
+
+






-
+
+
+
+
+
+
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







void markdown_to_html(
  struct Blob *input_markdown,
  struct Blob *output_title,
  struct Blob *output_body);

#endif /* INTERFACE */

/*
** One MarkdownHeading is created for every heading that is rendered.
** Each instance lives in its own memory allocation and is linked
** into a doubly-linked list of all headings seen so far.
*/
typedef struct MarkdownHeading MarkdownHeading;
struct MarkdownHeading {
  MarkdownHeading *pPrev;   /* Previous heading in the list, or NULL */
  MarkdownHeading *pNext;   /* Next heading in the list, or NULL */
  char *zTitle;             /* Heading text as displayed */
  char *zTag;               /* Pandoc-style identifier for the heading */
  int iLevel;               /* Heading level of this entry */
  int nth;                  /* Count of earlier headings with the same zTag */
};

/*
** An instance of the following structure is passed through the
** "opaque" pointer of every renderer callback.  It carries the
** rendering state that must survive from one callback to the next.
*/
typedef struct MarkdownToHtml MarkdownToHtml;
struct MarkdownToHtml {
  Blob *output_title;                /* Store the title here */
  MarkdownHeading *pFirst, *pLast;   /* List of all headings */
  int iToc;         /* Offset in the output where the TOC is inserted */
  int mxToc;        /* Maximum table-of-content level; 0 means no TOC */
  int mnLevel;      /* Minimum level seen over all headings */
  int iHdngNums;    /* Non-zero to number headings; value is the level
                    ** that is numbered as if it were the top level */
  int aNum[6];      /* Most recent number at each level */
};

/*
** Add a new heading to the end of the heading list in pCtx.  This
** involves generating a Pandoc-like identifier from the heading text.
**
**   pCtx     Rendering context that owns the heading list.
**   text     The heading text (already rendered; may hold HTML markup).
**   iLevel   The heading level.
**
** The new entry, its title copy and its identifier are carved out of
** a single fossil_malloc() allocation, so releasing the entry releases
** both strings as well.  pCtx->mnLevel is updated to the smallest
** level seen so far.
*/
static void html_new_heading(MarkdownToHtml *pCtx, Blob *text, int iLevel){
  MarkdownHeading *pNew, *pSearch;
  int nText = blob_size(text);
  /* Room for the struct, the title (nText+1), and the identifier, which
  ** is never longer than the title but may become "section" (8 bytes). */
  size_t n = sizeof(*pNew) + (size_t)nText*2 + 10;
  const char *zText = blob_buffer(text);
  char *zTag;
  int i, j;
  int seenChar = 0;   /* True once the first letter has been seen */

  pNew = fossil_malloc( n );
  memset(pNew, 0, n);
  if( pCtx->pLast ){
    pCtx->pLast->pNext = pNew;
    if( pCtx->mnLevel>iLevel ) pCtx->mnLevel = iLevel;
  }else{
    pCtx->mnLevel = iLevel;
  }
  pNew->pPrev = pCtx->pLast;
  pCtx->pLast = pNew;
  if( pCtx->pFirst==0 ) pCtx->pFirst = pNew;
  pNew->zTitle = (char*)&pNew[1];
  memcpy(pNew->zTitle, zText, nText);
  pNew->zTitle[nText] = 0;
  pNew->zTag = pNew->zTitle + nText + 1;
  pNew->iLevel = iLevel;

  /* Generate an identifier.  The identifier name is approximately the
  ** same as a Pandoc identifier.
  **
  **  *  Skip all text up to the first letter.
  **  *  Remove all text past the last letter.
  **  *  Remove HTML markup and entities.
  **  *  Replace all whitespace sequences with a single "-"
  **  *  Replace every character other than alphanumeric, "_", "-",
  **     and "." with "-", never emitting two such "-" in a row.
  **  *  Convert all alphabetics to lower case.
  **  *  If nothing remains, use "section" as the identifier.
  **
  ** The text is rewritten in place.  The write index j never gets
  ** ahead of the read index i.
  */
  memcpy(pNew->zTag, zText, nText);
  pNew->zTag[nText] = 0;
  zTag = pNew->zTag;
  for(i=j=0; zTag[i]; i++){
    char c = zTag[i];
    if( fossil_isupper(c) ){
      if( !seenChar ){ j = 0; seenChar = 1; }
      zTag[j++] = fossil_tolower(c);
      continue;
    }
    if( fossil_islower(c) ){
      if( !seenChar ){ j = 0; seenChar = 1; }
      zTag[j++] = c;
      continue;
    }
    if( c=='<' ){
      int nTag = html_tag_length(zTag+i);
      if( nTag>0 ){
        i += nTag - 1;
        continue;
      }
      /* Not recognized as markup (e.g. an HTML comment).  Stepping by
      ** nTag-1 here would leave i where it is and the loop would never
      ** end, so fall through and treat "<" as ordinary punctuation. */
    }
    if( c=='&' ){
      while( zTag[i] && zTag[i]!=';' ){ i++; }
      if( zTag[i]==0 ) break;
      continue;
    }
    if( fossil_isspace(c) ){
      if( j && zTag[j-1]!='-' ) zTag[j++] = '-';
      while( fossil_isspace(zTag[i+1]) ){ i++; }
      continue;
    }
    if( !fossil_isalnum(c) && c!='.' && c!='_' && c!='-' ){
      if( j && zTag[j-1]!='-' ) zTag[j++] = '-';
    }else{
      zTag[j++] = c;
    }
  }
  if( j==0 || !seenChar ){
    memcpy(zTag, "section", 7);
    j = 7;
  }
  while( j>0 && !fossil_isalpha(zTag[j-1]) ){ j--; }
  zTag[j] = 0;

  /* Search for duplicate identifiers and disambiguate.  The nearest
  ** earlier heading with the same identifier carries the running count;
  ** nth stays 0 (from the memset above) when there is no duplicate. */
  for(pSearch=pNew->pPrev; pSearch; pSearch=pSearch->pPrev){
    if( strcmp(pSearch->zTag,zTag)==0 ){
      pNew->nth = pSearch->nth+1;
      break;
    }
  }
}


/* INTER_BLOCK -- skip a line between block level elements.
**
** Appends a single '\n' to blob "ob", but only when "ob" already holds
** some content, so an empty output never starts with a blank line.
** Wrapped in do{...}while(0) so it behaves as one statement.  Note that
** "ob" is evaluated twice.
*/
#define INTER_BLOCK(ob) \
  do { if( blob_size(ob)>0 ) blob_append_char(ob, '\n'); } while (0)

/* BLOB_APPEND_LITERAL -- append a string literal to a blob */
#define BLOB_APPEND_LITERAL(blob, literal) \
135
136
137
138
139
140
141
142
















































143
144
145
146

147
148
149
150
151
152
153
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




+







  assert( blob_size(ob)==PROLOG_SIZE );
}

/*
** Renderer callback that finishes the document: emit the inter-block
** newline (only if "ob" is non-empty) and then the closing "</div>".
** The "opaque" context is not used here.
** NOTE(review): the matching opening <div> is presumably written by the
** prolog callback, which is not fully visible here -- confirm.
*/
static void html_epilog(struct Blob *ob, void *opaque){
  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, "</div>\n");
}

/*
** If text is an HTML control comment, then deal with it and return true.
** Otherwise just return false without making any changes.
**
** We are looking for comments of the following form:
**
**     <!--markdown: toc=N -->
**     <!--markdown: paragraph-numbers=on -->
**     <!--markdown: paragraph-numbers=N -->
**
** In the paragraph-numbers=N form with N>1, N-th level headings are
** numbered like top-levels.  N+1-th level headings are like 2nd levels.
** and so forth.
**
** In the toc=N form, a table of contents is generated for all headings
** less than or equal to level N.  The current size of ob is remembered
** as the place where the table of contents will be inserted.
**
** Any other token, except the closing "-->", is reported by appending
** an "unknown-tag" HTML comment to ob.
**
** Parameters:
**   ob      Output accumulated so far.
**   text    The candidate comment.  Its cursor is moved while parsing.
**   opaque  Pointer to the MarkdownToHtml rendering context.
*/
static int html_control_comment(Blob *ob, Blob *text, void *opaque){
  Blob token, arg;
  MarkdownToHtml *pCtx;
  if( blob_size(text)<20 ) return 0;
  if( strncmp(blob_buffer(text),"<!--markdown:",13)!=0 ) return 0;
  pCtx = (MarkdownToHtml*)opaque;
  blob_seek(text, 13, BLOB_SEEK_SET);
  blob_init(&token, 0, 0);
  blob_init(&arg, 0, 0);
  while( blob_argument_token(text, &token, 0) ){
    if( blob_size(&token)==0 ){
      /* An empty token means the tokenizer is sitting on something it
      ** cannot consume (for example the "=" in "unknown=value") and may
      ** not have advanced.  Stop here rather than loop forever while
      ** appending to ob. */
      blob_reset(&token);
      break;
    }
    if( blob_eq_str(&token, "toc", 3) && blob_argument_token(text, &arg, 1) ){
      pCtx->iToc = blob_size(ob);
      pCtx->mxToc = atoi(blob_str(&arg));
      blob_reset(&arg);
    }else
    if( blob_eq_str(&token,"paragraph-numbers",-1)
     && blob_argument_token(text,&arg,1)
    ){
      /* A leading digit selects the numeric form; anything else is
      ** interpreted as a boolean by is_truth(). */
      char *zArg = blob_str(&arg);
      pCtx->iHdngNums = fossil_isdigit(zArg[0]) ? atoi(zArg) : is_truth(zArg);
      blob_reset(&arg);
    }else
    if( !blob_eq_str(&token,"-->",3) ){
      blob_appendf(ob, "<!--markdown: unknown-tag=\"%h\" -->",
                   blob_str(&token));
    }
    blob_reset(&token);
  }
  return 1;
}

static void html_blockhtml(struct Blob *ob, struct Blob *text, void *opaque){
  char *data = blob_buffer(text);
  size_t size = blob_size(text);
  Blob *title = ((MarkdownToHtml*)opaque)->output_title;
  if( html_control_comment(ob,text,opaque) ) return;
  while( size>0 && fossil_isspace(data[0]) ){ data++; size--; }
  while( size>0 && fossil_isspace(data[size-1]) ){ size--; }
  /* If the first raw block is an <h1> element, then use it as the title. */
  if( blob_size(ob)<=PROLOG_SIZE
   && size>9
   && title!=0
   && sqlite3_strnicmp("<h1",data,3)==0
178
179
180
181
182
183
184


185

186
187
188
189
190
191
192





193












194
195
196
197
198
199
200
340
341
342
343
344
345
346
347
348

349
350
351
352
353
354
355
356
357
358
359
360
361

362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380







+
+
-
+







+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+








/*
** Renderer callback for a heading.
**
**   ob      Output body being built.
**   text    Rendered heading text.
**   level   Heading level, used for the <hN> element.
**   opaque  Pointer to the MarkdownToHtml rendering context.
**
** The heading is recorded in the context's heading list and emitted
** as <hN id='TAG'>, or <hN id='TAG-K'> when K earlier headings already
** produced the same TAG.  If heading numbering is enabled, a dotted
** section number is written in front of the heading text.
*/
static void html_header(
  struct Blob *ob,
  struct Blob *text,
  int level,
  void *opaque
){
  MarkdownToHtml *pCtx = (MarkdownToHtml*)opaque;
  MarkdownHeading *pHdng;
  struct Blob *title = pCtx->output_title;
  /* The first header at the beginning of a text is considered as
   * a title and not output. */
  if( blob_size(ob)<=PROLOG_SIZE && title!=0 && blob_size(title)==0 ){
    BLOB_APPEND_BLOB(title, text);
    return;
  }
  INTER_BLOCK(ob);
  html_new_heading(pCtx, text, level);
  pHdng = pCtx->pLast;   /* The entry just added by html_new_heading() */
  if( pHdng->nth ){
    blob_appendf(ob, "<h%d id='%h-%d'>", level, pHdng->zTag, pHdng->nth);
  }else{
    blob_appendf(ob, "<h%d id='%h'>", level, pHdng->zTag);
  }
  if( pCtx->iHdngNums && level>=pCtx->iHdngNums ){
    int i;
    /* Counters of the enclosing levels, starting at the base level */
    for(i=pCtx->iHdngNums-1; i<level-1; i++){
      blob_appendf(ob,"%d.",pCtx->aNum[i]);
    }
    /* Advance and print the counter of this heading's own level */
    blob_appendf(ob,"%d", ++pCtx->aNum[i]);
    /* Headings at the base level are shown as "N.0" */
    if( i==pCtx->iHdngNums-1 ) blob_append(ob, ".0", 2);
    blob_append(ob, " ", 1);
    /* Restart numbering of all deeper levels */
    for(i++; i<6; i++) pCtx->aNum[i] = 0;
  }
  BLOB_APPEND_BLOB(ob, text);
  blob_appendf(ob, "</h%d>", level);
}

static void html_hrule(struct Blob *ob, void *opaque){
  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, "<hr />\n");
301
302
303
304
305
306
307
308
309
310
311
312




313


314
315
316
317
318
319
320
481
482
483
484
485
486
487


488

489
490
491
492
493

494
495
496
497
498
499
500
501
502







-
-

-

+
+
+
+
-
+
+







  void *opaque
){
  BLOB_APPEND_LITERAL(ob, "  <tr>\n");
  BLOB_APPEND_BLOB(ob, cells);
  BLOB_APPEND_LITERAL(ob, "  </tr>\n");
}



/* HTML span tags */

/*
** Renderer callback for a raw HTML span.  Markdown control comments are
** consumed by html_control_comment(); everything else is copied to the
** output exactly once, without change.  Always returns 1.
*/
static int html_raw_html_tag(struct Blob *ob, struct Blob *text, void *opaque){
  if( !html_control_comment(ob,text,opaque) ){
    /* Everything else is passed through without change */
    blob_append(ob, blob_buffer(text), blob_size(text));
  }
  return 1;
}

static int html_autolink(
  struct Blob *ob,
  struct Blob *link,
  enum mkd_autolink type,
527
528
529
530
531
532
533











































534
535
536
537
538
539
540
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  return 1;
}


/*
** Renderer callback for ordinary text: hand the bytes of "text" to
** html_escape(), which writes its result into "ob".  The "opaque"
** context is not used.
** NOTE(review): html_escape() is defined outside this view; presumably
** it escapes HTML-special characters -- confirm.
*/
static void html_normal_text(struct Blob *ob, struct Blob *text, void *opaque){
  html_escape(ob, blob_buffer(text), blob_size(text));
}

/*
** Insert a table of contents into the body of the document.
**
** The pCtx provides the information needed to do this:
**
**    pCtx->iToc              Offset into pOut of where to insert the TOC
**    pCtx->mxToc             Maximum depth of the TOC
**    pCtx->mnLevel           Smallest heading level; the outermost list
**    pCtx->pFirst            List of headings to form the TOC
**
** Each entry links to the id of its heading.  A heading whose
** identifier duplicates an earlier one has id "TAG-K" (K is its nth
** value), so the link must carry the same suffix; otherwise every
** duplicate entry would jump to the first heading with that TAG.
**
** The content of pOut is replaced by a new blob that holds the original
** text with the TOC spliced in at offset iToc.
*/
static void html_insert_toc(MarkdownToHtml *pCtx, Blob *pOut){
  Blob new;
  MarkdownHeading *pX;
  int iLevel = pCtx->mnLevel-1;   /* Current list nesting level */
  int iBase = iLevel;             /* Level at which no list is open */
  blob_init(&new, 0, 0);
  blob_append(&new, blob_buffer(pOut), pCtx->iToc);
  blob_append(&new, "<div class='markdown-toc'>\n", -1);
  for(pX=pCtx->pFirst; pX; pX=pX->pNext){
    if( pX->iLevel>pCtx->mxToc ) continue;
    /* Open or close lists until the nesting matches this heading */
    while( iLevel<pX->iLevel ){
      iLevel++;
      blob_appendf(&new, "<ul class='markdown-toc%d markdown-toc'>\n",
                         iLevel - iBase);
    }
    while( iLevel>pX->iLevel ){
      iLevel--;
      blob_appendf(&new, "</ul>\n");
    }
    if( pX->nth ){
      blob_appendf(&new,"<li><a href='#%h-%d'>", pX->zTag, pX->nth);
    }else{
      blob_appendf(&new,"<li><a href='#%h'>", pX->zTag);
    }
    html_to_plaintext(pX->zTitle, &new);
    blob_appendf(&new,"</a></li>\n");
  }
  /* Close every list that is still open */
  while( iLevel>iBase ){
    iLevel--;
    blob_appendf(&new, "</ul>\n");
  }
  blob_appendf(&new, "</div>\n");
  blob_append(&new, blob_buffer(pOut)+pCtx->iToc,
                    blob_size(pOut)-pCtx->iToc);
  blob_reset(pOut);
  *pOut = new;
}

/*
** Convert markdown into HTML.
**
** The document title is placed in output_title if not NULL.  Or if
** output_title is NULL, the document title appears in the body.
*/
577
578
579
580
581
582
583


584
585
586
587
588
589




590


802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820

821
822







+
+






+
+
+
+
-
+
+
    html_normal_text,

    /* misc. parameters */
    "*_", /* emph_chars */
    0     /* opaque */
  };
  MarkdownToHtml context;
  MarkdownHeading *pHdng, *pNextHdng;

  memset(&context, 0, sizeof(context));
  context.output_title = output_title;
  html_renderer.opaque = &context;
  if( output_title ) blob_reset(output_title);
  blob_reset(output_body);
  markdown(output_body, input_markdown, &html_renderer);
  if( context.mxToc>0 ) html_insert_toc(&context, output_body);
  for(pHdng=context.pFirst; pHdng; pHdng=pNextHdng){
    pNextHdng = pHdng->pNext;
    fossil_free(pHdng);
}
  }
}