ycl

Artifact [9c7d366b29]
Login

Artifact 9c7d366b299e66113bf1ebe690ff9f817fc4176c:


     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
   141
   142
   143
   144
   145
   146
   147
#! /bin/env tclsh

variable doc::validcharexpr {
	description {
		Return an expresssion is true if [set $varname] returns
		the number of a unicode character that is a valid XML character .
	}
}
proc validCharExpr varname {
	string map [list @varname@ $varname] {
		!(
				(@varname@ <= 0x08 && @varname@ >= 0x01)
			||
				(@varname@ <= 0x0c && @varname@ >= 0x0B)
			||
				(@varname@ <= 0x1f && @varname@ >= 0x0e)

			||
				(@varname@ <= 0x84 && @varname@ >= 0x7f)
			||
				(@varname@ <= 0x9f && @varname@ >= 0x86)

			|| (
					! (@varname@ == 9 || @varname@ ==  10 || @varname@ ==  13)
				&&
					! (@varname@ <= 0x10ffff && @varname@ >= 0x10000)
				&&
					! (@varname@ <= 0xd7ff && @varname@ >= 0x20)
				&&
					! (@varname@ <= 0xfffd && @varname@ >= 0xe00)
			)
		)
	}
}


proc encodeInvalidCharsScripted {inchan outchan} [
	string map [list @expr@ [validCharExpr \$ord]] {
	set dataout {}
	while {![eof $inchan]} {
		set data [read $inchan 65536]
		foreach char [split $data {}] {
			scan $char %c ord
			if {!(@expr@)} {
				append dataout &#x[format %x $ord]\;
			} else {
				append dataout $char
			}
		}
		puts -nonewline $outchan $dataout
		set dataout {}
	}
}]
interp alias {} [namespace current]::encodeInvalidChars {} [namespace current]::encodeInvalidCharsScripted


proc accelerateEncodeInvalidChars {} [string map [list @expr@ [validCharExpr *uchar]] {
	package require {ycl chan clib}
	package require critcl
	variable accelerated
	::critcl::tcl 8.5
	::critcl::api import ycl_chan_clib 0.1
	::critcl::ccode {
		void process (Tcl_Interp *interp , Tcl_UniChar *uchar , Tcl_DString *output) {
			char *utfstring;
			static char hex[9];
			int hexlen;
			if (!(@expr@)) {
				Tcl_DStringAppend(output ,"&#x" ,-1);
				hexlen = snprintf(hex ,8 ,"%x" ,(int) *uchar);
				Tcl_DStringAppend(output ,hex ,hexlen);
				Tcl_DStringAppend(output ,";" ,-1);
				/* Since the refcount of ucharobj was never incremented ,
				   it may have been reused */
			} else {
				Tcl_UniCharToUtfDString(uchar ,1 ,output);
			}
		}
	}

	set name encodeInvalidCharsAccelerated
	::critcl::cproc $name {Tcl_Interp* interp pstring inname pstring outname} void {
		static int mode;
		Tcl_Channel chan = Tcl_GetChannel(interp ,inname.s ,&mode);
		Tcl_Channel outchan = Tcl_GetChannel(interp ,outname.s ,&mode);
		filter(interp ,chan ,outchan ,process);
		return;
	}
	::critcl::load
	interp alias {} [namespace current]::encodeInvalidChars {} [namespace which $name]
	set accelerated 1
	return [namespace which $name]
}]

variable accelerated 0

try {
	accelerateEncodeInvalidChars
} on error {tres topts} {
	puts stderr [dict get $topts -errorinfo]
	puts stderr [list {failed to create} encodeInvalidCharsAccelerated]
}


# This Scheme version was twice as fast as the pure Tcl version , but still 50
# times slower than the C version .   It uses a decimal representation instead
# of hexadecimal.
if 0 {
#lang racket

(define (encodeInvalidChars)
	(define data (read-string 65536 (current-input-port)))
	(cond (
		(not (eof-object? data))
		(for-each (lambda (char) 
			(let (
				(number (char->integer char))
				)

				(cond (
					(
						or
							(and (<= number #x08) (>= number #x01))
							(and (<= number #x0c) (>= number #x0b))
							(and (<= number #x1f) (>= number #x0e))
							(and (<= number #x84) (>= number #x7f))
							(and (<= number #x9f) (>= number #x86))
							(and
								(not (member number (list 9 10 13)))
								(not (and (<= number #x10ffff) (>= number #x10000)))
								(not (and (<= number #xd7ff) (>= number #x20)))
								(not (and (<= number #xfffd) (>= number #xe00)))
							)
					)
					(display (~a "&#" number #\;))
				) (else
					(display char)
				))
			)
		) (string->list data))
		(encodeInvalidChars)
	))
)

(encodeInvalidChars)
}