#! /bin/env tclsh
variable doc::validcharexpr {
description {
Return an expresssion is true if [set $varname] returns
the number of a unicode character that is a valid XML character .
}
}
proc validCharExpr varname {
string map [list @varname@ $varname] {
!(
(@varname@ <= 0x08 && @varname@ >= 0x01)
||
(@varname@ <= 0x0c && @varname@ >= 0x0B)
||
(@varname@ <= 0x1f && @varname@ >= 0x0e)
||
(@varname@ <= 0x84 && @varname@ >= 0x7f)
||
(@varname@ <= 0x9f && @varname@ >= 0x86)
|| (
! (@varname@ == 9 || @varname@ == 10 || @varname@ == 13)
&&
! (@varname@ <= 0x10ffff && @varname@ >= 0x10000)
&&
! (@varname@ <= 0xd7ff && @varname@ >= 0x20)
&&
! (@varname@ <= 0xfffd && @varname@ >= 0xe00)
)
)
}
}
proc encodeInvalidCharsScripted {inchan outchan} [
string map [list @expr@ [validCharExpr \$ord]] {
set dataout {}
while {![eof $inchan]} {
set data [read $inchan 65536]
foreach char [split $data {}] {
scan $char %c ord
if {!(@expr@)} {
append dataout &#x[format %x $ord]\;
} else {
append dataout $char
}
}
puts -nonewline $outchan $dataout
set dataout {}
}
}]
interp alias {} [namespace current]::encodeInvalidChars {} [namespace current]::encodeInvalidCharsScripted
proc accelerateEncodeInvalidChars {} [string map [list @expr@ [validCharExpr *uchar]] {
package require {ycl chan clib}
package require critcl
variable accelerated
::critcl::tcl 8.5
::critcl::api import ycl_chan_clib 0.1
::critcl::ccode {
void process (Tcl_Interp *interp , Tcl_UniChar *uchar , Tcl_DString *output) {
char *utfstring;
static char hex[9];
int hexlen;
if (!(@expr@)) {
Tcl_DStringAppend(output ,"&#x" ,-1);
hexlen = snprintf(hex ,8 ,"%x" ,(int) *uchar);
Tcl_DStringAppend(output ,hex ,hexlen);
Tcl_DStringAppend(output ,";" ,-1);
/* Since the refcount of ucharobj was never incremented ,
it may have been reused */
} else {
Tcl_UniCharToUtfDString(uchar ,1 ,output);
}
}
}
set name encodeInvalidCharsAccelerated
::critcl::cproc $name {Tcl_Interp* interp pstring inname pstring outname} void {
static int mode;
Tcl_Channel chan = Tcl_GetChannel(interp ,inname.s ,&mode);
Tcl_Channel outchan = Tcl_GetChannel(interp ,outname.s ,&mode);
filter(interp ,chan ,outchan ,process);
return;
}
::critcl::load
interp alias {} [namespace current]::encodeInvalidChars {} [namespace which $name]
set accelerated 1
return [namespace which $name]
}]
variable accelerated 0
try {
accelerateEncodeInvalidChars
} on error {tres topts} {
puts stderr [dict get $topts -errorinfo]
puts stderr [list {failed to create} encodeInvalidCharsAccelerated]
}
# This Scheme version was twice as fast as the pure Tcl version , but still 50
# times slower than the C version . It uses a decimal representation instead
# of hexadecimal.
if 0 {
#lang racket
(define (encodeInvalidChars)
(define data (read-string 65536 (current-input-port)))
(cond (
(not (eof-object? data))
(for-each (lambda (char)
(let (
(number (char->integer char))
)
(cond (
(
or
(and (<= number #x08) (>= number #x01))
(and (<= number #x0c) (>= number #x0b))
(and (<= number #x1f) (>= number #x0e))
(and (<= number #x84) (>= number #x7f))
(and (<= number #x9f) (>= number #x86))
(and
(not (member number (list 9 10 13)))
(not (and (<= number #x10ffff) (>= number #x10000)))
(not (and (<= number #xd7ff) (>= number #x20)))
(not (and (<= number #xfffd) (>= number #xe00)))
)
)
(display (~a "&#" number #\;))
) (else
(display char)
))
)
) (string->list data))
(encodeInvalidChars)
))
)
(encodeInvalidChars)
}
|