Artifact d04a7d8a04a43ee43e90ae88268755de59856e0d:
- File
packages/parse/lib/x/xml/xml.test.tcl
— part of check-in
[99990bf839]
at
2018-06-30 10:39:43
on branch trunk
— parser graph
last commit before deep changes in order to search for closing tags before any opening tags and to track unmatched text segments
parse xml
all tests pass
(user: pooryorick size: 11220)
#! /bin/env tclsh package require {ycl test} proc suite_main {} { package require {ycl list} namespace import [yclprefix]::list::sl package require {ycl parse xml} namespace import [yclprefix]::parse::xml package require {ycl string} namespace import [yclprefix]::string::cmp namespace import [yclprefix]::string::dedent namespace import [yclprefix]::test::cleanup1 [yclprefix]::test::init proc cmpres {expected res} { if {$res eq $expected} { set res matched } else { set idx [cmp $res $expected] set res1 [list {not matched at} $idx] append res1 "\n*** expected ***\n" append res1 \n[string range $expected [expr {$idx - 20}] [expr {$idx + 140}]] append res1 "\n*** received ***\n" append res1 \n[string range $res [expr {$idx - 20}] [expr {$idx + 140}]] set res $res1 } return $res } set data1 [dedent { <body> <section> <p> paragraph1 </p> </section> </body> }] set expected1 {{{{text {}}} 0 0 { } { }} {{{element body} {text {}}} 7 8 { } { }} {{{element body} {element section} {text {}}} 18 20 { } { }} {{{element body} {element section} {element p} {text {}}} 24 40 { paragraph1 } { paragraph1 }} {{{element body} {element section} {element p}} 24 40 { paragraph1 } { paragraph1 }} {{{element body} {element section} {text {}}} 45 46 { } { }} {{{element body} {element section}} 18 46 { <p> paragraph1 </p> } { <p> paragraph1 </p> }} {{{element body} {text {}}} 57 57 { } { }} {{{element body}} 7 57 { <section> <p> paragraph1 </p> </section> } { <section> <p> paragraph1 </p> </section> }} {{{text {}}} 65 65 { } { }} {{} 0 65 { <body> <section> <p> paragraph1 </p> </section> </body> } { <body> <section> <p> paragraph1 </p> </section> </body> }}} set expected_1p [sl { {{text {}}} {{element body} {text {}}} {{element body} {element section} {text {}}} {{element body} {element section} {element p} {text {}}} {{element body} {element section} {element p}} {{element body} {element section} {text {}}} {{element body} {element section}} {{element body} {text {}}} {{element body}} {{text {}}} {} }] set expected_1n [sl { {text {} 1} {text {} 0} {element body 1} {text {} 1} {text {} 0} {element section 1} {text {} 1} {text {} 0} {element p 1} {text {} 1} {text {} 0} {element p 0} {text {} 1} {text {} 0} {element section 0} {text {} 1} {text {} 0} {element body 0} {text {} 1} {text {} 0} }] set data2 [string map {</p> </P>} $data1] regsub -all {\n\s*</(?:p|section)>} $data1 {} data3a set data3 [dedent { <body> <section> <p> paragraph1 </body> }] set expected3 {{{{text {}}} 0 0 { } { }} {{{element body} {text {}}} 7 8 { } { }} {{{element body} {element section} {text {}}} 18 20 { } { }} {{{element body} {element section} {element p} {text {}}} 24 38 { paragraph1 } { paragraph1 }} {{{element body} {element section} {element p}} 24 38 { paragraph1 } { paragraph1 }} {{{element body} {element section}} 18 38 { <p> paragraph1 } { <p> paragraph1 }} {{{element body}} 7 38 { <section> <p> paragraph1 } { <section> <p> paragraph1 }} {{{text {}}} 46 46 { } { }} {{} 0 46 { <body> <section> <p> paragraph1 </body> } { <body> <section> <p> paragraph1 </body> }}} set data4 [dedent { <body> <div1> <p1> content1 </p1> <p2> content2 </p2> </div1> <div2> </div2> </body> }] set data5 [dedent { <body> <div1> }] set data6 [dedent { <body> <div1>hello</div1> <div2>goodbye</div2> <div1>hello again</div1> </body> }] set setup1 { variable res2 {} variable res3 {} xml .spawn xml1 proc on_body {_ start end body} { variable res2 variable res3 set data [$_ $ data] lappend res2 [$_ path] lappend res3 [list [$_ path] $start $end $body [ string range $data $start $end]] } xml1 .method on_body [list [namespace which on_body]] proc on_enter {_ type tag open} { variable res lappend res [list $type $tag $open] } xml1 .method on_enter [list [namespace which on_enter]] xml1 typespec {} process {} on enter on_enter \ on leave on_enter on body on_body } test children {} -setup $setup1 -body { xml1 init transient false data $data1 xml1 parse set res1 [lmap x [xml1 children] { list [$x $ type] [$x $ tag] }] return $res1 } -cleanup [cleanup1] -result {{text {}} {element body} {text {}}} test traverse {} -setup $setup1 -body { variable res xml1 init transient false data $data4 xml1 parse set res {} xml1 traverse cmd [list ::apply [list _ { variable res lappend res [list [$_ $ type] [$_ $ tag] [$_ text]] return 1 } [namespace current]]] return $res } -cleanup [cleanup1] -result [sl { {text {} { }} {element body { <div1> <p1> content1 </p1> <p2> content2 </p2> </div1> <div2> </div2> }} {text {} { }} {element div1 { <p1> content1 </p1> <p2> content2 </p2> }} {text {} { }} {element p1 { content1 }} {text {} { content1 }} {text {} { }} {element p2 { content2 }} {text {} { content2 }} {text {} { }} {text {} { }} {element div2 { }} {text {} { }} {text {} { }} {text {} { }} }] test unmatched {} -setup $setup1 -body { variable res xml1 init transient false data $data5 proc unmatched _ { variable res namespace upvar $_ data data level level if {$level == 0} { return } lappend res {hi from unmatched} append data [dedent { content </div1> </body> }] return again } xml1 .method unmatched [namespace which unmatched] xml1 on unmatched unmatched xml1 typespec {} on body {} xml1 parse return $res } -cleanup [cleanup1] -result [sl { {text {} 1} {text {} 0} {element body 1} {text {} 1} {text {} 0} {element div1 1} {text {} 1} {text {} 0} {hi from unmatched} {text {} 1} {text {} 0} {element div1 0} {text {} 1} {text {} 0} {element body 0} {text {} 1} {text {} 0} }] test traverse_breadth {} -setup $setup1 -body { variable res xml1 init transient false data $data4 xml1 parse set res {} xml1 traverse mode breadth cmd [list ::apply [list _ { variable res lappend res [$_ $ tag] return 1 } [namespace current]]] return $res } -cleanup [cleanup1] -result [sl { {} body {} {} div1 {} div2 {} {} p1 {} p2 {} {} {} {} }] test xml {} -setup $setup1 -body { variable res xml1 run data $data1 set expected $expected1 return "[cmpres $expected_1n $res] [cmpres $expected_1p $res2] [ cmpres $expected $res3]" } -cleanup [cleanup1] -result {matched matched matched} test xml_imbalance {} -setup {} -body { xml .spawn xml1 try { set res [xml1 run data $data2] } on error {tres topts} { if {[string is list $tres] && [ dict exists [lrange $tres 1 end] node]} { lappend res [lindex $tres 0] lappend res [dict get [lrange $tres 1 end] node] } else { if {[string match {*incomplete part*} $tres]} { lappend res $tres } else { return -options $topts $tres } } } return $res } -cleanup [cleanup1] -result [sl { {incomplete part at} {{element body} {element section} {element p}} }] test xml_imbalance_imbalanced {} -setup $setup1 -body { variable res xml1 configure balanced false try { xml1 run data $data2 } on error {tres topts} { if {[string is list $tres]} { lappend res [lindex $tres 0] if {[catch { lappend res [dict get [lrange $tres 1 end] node] }]} { return -options $topts $tres } } else { lappend res $tres } } if {[string is list $res]} { set res [list {*}$res] } set expected $expected_1n lset expected 11 1 P return [cmpres $expected $res] } -cleanup [cleanup1] -result matched test xml_imbalance_imbalanced_twotags {} -setup $setup1 -body { variable res xml1 configure balanced false use_open true xml1 run data $data3 if {[string is list $res]} { set res [list {*}$res] } return [cmpres $res3 $expected3] } -cleanup [cleanup1] -result matched test xml_imbalance_imbalanced_useopen {} -setup $setup1 -body { variable res xml1 configure balanced false use_open true xml1 run data $data2 lassign [lindex $res3 0] path start end body lappend resx $path $start lappend expected {{text {}}} 0 lappend resx [string range $data2 $end+1 $end+6] lappend expected <body> lassign [lindex $res3 1] path start end body lappend resx $path lappend expected {{element body} {text {}}} lappend resx [string range $data2 $start-6 $start-1] lappend expected <body> lassign [lindex $res3 2] path start end body text lappend resx $path lappend expected {{element body} {element section} {text {}}} lappend resx $text lappend expected \n\t\t lappend resx [string range $data2 $end+1 $end+20] lappend expected [lindex [ regexp -inline {<section>\n\t\t(.*)</P>} $data2] 1] lassign [lindex $res3 3] path start end body text lappend resx $path lappend expected {{element body} {element section} {element p} {text {}}} lappend resx $text lappend expected [lindex [ regexp -inline {<p>(.*)</P>} $data2] 1] lappend resx [string range $data2 $end+1 $end+16] lappend expected </P>\n\t</section> lassign [lindex $res3 4] path start end body text lappend resx $path lappend expected {{element body} {element section} {element p}} lappend resx $text lappend expected [lindex [regexp -inline {<p>(.*)</P>} $data2] 1] lassign [lindex $res3 5] path start end body text lappend resx $path lappend expected {{element body} {element section} {text {}}} lappend resx $text lappend expected [lindex [ regexp -inline {</P>(.*)</section>} $data2] 1] lassign [lindex $res3 6] path start end body text lappend resx $path lappend expected {{element body} {element section}} lappend resx $text lappend expected [lindex [ regexp -inline {<section>(.*)</section>} $data2] 1] lassign [lindex $res3 7] path start end body text lappend resx $path lappend expected {{element body} {text {}}} lappend resx $body lappend expected [lindex [ regexp -inline {</section>(.*)</body>} $data2] 1] lappend resx [string range $data2 $end+1 $end+7] lappend expected </body> lassign [lindex $res3 8] path start end body text lappend resx $path lappend expected {{element body}} lappend resx $body lappend expected [lindex [ regexp -inline {<body>(.*)</body>} $data2] 1] lassign [lindex $res3 9] path start end body text lappend resx $path lappend expected {{text {}}} lappend resx $body lappend expected [lindex [ regexp -inline {</body>(.*)} $data2] 1] lassign [lindex $res3 10] path start end body text lappend resx $path lappend expected {} lappend resx $body lappend expected $data2 lappend resx [list [llength $res3]] lappend expected 11 #foreach item $resx { # puts [incr i] # puts $item\n\n #} return [cmpres $expected $resx] } -cleanup [cleanup1] -result matched cleanupTests }