Backup

Check-in [1edb3423c3]

Overview
Comment: Extended "sfpull" to search the tickets in Trackers.xml for attachments and retrieve these as well. The application now additionally requires tDOM (XML processing) and Tcl 8.5 (dict).
SHA1: 1edb3423c3baefaa302576065fe93fd7fd080c86
User & Date: aku 2011-02-11 22:05:27
Context
2011-02-14 18:49  Added support for Mercurial repositories, and a demo set of such. check-in: 0a5de9df32 user: aku tags: trunk
2011-02-11 22:05  Extended "sfpull" to search the tickets in Trackers.xml for attachments and retrieve these as well. The application now additionally requires tDOM (XML processing) and Tcl 8.5 (dict). check-in: 1edb3423c3 user: aku tags: trunk
2011-02-11 19:12  Extended "backup_sf" to allow specification of more than one repository type to rsync. In that case the destination directories are constructed from the project name and the repository type, with the latter used as a suffix. Previous cases are unchanged, i.e. cvs remains the default, and a single repository type uses just the project name for the destination directory. check-in: 4cafd464c3 user: aku tags: trunk
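The 19:12 entry above describes how "backup_sf" names its rsync destination directories. A minimal sketch of that naming rule, for illustration only: backup_sf itself is not part of this diff, the proc name backup_dest is hypothetical, and the separator between project name and repository-type suffix is an assumption.

# Illustration of the naming rule described above (hypothetical helper,
# not part of this check-in; the "-" separator is an assumption).
proc backup_dest {project repotypes type} {
    if {[llength $repotypes] > 1} {
        # Several repository types: append the type as a suffix.
        return ${project}-${type}
    }
    # Single repository type (e.g. the cvs default): project name only.
    return $project
}

# backup_dest tcllib {cvs svn} svn   ->  tcllib-svn
# backup_dest tcllib cvs       cvs   ->  tcllib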
Changes

Changes to bin/sfpull.

Old version of bin/sfpull (before this check-in):

#!/bin/sh
# -*- tcl -*- \
exec tclsh "$0" ${1+"$@"}
# ### ### ### ######### ######### #########
## (C) 2009 ActiveState Software Inc.
#
## SourceForge Pull (Extract Project information from SourceForge).
## A ForkLift application

# ### ### ### ######### ######### #########
................................................................................
package require Expect           ; # ssh password interaction
package require base64           ; # (Tcllib) Encoding for basic authentication.
package require fileutil         ; # (Tcllib) Helper for rsync over ssh, temp ssh config.
package require http 2.7         ; # (Core) Retrieve urls, post forms ...
package require textutil::adjust ; # (Tcllib) support for json generator code
package require tls              ; # Secure connections (https).
package require autoproxy


#puts [package ifneeded http [package present http]]
#proc http::Log {args} { puts HTTP:\ [join $args] }

# ### ### ### ######### ######### #########

proc main {} {
    initialize;# Required for verify_project, in the commandline.
    if {![commandline]} usage
    save_config \
	[pull_tracker] \
	[pull_mailinglists] \
	[pull_website] \
	[pull_repository]
    return
}

proc commandline {} {
................................................................................
    # website    = name of the website directory, if any.
    # repository = name of repository directory.
    # mlists ... = lists containing the names of all project mailing
    #              lists, if any.

    global thedestination theproject thepid therepotype theuser

    set tmp {}
    foreach {name archive} $mlists {
	lappend tmp $name [JsonString $archive]
    }
    set mlists $tmp

    file mkdir $thedestination
................................................................................
	     configuration-version [JsonString 1] \
	     project        [JsonString $theproject] \
	     origin         [JsonString SourceForge] \
	     origin-url     [JsonString http://sourceforge.net/projects/$theproject] \
	     origin-id      [JsonString $thepid] \
	     exporter       [JsonString $theuser] \
	     tracker        [JsonString $tracker] \
	     mailinglists   [JsonObjectDict $mlists] \
	     website        [JsonString $website] \
	     repository     [JsonObject \
				 type  [JsonString $therepotype] \
				 where [JsonString $repository] \
				] \
	     ]\n
    return
}

proc pull_tracker {} {
    global thepid


    log Retrieving tracker information...

    set src https://sourceforge.net/export/xml_export2.php?group_id=$thepid
    set dst Trackers.xml

    pull-with-cookie-login $src $dst

    return $dst
}

proc pull_mailinglists {} {
    global thepid

    log Retrieving mailing list archives...

    set token [http::geturl \
................................................................................
	eof {}
    }
    log_barber_pole_done
    return $dst
}

# ### ### ### ######### ######### #########

proc verify_project {} {
    global theproject thepid therepotype

    set url http://sourceforge.net/projects/$theproject/

    puts "Get project id of $url..."
................................................................................
    # Supported repository types: cvs, svn, hg, git, bzr.

    # In the main page the repository type is hidden somewhere within
    # a javascript which is loaded later. The public_info page however
    # still seems to provide us with this information directly in the
    # HTML. It is however a page requiring login.

    set tmp [pull-with-cookie-login \
		 https://sourceforge.net/project/admin/public_info.php?group_id=$thepid \
		 public_info.html]

    set data [fileutil::cat $tmp]

    set pattern "scm/\\?type=(\[^&\]+)"
    if {![regexp $pattern $data -> therepotype]} {
	log "Project $theproject : Unable to determine repository type"
	return -code error FAIL/type
    }
................................................................................
    foreach {k v} $meta {
	if {$k ne "Set-Cookie"} continue
	set cookie [lindex [split $v {;}] 0]
	# Cookie is in the form of 'k=v'.
	# We can use this form directly.
	lappend lines $cookie
    }


    set thecookies [list Cookie: [join $lines {;}]]

    log Logged in as $theuser
    #log $thecookies
    return $thecookies
}

................................................................................
    return
}

proc log_progress_done {msg} {
    puts "\rOK $msg                                     "
    return
}

proc dest {path} {
    global thedestination
    file mkdir $thedestination
    return [file join $thedestination $path]
}

New version of bin/sfpull (as of this check-in):

#!/bin/sh
# -*- tcl -*- \
exec tclsh8.5 "$0" ${1+"$@"}
# ### ### ### ######### ######### #########
## (C) 2009 ActiveState Software Inc.
#
## SourceForge Pull (Extract Project information from SourceForge).
## A ForkLift application

# ### ### ### ######### ######### #########
................................................................................
package require Expect           ; # ssh password interaction
package require base64           ; # (Tcllib) Encoding for basic authentication.
package require fileutil         ; # (Tcllib) Helper for rsync over ssh, temp ssh config.
package require http 2.7         ; # (Core) Retrieve urls, post forms ...
package require textutil::adjust ; # (Tcllib) support for json generator code
package require tls              ; # Secure connections (https).
package require autoproxy
package require tdom

#puts [package ifneeded http [package present http]]
#proc http::Log {args} { puts HTTP:\ [join $args] }

# ### ### ### ######### ######### #########

proc main {} {
    initialize;# Required for verify_project, in the commandline.
    if {![commandline]} usage
    save_config \
	[pull_attachments [pull_tracker]] \
	[pull_mailinglists] \
	[pull_website] \
	[pull_repository]
    return
}

proc commandline {} {
................................................................................
    # website    = name of the website directory, if any.
    # repository = name of repository directory.
    # mlists ... = lists containing the names of all project mailing
    #              lists, if any.

    global thedestination theproject thepid therepotype theuser

    foreach {tracker attachments} $tracker break

    set tmp {}
    foreach {name archive} $mlists {
	lappend tmp $name [JsonString $archive]
    }
    set mlists $tmp

    file mkdir $thedestination
................................................................................
	     configuration-version [JsonString 1] \
	     project        [JsonString $theproject] \
	     origin         [JsonString SourceForge] \
	     origin-url     [JsonString http://sourceforge.net/projects/$theproject] \
	     origin-id      [JsonString $thepid] \
	     exporter       [JsonString $theuser] \
	     tracker        [JsonString $tracker] \
	     attachments    [JsonString $attachments] \
	     mailinglists   [JsonObjectDict $mlists] \
	     website        [JsonString $website] \
	     repository     [JsonObject \
				 type  [JsonString $therepotype] \
				 where [JsonString $repository] \
				] \
	     ]\n
    return
}

proc pull_tracker {} {
    global thepid
    variable attachments

    log Retrieving tracker information...

    set src https://sourceforge.net/export/xml_export2.php?group_id=$thepid
    set dst Trackers.xml

    pull-with-cookie-login $src $dst

    return $dst
}

proc pull_attachments {dst} {
    variable attachments

    # Now parse the tracker information, find all the referenced
    # attachments, and pull them as well. This code written originally
    # by Kevin Kenny (scanexport.tcl).

    log Searching for tracker attachments...
    log_barber_pole_start

    xml::parser theparser -namespace -final 1 \
	-elementstartcommand  track_startElement \
	-characterdatacommand track_charData \
	-elementendcommand    track_endElement

    set attachments {}
    set tree        {ROOT}
    theparser parsefile [dest $dst]

    log_barber_pole_done

    log Retrieving tracker attachments...

    set total [llength $attachments]
    set an 0
    set missing 0

    log_progress_start
    foreach a $attachments {
	incr an
	log_progress {} $total $an

	set ticket [dict get $a ticket]
	set base   [dict get $a url]
	set id     [dict get $a id]
	set fname  [dict get $a filename]

	set dst Attachments/file.$id

	set uri $base$ticket
##	puts -nonewline stderr "\nRetrieving $uri..."
	flush stderr
	set token [http::geturl $uri]
	if {[http::error $token] ne {}} {
	    incr missing
	    puts stderr "\nERROR RETRIEVING $uri: [http::error $token]"
	    puts stderr "Attachment $fname to $ticket will not be included"
	} else {
##	    puts stderr ok
	    file mkdir [file dirname [dest $dst]]
	    fileutil::writeFile $dst [http::data $token]
	}
	http::cleanup $token
    }

    if {$missing} {
	log_progress_done_err "$total/missing $missing"
    } else {
	log_progress_done $total
    }
    return [list $dst Attachments]
}

proc pull_mailinglists {} {
    global thepid

    log Retrieving mailing list archives...

    set token [http::geturl \
................................................................................
	eof {}
    }
    log_barber_pole_done
    return $dst
}

# ### ### ### ######### ######### #########
## This code originally written by Kevin Kenny (scanexport.tcl). Here
## in sfpull it has been reduced to get only the data needed to
## retrieve all ticket attachments.

# startElement --
#
#	Callback executed at the start of any XML element in the
#	SourceForge export
#
# Parameters:
#	name - Element name
#	attlist - List of attributes attached to the element.

proc track_startElement {name attlist} {
    variable curAttachmentFields
    variable tree
    variable chardata

    set chardata {}
    lappend tree $name

    log_barber_pole

    contextmatch {
	{tracker_item attachments attachment} {
	    # Start of an attachment - clear the fields
	    set curAttachmentFields {}
	}
    }
}

# charData --
#
#	Callback for character data in the XML
#
# Parameters:
#	data - Data to include in the enclosing element.
#
# Results:
#	None.

proc track_charData {data} {
    variable chardata
    append chardata $data
}

# endElement --
#
#	Callback for the end of an element
#
# Parameters:
#	name - Name of the element being ended.
#
# Results:
#	None.

proc track_endElement {name} {

    variable tree
    variable chardata
    variable curAttachmentFields
    variable attachments
    variable curTicket

    contextmatch {
	{tracker_item id} {
	    # Ticket ID - stash for use in attachments.
	    set curTicket $chardata
	}
	{tracker_item attachments attachment url} {
	    # URL (incorrect, but fixable) of an attachment
	    dict set curAttachmentFields url $chardata
	}
	{tracker_item attachments attachment id} {
	    # Integer ID of an attachment
	    dict set curAttachmentFields id $chardata
	}
	{tracker_item attachments attachment filename} {
	    # File name of an attachment
	    dict set curAttachmentFields filename $chardata
	}
	{tracker_item attachments attachment description} {
	    # Human readable description of an attachment
	    dict set curAttachmentFields description $chardata
	}
	{tracker_item attachments attachment filesize} {
	    # File size of an attachment
	    dict set curAttachmentFields filesize $chardata
	}
	{tracker_item attachments attachment filetype} {
	    # File type of an attachment
	    dict set curAttachmentFields filetype $chardata
	}
	{tracker_item attachments attachment date} {
	    # Date (seconds from Unix epoch) of an attachment
	    dict set curAttachmentFields date $chardata
	}
	{tracker_item attachments attachment submitter} {
	    # User that submitted an attachment
	    dict set curAttachmentFields submitter $chardata
	}
	{tracker_item attachments attachment} {
	    # End of an attachment
	    dict set curAttachmentFields ticket $curTicket
	    lappend attachments $curAttachmentFields
	}
	{document} {
#	    puts "Trackers: $trackers"
	}
    }
    set tree [lrange $tree 0 end-1]
}

# contextmatch --
#
#	Match on the context in the XML parse
#
# Parameters:
#	what - Dictionary whose keys are contexts and whose values
#	       are scripts. Each context is a list of element tags;
#	       the context must match the rightmost part of the path
#	       to the current element.
#
# Side effects:
#	Whatever the matching scripts do.
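#
# Example (illustration only, not part of this check-in): when the parse
# stack is
#     {ROOT document tracker_item attachments attachment id}
# the context {tracker_item attachments attachment id} matches, because
# the rightmost elements of the stack agree with it, while the context
# {tracker_item id} does not (its rightmost pair would have to be
# "tracker_item id" rather than "attachment id").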

proc contextmatch {what} {
    variable tree
    set l [expr {[llength $tree] - 1}]
    foreach {pattern script} $what {
	set i [expr {[llength $pattern] - 1}]
	if {$i <= $l} {
	    set ok 1
	    for {set j $l} {$ok && $i >= 0} {incr i -1; incr j -1} {
		if {[lindex $pattern $i] ne [lindex $tree $j]} {
		    set ok 0
		}
	    }
	    if {$ok} {
		uplevel 1 $script
	    }
	}
    }
}

# ### ### ### ######### ######### #########

proc verify_project {} {
    global theproject thepid therepotype

    set url http://sourceforge.net/projects/$theproject/

    puts "Get project id of $url..."
................................................................................
    # Supported repository types: cvs, svn, hg, git, bzr.

    # In the main page the repository type is hidden somewhere within
    # a javascript which is loaded later. The public_info page however
    # still seems to provide us with this information directly in the
    # HTML. It is however a page requiring login.


    set url  https://sourceforge.net/project/admin/public_info.php?group_id=$thepid
    set tmp  [pull-with-cookie-login $url public_info.html]

    set data [fileutil::cat $tmp]

    set pattern "scm/\\?type=(\[^&\]+)"
    if {![regexp $pattern $data -> therepotype]} {
	log "Project $theproject : Unable to determine repository type"
	return -code error FAIL/type
    }
................................................................................
    foreach {k v} $meta {
	if {$k ne "Set-Cookie"} continue
	set cookie [lindex [split $v {;}] 0]
	# Cookie is in the form of 'k=v'.
	# We can use this form directly.
	lappend lines $cookie
    }
    lappend lines SFUSER=1

    set thecookies [list Cookie: [join $lines {;}]]

    log Logged in as $theuser
    #log $thecookies
    return $thecookies
}

................................................................................
    return
}

proc log_progress_done {msg} {
    puts "\rOK $msg                                     "
    return
}

proc log_progress_done_err {msg} {
    puts "\rERR $msg                                     "
    return
}

proc dest {path} {
    global thedestination
    file mkdir $thedestination
    return [file join $thedestination $path]
}
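
The new pull_attachments and track_* procedures above collect one Tcl 8.5 dict per ticket attachment by scanning Trackers.xml with a SAX-style parser. As a rough cross-check of the data layout, here is a minimal DOM-based sketch of the same collection step using tDOM. The element names (tracker_item, attachments/attachment, id, url, filename, ...) are taken from the contexts in the code above, the absence of XML namespaces in the export is assumed, and scan_attachments is a hypothetical helper, not part of this check-in.

package require Tcl 8.5
package require tdom

proc scan_attachments {xmlfile} {
    set f   [open $xmlfile r]
    set doc [dom parse [read $f]]
    close $f

    set result {}
    foreach item [[$doc documentElement] selectNodes //tracker_item] {
        # Ticket id; recorded with every attachment of this ticket.
        set idnode [lindex [$item selectNodes id] 0]
        if {$idnode eq ""} continue
        set ticket [$idnode text]
        foreach att [$item selectNodes attachments/attachment] {
            set a [dict create ticket $ticket]
            foreach field {url id filename description filesize filetype date submitter} {
                set node [lindex [$att selectNodes $field] 0]
                if {$node ne ""} { dict set a $field [$node text] }
            }
            lappend result $a
        }
    }
    $doc delete
    return $result
}

# Example use:
#   foreach a [scan_attachments Trackers.xml] { puts [dict get $a filename] }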