Backup

Check-in [1edb3423c3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Extended "sfpull" to search the tickets in Tracker.xml for attachment and retrieve these as well. The application now additionally requires tDOM (xml processing), and Tcl 8.5 (dict).
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:1edb3423c3baefaa302576065fe93fd7fd080c86
User & Date: aku 2011-02-11 22:05:27
Context
2011-02-14
18:49
Added support for mercurial repositories, and demo set of such. check-in: 0a5de9df32 user: aku tags: trunk
2011-02-11
22:05
Extended "sfpull" to search the tickets in Tracker.xml for attachment and retrieve these as well. The application now additionally requires tDOM (xml processing), and Tcl 8.5 (dict). check-in: 1edb3423c3 user: aku tags: trunk
19:12
Extended "backup_sf" to allow specification of more than one repository type to rsync. In that case the destination directories are constructed from project name and repository type, the latter is used as suffix. Previous cases are unchanged, i.e. cvs is default, and using a single repository type uses just the project name for the destination directory. check-in: 4cafd464c3 user: aku tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to bin/sfpull.

     1      1   #!/bin/sh
     2      2   # -*- tcl -*- \
     3         -exec tclsh "$0" ${1+"$@"}
            3  +exec tclsh8.5 "$0" ${1+"$@"}
     4      4   # ### ### ### ######### ######### #########
     5      5   ## (C) 2009 ActiveState Software Inc.
     6      6   #
     7      7   ## SourceForge Pull (Extract Project information from SourceForge).
     8      8   ## A ForkLift application
     9      9   
    10     10   # ### ### ### ######### ######### #########
................................................................................
    37     37   package require Expect           ; # ssh password interaction
    38     38   package require base64           ; # (Tcllib) Encoding for basic authentication.
    39     39   package require fileutil         ; # (Tcllib) Helper for rsync over ssh, temp ssh config.
    40     40   package require http 2.7         ; # (Core) Retrieve urls, post forms ...
    41     41   package require textutil::adjust ; # (Tcllib) support for json generator code
    42     42   package require tls              ; # Secure connections (https).
    43     43   package require autoproxy
    44         -
           44  +package require tdom
    45     45   
    46     46   #puts [package ifneeded http [package present http]]
    47     47   #proc http::Log {args} { puts HTTP:\ [join $args] }
    48     48   
    49     49   # ### ### ### ######### ######### #########
    50     50   
    51     51   proc main {} {
    52     52       initialize;# Required for verify_project, in the commandline.
    53     53       if {![commandline]} usage
    54     54       save_config \
    55         -	[pull_tracker] \
           55  +	[pull_attachments [pull_tracker]] \
    56     56   	[pull_mailinglists] \
    57     57   	[pull_website] \
    58     58   	[pull_repository]
    59     59       return
    60     60   }
    61     61   
    62     62   proc commandline {} {
................................................................................
    98     98       # website    = name of the website directory, if any.
    99     99       # repository = name of repository directory.
   100    100       # mlists ... = lists containing the names of all project mailing
   101    101       #              lists, if any.
   102    102   
   103    103       global thedestination theproject thepid therepotype theuser
   104    104   
          105  +    foreach {tracker attachments} $tracker break
          106  +
   105    107       set tmp {}
   106    108       foreach {name archive} $mlists {
   107    109   	lappend tmp $name [JsonString $archive]
   108    110       }
   109    111       set mlists $tmp
   110    112   
   111    113       file mkdir $thedestination
................................................................................
   115    117   	     configuration-version [JsonString 1] \
   116    118   	     project        [JsonString $theproject] \
   117    119   	     origin         [JsonString SourceForge] \
   118    120   	     origin-url     [JsonString http://sourceforge.net/projects/$theproject] \
   119    121   	     origin-id      [JsonString $thepid] \
   120    122   	     exporter       [JsonString $theuser] \
   121    123   	     tracker        [JsonString $tracker] \
          124  +	     attachments    [JsonString $attachments] \
   122    125   	     mailinglists   [JsonObjectDict $mlists] \
   123    126   	     website        [JsonString $website] \
   124    127   	     repository     [JsonObject \
   125    128   				 type  [JsonString $therepotype] \
   126    129   				 where [JsonString $repository] \
   127    130   				] \
   128    131   	     ]\n
   129    132       return
   130    133   }
   131    134   
   132    135   proc pull_tracker {} {
   133    136       global thepid
          137  +    variable attachments
   134    138   
   135    139       log Retrieving tracker information...
   136    140   
   137    141       set src https://sourceforge.net/export/xml_export2.php?group_id=$thepid
   138    142       set dst Trackers.xml
   139    143   
   140    144       pull-with-cookie-login $src $dst
   141    145   
   142    146       return $dst
   143    147   }
          148  +
          149  +proc pull_attachments {dst} {
          150  +    variable attachments
          151  +
          152  +    # Now parse the tracker information, find all the referenced
          153  +    # attachments, and pull them as well. This code written originally
          154  +    # by Kevin Kenny (scanexport.tcl).
          155  +
          156  +    log Searching for tracker attachments...
          157  +    log_barber_pole_start
          158  +
          159  +    xml::parser theparser -namespace -final 1 \
          160  +	-elementstartcommand  track_startElement \
          161  +	-characterdatacommand track_charData \
          162  +	-elementendcommand    track_endElement
          163  +
          164  +    set attachments {}
          165  +    set tree        {ROOT}
          166  +    theparser parsefile [dest $dst]
          167  +
          168  +    log_barber_pole_done
          169  +
          170  +    log Retrieving tracker attachments...
          171  +
          172  +    set total [llength $attachments]
          173  +    set an 0
          174  +    set missing 0
          175  +
          176  +    log_progress_start
          177  +    foreach a $attachments {
          178  +	incr an
          179  +	log_progress {} $total $an
          180  +
          181  +	set ticket [dict get $a ticket]
          182  +	set base   [dict get $a url]
          183  +	set id     [dict get $a id]
          184  +	set fname  [dict get $a filename]
          185  +
          186  +	set dst Attachments/file.$id
          187  +
          188  +	set uri $base$ticket
          189  +##	puts -nonewline stderr "\nRetrieving $uri..."
          190  +	flush stderr
          191  +	set token [http::geturl $uri]
          192  +	if {[http::error $token] ne {}} {
          193  +	    incr missing
          194  +	    puts stderr "\nERROR RETRIEVING $uri: [http::error $token]"
          195  +	    puts stderr "Attachment $filename to $ticket will not be included"
          196  +	} else {
          197  +##	    puts stderr ok
          198  +	    file mkdir [file dirname [dest $dst]]
          199  +	    fileutil::writeFile $dst [http::data $token]
          200  +	}
          201  +	http::cleanup $token
          202  +    }
          203  +
          204  +    if {$missing} {
          205  +	log_progress_done_err "$total/missing $missing"
          206  +    } else {
          207  +	log_progress_done $total
          208  +    }
          209  +    return [list $dst Attachments]
          210  +}
   144    211   
   145    212   proc pull_mailinglists {} {
   146    213       global thepid
   147    214   
   148    215       log Retrieving mailing list archives...
   149    216   
   150    217       set token [http::geturl \
................................................................................
   272    339   	eof {}
   273    340       }
   274    341       log_barber_pole_done
   275    342       return $dst
   276    343   }
   277    344   
   278    345   # ### ### ### ######### ######### #########
          346  +## This code originally written by Kevin Kenny (scanexport.tcl). Here
          347  +## in sfpull it has been reduced to get only the data needed to
          348  +## retrieve all ticket attachments.
          349  +
          350  +# startElement --
          351  +#
          352  +#	Callback executed at the start of any XML element in the
          353  +#	SourceForge export
          354  +#
          355  +# Parameters:
          356  +#	name - Element name
          357  +#	attlist - List of attributes attached to the element.
          358  +
          359  +proc track_startElement {name attlist} {
          360  +    variable curAttachmentFields
          361  +    variable tree
          362  +    variable chardata
          363  +
          364  +    set chardata {}
          365  +    lappend tree $name
          366  +
          367  +    log_barber_pole
          368  +
          369  +    contextmatch {
          370  +	{tracker_item attachments attachment} {
          371  +	    # Start of an attachment - clear the fields
          372  +	    set curAttachmentFields {}
          373  +	}
          374  +    }
          375  +}
          376  +
          377  +# charData --
          378  +#
          379  +#	Callback for character data in the XML
          380  +#
          381  +# Parameters:
          382  +#	data - Data to include in the enclosing element.
          383  +#
          384  +# Results:
          385  +#	None.
          386  +
          387  +proc track_charData {data} {
          388  +    variable chardata
          389  +    append chardata $data
          390  +}
          391  +
          392  +# endElement --
          393  +#
          394  +#	Callback for the end of an element
          395  +#
          396  +# Parameters:
          397  +#	name - Name of the element being ended.
          398  +#
          399  +# Results:
          400  +#	None.
          401  +
          402  +proc track_endElement {name} {
          403  +
          404  +    variable tree
          405  +    variable chardata
          406  +    variable curAttachmentFields
          407  +    variable attachments
          408  +    variable curTicket
          409  +
          410  +    contextmatch {
          411  +	{tracker_item id} {
          412  +	    # Ticket ID - stash for use in attachments.
          413  +	    set curTicket $chardata
          414  +	}
          415  +	{tracker_item attachments attachment url} {
          416  +	    # URL (incorrect, but fixable) of an attachment
          417  +	    dict set curAttachmentFields url $chardata
          418  +	}
          419  +	{tracker_item attachments attachment id} {
          420  +	    # Integer ID of an attachment
          421  +	    dict set curAttachmentFields id $chardata
          422  +	}
          423  +	{tracker_item attachments attachment filename} {
          424  +	    # File name of an attachment
          425  +	    dict set curAttachmentFields filename $chardata
          426  +	}
          427  +	{tracker_item attachments attachment description} {
          428  +	    # Human readable description of an attachment
          429  +	    dict set curAttachmentFields description $chardata
          430  +	}
          431  +	{tracker_item attachments attachment filesize} {
          432  +	    # File size of an attachment
          433  +	    dict set curAttachmentFields filesize $chardata
          434  +	}
          435  +	{tracker_item attachments attachment filetype} {
          436  +	    # File type of an attachment
          437  +	    dict set curAttachmentFields filetype $chardata
          438  +	}
          439  +	{tracker_item attachments attachment date} {
          440  +	    # Date (seconds from Unix epoch) of an attachment
          441  +	    dict set curAttachmentFields date $chardata
          442  +	}
          443  +	{tracker_item attachments attachment submitter} {
          444  +	    # User that submitted an attachment
          445  +	    dict set curAttachmentFields submitter $chardata
          446  +	}
          447  +	{tracker_item attachments attachment} {
          448  +	    # End of an attachment
          449  +	    dict set curAttachmentFields ticket $curTicket
          450  +	    lappend attachments $curAttachmentFields
          451  +	}
          452  +	{document} {
          453  +#	    puts "Trackers: $trackers"
          454  +	}
          455  +    }
          456  +    set tree [lrange $tree 0 end-1]
          457  +}
          458  +
          459  +# contextmatch --
          460  +#
          461  +#	Match on the context in the XML parse
          462  +#
          463  +# Parameters:
          464  +#	what - Dictionary whose keys are contexts and whose values
          465  +#	       are scripts. Each context is a list of element tags;
          466  +#	       the context must match the righmost part of the path
          467  +#	       to the current element.
          468  +#
          469  +# Side effects:
          470  +#	Whatever the matching scripts do.
          471  +
          472  +proc contextmatch {what} {
          473  +    variable tree
          474  +    set l [expr {[llength $tree] - 1}]
          475  +    foreach {pattern script} $what {
          476  +	set i [expr {[llength $pattern] - 1}]
          477  +	if {$i <= $l} {
          478  +	    set ok 1
          479  +	    for {set j $l} {$ok && $i >= 0} {incr i -1; incr j -1} {
          480  +		if {[lindex $pattern $i] ne [lindex $tree $j]} {
          481  +		    set ok 0
          482  +		}
          483  +	    }
          484  +	    if {$ok} {
          485  +		uplevel 1 $script
          486  +	    }
          487  +	}
          488  +    }
          489  +}
          490  +
          491  +# ### ### ### ######### ######### #########
   279    492   
   280    493   proc verify_project {} {
   281    494       global theproject thepid therepotype
   282    495   
   283    496       set url http://sourceforge.net/projects/$theproject/
   284    497   
   285    498       puts "Get project id of $url..."
................................................................................
   309    522       # Supported repository types: cvs, svn, hg, git, bzr.
   310    523   
   311    524       # In the main page the repository type is hidden somewhere within
   312    525       # a javascript which is loaded later. The public_info page however
   313    526       # still seems to provide us with this information directly in the
   314    527       # HTML. It is however a page requiring login.
   315    528   
   316         -    set tmp [pull-with-cookie-login \
   317         -		 https://sourceforge.net/project/admin/public_info.php?group_id=$thepid \
   318         -		 public_info.html]
   319         -
          529  +    set url  https://sourceforge.net/project/admin/public_info.php?group_id=$thepid
          530  +    set tmp  [pull-with-cookie-login $url public_info.html]
   320    531       set data [fileutil::cat $tmp]
   321    532   
   322    533       set pattern "scm/\\?type=(\[^&\]+)"
   323    534       if {![regexp $pattern $data -> therepotype]} {
   324    535   	log "Project $theproject : Unable to determine repository type"
   325    536   	return -code error FAIL/type
   326    537       }
................................................................................
   449    660       foreach {k v} $meta {
   450    661   	if {$k ne "Set-Cookie"} continue
   451    662   	set cookie [lindex [split $v {;}] 0]
   452    663   	# Cookie is in the form of 'k=v'.
   453    664   	# We can use this form directly.
   454    665   	lappend lines $cookie
   455    666       }
          667  +    lappend lines SFUSER=1
          668  +
   456    669       set thecookies [list Cookie: [join $lines {;}]]
   457    670   
   458    671       log Logged in as $theuser
   459    672       #log $thecookies
   460    673       return $thecookies
   461    674   }
   462    675   
................................................................................
   497    710       return
   498    711   }
   499    712   
   500    713   proc log_progress_done {msg} {
   501    714       puts "\rOK $msg                                     "
   502    715       return
   503    716   }
          717  +
          718  +proc log_progress_done_err {msg} {
          719  +    puts "\rERR $msg                                     "
          720  +    return
          721  +}
   504    722   
   505    723   proc dest {path} {
   506    724       global thedestination
   507    725       file mkdir $thedestination
   508    726       return [file join $thedestination $path]
   509    727   }
   510    728