diff --git a/docs/wiki_convert/aegisub_convert_docs.pl b/docs/wiki_convert/aegisub_convert_docs.pl
old mode 100644
new mode 100755
index 67ff4e36b..76989c89d
--- a/docs/wiki_convert/aegisub_convert_docs.pl
+++ b/docs/wiki_convert/aegisub_convert_docs.pl
@@ -30,12 +30,13 @@ use File::Path;
 # variables
 my $base_dir = '.';
-my $host = 'http://aegisub.cellosoft.com';
-my $docs_base_url = 'http://aegisub.cellosoft.com/docs/';
+my $host = 'http://docs.aegisub.org';
+my $docs_base_url = 'http://docs.aegisub.org/manual/';
 my $agent = LWP::UserAgent->new();
 my @process_pages = ($docs_base_url . 'Main_Page'); # start at the main page
-my %pages = ($base_dir . '/Main_Page.html' => $docs_base_url . "Main_Page");
+my %pages = ($base_dir . '/Main_Page' => $docs_base_url . "Main_Page");
 my %accepted_types = (
+	'application/javascript' => '.js',
 	'image/png' => '.png',
 	'image/jpeg' => '.jpg',
 	'image/gif' => '.gif',
@@ -64,42 +65,40 @@ while ( @process_pages ) {
 	# initialize object and download the page
 	my $page_object = $agent->get($current_page);
 	print("Requesting ${current_page}...\n");
-	
+
 	# warn and skip if we couldn't get it
 	unless ( $page_object->is_success() ) {
 		warn("Couldn't get ", $current_page, ": ", $page_object->status_line(), ", skipping\n");
 		next(GETLOOP);
 	}
-	
+
 	my $content_type = $page_object->content_type();
-	
+
 	# skip if we don't know what it is
 	unless ( exists($accepted_types{$content_type}) ) {
 		warn("I don't know what to do with ", $content_type, ", skipping\n");
 		next(GETLOOP);
 	}
-	
-	
+
 	# monstrously ugly hack to handle rewriting of .css filenames
 	my $name;
-	if ( $current_page =~ m!index\.php\?title=MediaWiki\:.+?\.css!i ) {
+	if ( $current_page =~ m!css$!i ) {
 		$name = convert_css_link($current_page);
 	}
 	else {
 		$name = convert_link($current_page);
 	}
-	
-	
+
 	# if it's html, parse it, grab new links
 	# add them to @process_pages if they're not already in there
 	# then write the modified html to disk
 	if ( $content_type eq 'text/html' ) {
 		my ($filename, $content, @newpages) = parse_and_fix_html($current_page, $page_object->base(), $page_object->decoded_content());
-		
+
 		# skip this page if the parser decided it was a page we don't want
 		next(GETLOOP) unless ($content);
-		
+
 		write_to_disk($filename, $content_type, $content);
-		
+
 		foreach my $url (@newpages) {
 			my $newname = convert_link($url);
 			# check if we already added that page to our todo-list
@@ -116,9 +115,9 @@ while ( @process_pages ) {
 	# if it's CSS we need the @import'ed links
 	elsif ( $content_type eq 'text/css' ) {
 		my @newpages = parse_css($current_page, $page_object->base(), $page_object->decoded_content());
-		
+
 		write_to_disk($name, $content_type, $page_object->decoded_content());
-		
+
 		foreach my $url (@newpages) {
 			my $newname = convert_link($url);
 			# check if we already added that page to our todo-list
@@ -158,30 +157,42 @@ sub parse_and_fix_html {
 	#get arguments
 	my ($url, $base, $content) = @_;
 	my (@links, @links_to_modify); # list of links to return later
-	
-	
+
+	# strip RSS etc.
+	$content =~ s!<link[^>]*xml[^>]* />!!gi;
+
+	# kill the topbar
+	$content =~ s!