本帖最后由 523066680 于 2019-1-15 14:46 编辑
获取midi(背景音乐)、nes、图片、作弊码、以及PDF(通常是手册)
运行环境:Strawberry Perl v5.26
路径设置:our $wdir = "F:/temp/nesgames"; | | | | | | | | | | | | | use File::Slurp; | | use File::Path; | | use File::Basename; | | use Mojo::UserAgent; | | use Mojo::DOM; | | use Try::Tiny; | | STDOUT->autoflush(1); | | | | our $wdir = "F:/temp/nesgames"; | | our $main = "http://www.nesfiles.com"; | | our $games = "http://www.nesfiles.com/Games"; | | | | mkpath $wdir unless -e $wdir; | | chdir $wdir; | | | | our $ua = Mojo::UserAgent->new(); | | our @headers = ( "User-Agent" => "Firefox/63.0" ); | | | | get_games_list($games); | | | | sub get_games_list | | { | | our ($ua, $main, @heaee); | | my ($link) = @_; | | my $res = $ua->get( $link, \@headers )->res; | | my $dom = $res->dom; | | | | for my $e ( $dom->find(".nesfilesTable a")->each ) | | { | | | | get_files( $main .$e->attr("href"), $e->text ); | | } | | } | | | | sub get_files | | { | | our ($main, $ua, @headers); | | my ($link, $name) = @_; | | my $title = basename($link); | | my ($res, $dom); | | | | my $fname = "${title}.html"; | | if ( -e $fname ) { | | my $html = read_file($fname); | | $dom = Mojo::DOM->new( $html ); | | } else { | | $res = $ua->get( $link, \@headers )->res; | | $dom = $res->dom; | | write_file( $fname, {binmode=>":raw"}, $res->body ); | | } | | | | | | mkdir $title unless -e $title; | | my ($head, $list, $res2); | | for my $section ($dom->find(".GameSection")->each) | | { | | $head = $section->at("header")->text; | | last if $head=~/Ebay$/i; | | | | | | if ($head=~/Codes/i) { | | write_file( $title ."/Codes_Cheats.txt", $section->all_text ); | | next; | | } | | | | if ($head=~/Screenshots/i) { | | | | $list = $section->find("img")->map(attr=>"src"); | | } else { | | | | $list = $section->find("a")->map(attr=>"href"); | | } | | | | printf "%s\n", $head; | | for my $href ( $list->each ) | | { | | printf "%s\n", $href; | | $fname = $title ."/". basename($href); | | | | next if -e $fname; | | $res2 = try_to_get( "${main}$href" ); | | next unless defined $res2; | | | | write_file( $fname, {binmode=>":raw"}, $res2->body); | | } | | } | | } | | | | sub try_to_get | | { | | our ($ua, @headers); | | my ($link) = @_; | | my $res; | | my $times = 0; | | | | while (1) | | { | | try { $res = $ua->get( $link )->result; } | | catch { printf "Error %s, retry: %d\n", $_, $times; }; | | $times++; | | last if (defined $res and $res->is_success); | | return undef if ( $times > 5 ); | | } | | return $res; | | }COPY |
|