复制代码
- =info
- Author: 523066680/vicyang
- Date: 2018-11
- =cut
- use Encode;
- use LWP::UserAgent;
- use Mojo::DOM;
- use File::Slurp;
- use File::Basename qw/basename/;
- use File::Path qw/mkpath/;
- STDOUT->autoflush(1);
- our $wdir = "D:/temp/wallpaper_zol/meinv";
- our $main = "http://desk.zol.com.cn";
- my $ua = LWP::UserAgent->new( agent => "Mozilla/5.0" );
- our @headers = (
- "Host" => "desk.zol.com.cn",
- "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0",
- );
- mkpath $wdir unless -e $wdir;
- chdir $wdir;
- # 获取所有主题链接
- my @items;
- my $iter = 1;
- while ( get_item( $main ."/meinv/${iter}.html", \@items ) >= 1 )
- {
- $iter++;
- }
- # 遍历页面、提取图片
- my $idx = 0;
- for my $item ( @items )
- {
- printf "[%03d/%d] %s %s\n", $idx++ , $#items+1, $item->{link}, $item->{title};
- get_pages( $item->{link}, $item->{title} );
- }
- sub get_item
- {
- our ($main, @headers);
- my ( $link, $ref ) = @_;
- # 重建 UserAgent 对象
- my $ua = LWP::UserAgent->new();
- my $res = $ua->get($link, @headers);
- my $dom = Mojo::DOM->new( $res->content );
- for my $e ( $dom->find(".photo-list-padding")->each )
- {
- printf "%s %s\n", $e->at("a")->attr("href"), $e->at("span")->attr("title");
- push @$ref, {
- 'link' => $main . $e->at("a")->attr("href"),
- 'title' => $e->at("span")->attr("title")
- }
- }
- if ( defined $dom->at("#pageNext") ) { return 1 }
- else { return 0 }
- }
- # --- Get each pages of item --- #
- sub get_pages
- {
- our @headers;
- my ($link, $title) = @_;
- my $res = $ua->get( $link, @headers );
- my $dom = Mojo::DOM->new( $res->content );
- my $path = "${wdir}/${title}";
- mkpath $path unless -e $path;
- chdir $path;
- # 图片数量
- my $pics = $dom->at(".photo-list-box li i")->text;
- $pics=~s/[^\d]//; #去除斜杠
- my @files = glob "*.jpg";
- if ( $#files+1 == $pics ) {
- printf "Images already exist\n";
- return;
- }
- for my $e ($dom->find(".photo-list-box a")->each )
- {
- #printf "%s\n", $e->attr("href");
- get_pic( $main . $e->attr("href") );
- }
- }
- sub get_pic
- {
- my ( $link ) = @_;
- # 刷新 UserAgent 对象
- my $ua = LWP::UserAgent->new( timeout => 6 );
- my $res = $ua->get($link);
- my $dom = Mojo::DOM->new($res->content);
- my $pic_url;
- my $pic_name;
- my $sub_url = $dom->at(".wallpaper-down dd a")->attr("href");
- $pic_name = basename($sub_url);
- $pic_name =~ s/\.html/\.jpg/i;
- printf "%s\n", $pic_name;
- return if ( -e $pic_name );
- my $retry = 0;
- do
- {
- $res = $ua->get( "${main}${sub_url}" );
- if ( $retry > 0 and $retry < 5 ) { print "retry times: $retry\n"; }
- elsif ( $retry > 5 ) { print "False\n"; return }
- $retry++;
- }
- until ( $res->is_success );
- $dom = Mojo::DOM->new( $res->content );
- $ua->mirror( $dom->at("")->attr("src"), $pic_name );
- }
=info Author: 523066680/vicyang Date: 2018-11 =cut use Encode; use Mojo::UserAgent; use Mojo::DOM; use File::Slurp; use File::Basename qw/basename/; use File::Path qw/mkpath/; STDOUT->autoflush(1); our $theme = "meishi"; our $wdir = "F:/Wallpaper/zol/$theme"; our $main = "http://desk.zol.com.cn"; our $ua = Mojo::UserAgent->new(); our @headers = ( "Host" => "desk.zol.com.cn", "User-Agent" => "Firefox/63.0", ); mkpath $wdir unless -e $wdir; chdir $wdir; 获取所有主题链接 my @items; my $iter = 1; while ( get_item( $main ."/${theme}/${iter}.html", \@items ) >= 1 ) { $iter++; } 遍历页面、提取图片 my $idx = 1; for my $item ( @items ) { printf "[%03d/%d] %s %s\n", $idx++ , $#items+1, $item->{link}, $item->{title}; get_pages( $item->{link}, $item->{title} ); } sub get_item { my ( $link, $ref ) = @_; my $res = try_to_get( $link ); my $dom = $res->dom; for my $e ( $dom->find(".photo-list-padding")->each ) { printf "%s %s\n", $e->at("a")->attr("href"), $e->at("span")->attr("title"); push @$ref, {'link' => $main . $e->at("a")->attr("href"), 'title' => $e->at("span")->attr("title") }; } # 判断是否为最后一页 if ( defined $dom->at("#pageNext") ) { return 1 } else { return 0 } } --- Get each pages of item --- # sub get_pages { my ($link, $title) = @_; my $res = try_to_get( $link ); my $dom = $res->dom; my $path = "${wdir}/${title}"; mkpath $path unless -e $path; chdir $path; # 图片数量 my $pics = $dom->at(".photo-list-box li i")->text; $pics=~s/[^\d]//; #去除斜杠 my @files = glob "*.jpg"; if ( $#files+1 == $pics ) { printf "Images already exist\n"; return; } for my $e ($dom->find(".photo-list-box a")->each ) { #printf "%s\n", $e->attr("href"); get_pic( $main . $e->attr("href") ); } } sub get_pic { my ( $link ) = @_; my $res = try_to_get( $link ); return unless (defined $res); my $dom = $res->dom; my $pic_url; my $pic_name; my $obj = $dom->at(".wallpaper-down dd a"); my $sub_url; while (1) { $sub_url = $obj->attr("href"); # 某些图片没有提供指定分辨率的链接 if ( $sub_url !~/\.html/ ) { printf "Did not found picture url, skip %s\n", $sub_url; return; } $pic_name = basename($sub_url); $pic_name =~ s/\.html/\.jpg/i; printf "%s\n", $pic_name; return if ( -e $pic_name ); my $res = try_to_get( "${main}${sub_url}" ); return unless (defined $res); my $dom = $res->dom; my $res = $ua->get( $dom->at("")->attr("src") )->result; # 如果下载失败就选择下一个分辨率的图片 if ( $res->code == 502 ) { $obj = $obj->next; next; } write_file( $pic_name, {binmode=>":raw"}, $res->body ); last; } } sub try_to_get { our ($ua, @headers); my $link = shift; my $res; my $retry = 0; do { $res = $ua->get( $link )->result; if ( $retry > 0 and $retry < 5 ) { print "Retry times: $retry\n"; } elsif ( $retry > 5 ) { print "False\n"; return undef } $retry++; } until ( $res->is_success ); return $res; } |
欢迎光临 批处理之家 (http://bbs.bathome.net/) | Powered by Discuz! 7.2 |