本帖最后由 523066680 于 2019-1-9 22:16 编辑
写过一个抓取双色球往期开奖结果的多线程脚本,以前写的,比较丑(贴出的代码不完整,好像还有另一个脚本负责导出 Excel,一时没找着):
- use Encode;
- use Modern::Perl;
- use Time::HiRes qw/time sleep/;
- use threads;
- use threads::shared;
- use Try::Tiny;
- use Mojo::UserAgent;
-
- use File::Basename;
- use File::Path qw/make_path/;
- use File::Slurp;
# Flush progress output immediately so messages from the workers show up
# as they are printed.
STDOUT->autoflush(1);

# Directory that receives the downloaded pages (created on first run).
our $workdir = "D:\\Temp\\Double_Ball_Lottery";
make_path $workdir unless -e $workdir;
#chdir $workdir;

our $ua;
our $main = "http://kaijiang.500.com";
our @links :shared;    # work queue shared by all worker threads
our @ths;              # handles of the worker threads

$ua = Mojo::UserAgent->new();
$ua = $ua->max_redirects(5);

print "Getting Links ... ";
get_links( \@links );
say "Done";

# Start four worker threads; each receives its index (0 .. 3) as its id.
push @ths, threads->create( \&thread_func, $_ ) for 0 .. 3;

# Block until every worker has finished. join() both waits for and reaps
# the thread, so no polling loop or detach() pass is needed.
$_->join() for @ths;

quit();
-
# Worker thread body: repeatedly takes one URL from the shared @links queue,
# downloads it and stores the response body in $workdir under the URL's
# basename. URLs whose target file already exists are skipped.
#
# Parameters:
#   $id - number of this worker, used only to tag progress messages.
#
# Returns when the queue is empty, or early when a single URL has failed
# more than 10 times in a row (the worker gives up entirely in that case).
sub thread_func
{
    our (@links, $workdir);
    my ( $id ) = @_;
    my $ua = Mojo::UserAgent->new();

    LINK:
    while (1)
    {
        my $link;
        {
            # Test and take under one lock so that two workers can never
            # race for the final element, and so that the last queued link
            # is processed too.
            lock(@links);
            last LINK unless @links;
            $link = shift @links;
        }
        next LINK unless defined $link;    # ignore empty queue entries

        my $file = $workdir ."\\". basename($link);

        if ( -e $file ) { say "$id - $link already exists"; next LINK };
        say "$id - $link";

        my $times = 0;
        my $res;
        while (1)
        {
            # Forget any earlier response: if the request throws, a stale
            # successful $res must not be mistaken for this attempt's result.
            $res = undef;
            try { $res = $ua->get($link)->result }
            catch { };    # a thrown request is handled as a failed attempt below

            last if ( defined $res and $res->is_success );

            # Exceptions and unsuccessful responses both count as a failed
            # attempt, so the retry loop is always bounded and throttled.
            printf "[%d] getting %s, retry: %d\n", $id, basename($link), $times++;
            return if ( $times > 10 );
            sleep 3.0;
        }

        write_file( $file, $res->body );
    }
}
-
# Collects the link targets to download.
#
# Reads "simple.htm" from the current working directory (presumably a saved
# copy of the site's issue-selection page - confirm), finds the element
# matching ".iSelectList" and appends the href of every <a> inside it to the
# array referenced by $aref.
#
# Parameters:
#   $aref - reference to the array that receives the hrefs.
#
# Dies when the file cannot be read or when no ".iSelectList" element exists.
sub get_links
{
    my ($aref) = @_;
    my $html = read_file("simple.htm");
    my $dom = Mojo::DOM->new( $html );

    # at() returns undef when nothing matches; fail with a clear message
    # instead of "Can't call method "find" on an undefined value".
    my $list = $dom->at(".iSelectList")
        or die "get_links: no .iSelectList element found in simple.htm\n";

    for my $e ( $list->find("a")->each )
    {
        my $href = $e->attr("href");

        # Anchors without a usable href would put undef into the work queue.
        next unless defined $href and length $href;
        push @$aref, $href;
    }

    return;
}
-
# Runs the shell's "pause" command so the console window stays open until a
# key is pressed, and hands back the status reported by system().
sub quit
{
    my $status = system("pause");
    return $status;
}
原来是18年2月写的,之前抓了03年到18年1月的html,并导出成excel(没显示日期,只是做了个分布图):
https://share.weiyun.com/5V1MvmE