- 帖子
- 3151
- 积分
- 6455
- 技术
- 317
- 捐助
- 70
- 注册时间
- 2008-8-3
|
[原创代码] [Perl][threads]多线程下载数独题库
本帖最后由 523066680 于 2017-9-12 19:31 编辑
数独网站:免费的在线数独
地址规律:/printable.php?nd=难度&y=年&m=月&d=日
起点日期是2008-1-1,终点日期是当天,难度分为 0 1 2 3 4
运行后文件保存在 sudoku_nd0.txt ... sudoku_nd4.txt,如果程序中断,再次运行时会加载上次的结果继续下载。
=info
523066680 2017-09
https://zhuanlan.zhihu.com/PerlExample
=cut
use threads;
use threads::shared;
use IO::Handle;
use File::Slurp;
use Data::Dumper;
use Time::HiRes qw/sleep time/;
STDOUT->autoflush(1);
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
use Date::Format;
use Time::Local;
use LWP::UserAgent;
our $level = 0;
our $dbfile;
our $main = "http://www.cn.sudokupuzzle.org";
our $ua = LWP::UserAgent->new(
agent => "Mozilla/5.0", keep_alive => 1, timeout => 5,
);
our $db :shared;
our @tasks :shared;
my @ths;
while ( $level <= 4 )
{
$dbfile = "sudoku_nd${level}.txt";
$db = undef;
@tasks = ();
#结构初始化,设置任务列表
initdb( \$db, \@tasks, $dbfile );
@ths = ();
grep { push @ths, threads->create( \&getSudoku, $_ ); } (0..4);
#等待并结束线程
while ( threads->list( threads::running ) ) { sleep 1.0 }
grep { $_->detach() } @ths;
#最终数据输出
write_file( $dbfile, Dumper $db );
print "Done $dbfile\n";
$level++;
}
sub getSudoku
{
our ($main, $ua, $level, $db, @tasks);
my $id = shift;
my $html;
my $link;
my $res;
my $timestamp;
my ($year, $mon, $day);
while ( $#tasks >= 0 )
{
$timestamp = shift @tasks;
($year, $mon, $day) = split( " ", time2str( "%Y %L %e", $timestamp ) );
$link = "$main/printable.php?nd=$level&y=$year&m=$mon&d=$day";
GET: while (1)
{
$res = $ua->get($link);
$html = $res->content();
if ($html=~/(\d{162})/)
{
$db->{ $timestamp } = $1;
printf "[%d %d-%02d-%02d Done\n", $id, $year, $mon, $day;
last GET;
}
}
#遇到偶数月份零一日的时候保存数据,避免程序中断前功尽弃
if ( ($day == 1) and ($mon % 2 == 0) )
{
write_file( $dbfile, Dumper $db );
}
}
}
sub initdb
{
our ($main, $level);
my ( $db, $tasks, $dbfile ) = @_;
my $iter = timelocal(0, 0, 0, 1, 0, 2008 ); #起点日期,月份从0开始
my $last = time(); #最后日期为今天
my %new_db;
my $old_db;
$old_db = eval read_file( $dbfile ) if ( -e $dbfile );
while ( $iter < $last )
{
if ( defined $old_db->{$iter} )
{
$new_db{$iter} = $old_db->{$iter};
}
else
{
$new_db{$iter} = undef;
push @$tasks, $iter;
}
$iter += 24*3600;
}
$$db = shared_clone( \%new_db );
} |
http://523066680.ys168.com/
Perl/数独/数独题库200801-201709_含答案.zip |
|