# Scrapes TV-series information pages from www.km.com into a text report.

# Start from a clean console.
Clear-Host

# Single WinHTTP COM request object; Get-MainInfo and Get-PartInfo reuse it
# for every page they download.
$whr = New-Object -ComObject 'WinHttp.WinHttpRequest.5.1'

# Get-MainInfo: builds the plain-text report for one TV series.
#
#   $key  series id as it appears in the page URL (/tv/<key>.html)
#
# Downloads the overview page, the cast page (/tv/yanyuan/<key>.html) and the
# role page (/tv/role/<key>.html) through the script-level $whr object, pulls
# the fields out with regular expressions and writes every report line to the
# output stream (the caller decides where it goes). A field whose pattern does
# not match is simply left out. Ends by calling Get-PartInfo for the
# per-episode synopses and emitting a separator line.
function Get-MainInfo($key){

    # ---- overview page -----------------------------------------------------
    $url = 'https://www.km.com/tv/{0}.html' -f $key
    $whr.Open('GET',$url,$false)        # third argument $false: synchronous request
    $whr.Send()
    $txt = $whr.ResponseText
    $whr.Abort()

    # Series title; also echoed to the console as a progress message.
    # NOTE(review): the 'TTT' prefix here and the 'ZZZ' prefix in the cast list
    # are emitted verbatim - presumably markers for later post-processing; confirm.
    $pattern = '(?s)<div class="inside-title.*?>.*?>(.*?)<'
    if($txt -match $pattern){
        '名称: TTT' + $Matches[1]
        Write-Host ('解析电视剧: ' + $Matches[1])
    }

    # Poster image; 'https:' is prepended to the captured src value.
    $pattern = '(?s)<div class="video_poster.*?">.*?<img src="(.*?)"'
    if($txt -match $pattern){
        '图片地址: https:' + $Matches[1]
    }

    "`r`n【概览】"
    $pattern = '(?s)mr20">(.*?)<a class="unfold"'
    if($txt -match $pattern){
        # Drop <script> blocks, then all tags, then runs of 2+ whitespace characters.
        $t = $Matches[1] -replace '(?s)<script>.*?</script>','' -replace '<.*?>','' -replace '\s{2,}',''
        # Put each known label on its own line, then delete the 主演 line
        # (everything from the label up to the next CRLF).
        $t -replace '(导演:)|(主演:)|(地区/类型:)|(剧情集数:)|(播出时间:)|(在线观看网站:)|(别名:)|(片长:)',("`r`n" + '$0') -replace "主演:.*?`r`n",''
    }

    $pattern = '(?s)<p class="score_num.*?>.*?>(.*?)<'
    if($txt -match $pattern){
        '综合评分: ' + $Matches[1]
    }

    # Plot summary; handed to Get-PartInfo below.
    $plot_main = ''
    $pattern = '剧情介绍:.*?>(.*?)<'
    if($txt -match $pattern){
        $plot_main = $Matches[1]
    }

    # Episode count. Initialised explicitly so that a page without the marker
    # yields zero episodes instead of reading a stale $part from an outer
    # scope (PowerShell resolves unset variables through parent scopes).
    $part = 0
    $pattern = '>\(全部 (\d+)<'
    if($txt -match $pattern){
        $part = [int]$Matches[1]
    }

    "`r`n【幕后信息】"
    $pattern = '(?s)幕后信息</div>(.*?)<a class="intro_fold'
    if($txt -match $pattern){
        $Matches[1] -replace '<.*?>','' -replace '\s{2,}','' -replace '(编剧:)|(制片人:)|(TV首播时间:)|(在线播放平台:)|(出品公司:)|《',("`r`n" + '$0')
    }

    # ---- cast page ---------------------------------------------------------
    "`r`n【演员表】"
    $url = 'https://www.km.com/tv/yanyuan/{0}.html' -f $key
    $whr.Open('GET',$url,$false)
    $whr.Send()
    $txt = $whr.ResponseText
    $whr.Abort()
    # NOTE(review): each match runs up to and including the NEXT entry's
    # opening tag, so that next entry cannot start a match of its own - every
    # second entry (and the last one) may be skipped. Verify against the page
    # HTML; a lookahead for the closing delimiter would avoid it.
    $pattern = '(?s)<div class="actor-list-detail same_col">.*?<div class="actor-list-detail same_col">'
    [regex]::Matches($txt,$pattern) | ForEach-Object {
        # Only entries that carry a role ("饰演") are reported.
        if($_.Value.Contains('<em>饰</em><em>演</em>')){
            # Parentheses spell out the existing precedence: '+' binds tighter than -replace.
            ('ZZZ' + $_.Value) -replace '<.*?>|\s{2,}','' -replace '饰演'," 饰演 " -replace '最近作品:.*$',''
        }
    }

    # ---- role page ---------------------------------------------------------
    "`r`n【人物介绍】"
    $url = 'https://www.km.com/tv/role/{0}.html' -f $key
    $whr.Open('GET',$url,$false)
    $whr.Send()
    $txt = $whr.ResponseText
    $whr.Abort()
    $pattern = '(?s)<div class="role-name">.*?<div class="role-intro-js">'
    [regex]::Matches($txt,$pattern) | ForEach-Object {
        $_.Value -replace '<.*?>','' -replace '\s{2,}','' -replace '演员',' 演员' -replace '-->简介:',"-->简介:`r`n`t"
    }

    # ---- per-episode synopses and record separator -------------------------
    Get-PartInfo -key $key -title $plot_main -max $part
    '-----------------------------------------------------------------------------'
}


# Get-PartInfo: emits the episode-by-episode synopsis section of a report.
#
#   $key    series id used in the episode URLs (/tv/<key>/2_<n>.html)
#   $title  plot summary printed under the section heading
#   $max    number of episodes; pages 1..$max are downloaded
#
# Uses the script-level $whr request object. An episode whose page has no
# "article-content" div gets its heading line only.
function Get-PartInfo($key,$title,$max){
    "`r`n【分集剧情】"
    "`t剧情介绍`n`t`t" + $title

    $contentRegex = '(?s)<div class="article-content">.*?</div>'
    $episode = 1
    while($episode -le $max){
        "`t第{0}集`t`t" -f $episode

        $episodeUrl = 'https://www.km.com/tv/{0}/2_{1}.html' -f $key,$episode
        $whr.Open('GET',$episodeUrl,$false)
        $whr.Send()
        $html = $whr.ResponseText
        $whr.Abort()

        if($html -match $contentRegex){
            # Remove the tags, squeeze whitespace runs to one space, trim the ends.
            $plain = $Matches[0] -replace '<.*?>','' -replace '\s{2,}',' '
            "`t`t" + $plain.Trim()
        }

        $episode++
    }
}


# ---- batch driver ----------------------------------------------------------
# Reads series URLs from $url_file, appends one report per URL to $out_file and
# checkpoints progress in $cur_file so an interrupted run can be resumed.
$cur_file = 'pass.txt'   # checkpoint: number of $url_file lines already consumed
$url_file = 'a.txt'      # input: one series URL per line
$out_file = 'info.txt'   # output: reports are appended

$cur = 0
# Test-Path resolves relative to the PowerShell location, like Get-Content and
# Out-File below; [System.IO.File]::Exists uses the process working directory,
# which can point somewhere else.
if(Test-Path -LiteralPath $cur_file -PathType Leaf){
    $line = Get-Content -LiteralPath $cur_file | Select-Object -First 1
    # Accept only a line that is entirely a number, so the cast cannot fail.
    if($line -match '^\s*\d+\s*$'){
        $cur = [int]$line
    }
}
Write-Host ('上次解析个数: {0}' -f $cur)

Get-Content -LiteralPath $url_file | Select-Object -Skip $cur | ForEach-Object {
    if($_ -match 'https://www\.km\.com/tv/(\d+)\.html'){

        Get-MainInfo -key $Matches[1] | Out-File $out_file -Append

    }

    # The checkpoint is used with Select-Object -Skip, which counts LINES, so
    # every consumed line is counted - not only those holding a valid URL.
    # Counting matches only would shift the resume point after any blank or
    # malformed line and re-scrape series that are already in the report.
    ++$cur
    Out-File -InputObject $cur -FilePath $cur_file
}