#&cls&@cd /d "%~dp0" & powershell -c "Get-Content '%~0' | Select-Object -Skip 1 | Out-String | Invoke-Expression" &pause&exit
# The line above must remain the first line of the file. To PowerShell it is a comment; to
# cmd.exe it is a command chain that changes to the script's own folder, feeds the file
# (minus that first line) to PowerShell's Invoke-Expression, then pauses and exits.
Clear-Host

# Shared WinHTTP COM request object, stored in $whr.
$whr = New-Object -ComObject 'WinHttp.WinHttpRequest.5.1'
-
# Function: fetch one listing page and emit the numeric IDs of the shows linked on it.
#   $page - listing page number, substituted into the listing URL.
# Output: one ID string per matching link; nothing (plus a warning) when the
#         HTTP status is not 200.
# Depends on the script-level WinHTTP COM object $whr.
function Get-PageList($page){
    $url = 'https://www.km.com/tv/-----{0}.html' -f $page
    $whr.Open('GET',$url,$false)    # third argument $false = synchronous request
    $whr.Send()

    # Read everything needed from the response first, then release the request
    # on every path (previously Abort() was only reached on a 200 response).
    $status = $whr.Status
    $txt = if($status -eq 200){ $whr.ResponseText } else { $null }
    $whr.Abort()

    if($status -ne 200){
        # The warning stream is not part of the function's pipeline output.
        Write-Warning ('Get-PageList: HTTP {0} for {1}' -f $status,$url)
        return
    }

    # href value of the <a> that directly follows each result-image container div.
    $pattern = '(?<=<div class="filter_res_image scale_image_container">\s+<a href=").*?(?=")'
    foreach($m in [regex]::Matches($txt,$pattern)){
        # Keep only the numeric ID; the dot is escaped so that only a literal
        # ".html" suffix matches (an unescaped "." would accept any character).
        if($m.Value -match '/(\d+)\.html$'){
            $Matches[1]
        }
    }
}
-
# Function: scrape the details of one TV show and emit them as text lines.
#   $key - show ID used to build the detail, cast and role page URLs.
# Output (pipeline): title, poster URL, overview, score, production info, cast list and
#         character introductions; the section headers are always emitted, the
#         content only when the corresponding pattern matches.
# Side effect: echoes the parsed title to the host as a progress message.
# Depends on the script-level WinHTTP COM object $whr.
function Get-MainInfo($key){
    # Private helper: GET $url synchronously and return the response text.
    # Returns an empty string (and writes a warning) when the status is not 200,
    # so the callers' regex matching simply finds nothing.
    function Get-PageText($url){
        $whr.Open('GET',$url,$false)
        $whr.Send()
        $status = $whr.Status
        $body = if($status -eq 200){ $whr.ResponseText } else { '' }
        $whr.Abort()
        if($status -ne 200){
            Write-Warning ('Get-MainInfo: HTTP {0} for {1}' -f $status,$url)
        }
        $body
    }

    # Main detail page. The [string] constraint turns a missing result into ''.
    [string]$txt = Get-PageText ('https://www.km.com/tv/{0}.html' -f $key)

    # Title
    $pattern = '(?s)<div class="inside-title.*?>.*?>(.*?)<'
    if($txt -match $pattern){
        # NOTE(review): "TTT" is emitted literally, presumably as a marker for
        # later processing - confirm before changing.
        '名称: TTT' + $Matches[1]
        Write-Host ('解析电视剧: ' + $Matches[1])
    }

    # Poster image URL. The doubled quote inside the literal emits a single "'"
    # in front of the URL (kept exactly as in the original output).
    $pattern = '(?s)<div class="video_poster.*?">.*?<img src="(.*?)"'
    if($txt -match $pattern){
        '图片地址: ''https:' + $Matches[1]
    }

    # Overview
    "`r`n【概览】"
    $pattern = '(?s)mr20">(.*?)<a class="unfold"'
    if($txt -match $pattern){
        # Drop inline scripts and tags, and remove runs of two or more whitespace characters.
        $t = $Matches[1] -replace '(?s)<script>.*?</script>','' -replace '<.*?>','' -replace '\s{2,}',''
        # Start a new line in front of every field label.
        $t -replace '(导演:)|(主演:)|(地区/类型:)|(剧情集数:)|(播出时间:)|(在线观看网站:)|(别名:)|(片长:)',("`r`n" + '$0')
    }

    # Score
    $pattern = '(?s)<p class="score_num.*?>.*?>(.*?)<'
    if($txt -match $pattern){
        '综合评分: ' + $Matches[1]
    }

    # Production information
    "`r`n【幕后信息】"
    $pattern = '(?s)幕后信息</div>(.*?)<a class="intro_fold'
    if($txt -match $pattern){
        $Matches[1] -replace '<.*?>','' -replace '\s{2,}','' -replace '(编剧:)|(制片人:)|(TV首播时间:)|(在线播放平台:)|(出品公司:)|《',("`r`n" + '$0')
    }

    # Cast list (separate page)
    "`r`n【演员表】"
    $txt = Get-PageText ('https://www.km.com/tv/yanyuan/{0}.html' -f $key)
    $pattern = '(?s)<div class="actor-list-detail same_col">.*?<div class="actor-works same_col_bottom">'
    [regex]::Matches($txt,$pattern) | ForEach-Object {
        # NOTE(review): "ZZZ" is emitted literally, presumably as a marker for
        # later processing - confirm before changing.
        'ZZZ' + ($_.Value -replace '<.*?>','' -replace '\s{2,}','' -replace '饰演'," 饰演 ")
    }

    # Character introductions (separate page)
    "`r`n【人物介绍】"
    $txt = Get-PageText ('https://www.km.com/tv/role/{0}.html' -f $key)
    $pattern = '(?s)<div class="role-name">.*?<div class="role-intro-js">'
    [regex]::Matches($txt,$pattern) | ForEach-Object {
        $_.Value -replace '<.*?>','' -replace '\s{2,}','' -replace '演员',' 演员' -replace '-->简介:',"-->简介:`r`n`t"
    }
}
-
# Function: save the data of one listing page into a single text file.
#   $page - listing page number; the output file is named page_NNN.txt
#           (number formatted with '000') and written to the current location.
# Output: a "page already parsed, skipping" message when the file already exists;
#         otherwise nothing (all scraped text is redirected into the file).
# Calls Get-PageList and Get-MainInfo, defined elsewhere in this script.
function Save-PageInfo($page){
    $f = 'page_{0:000}.txt' -f $page

    # Skip pages that were already parsed. Test-Path resolves relative to the
    # PowerShell location, the same base that Out-File uses below.
    if(Test-Path -LiteralPath $f -PathType Leaf){
        '第{0}页已解析,跳过...' -f $page
        return
    }

    # Parse the requested page (this used to be hard-coded to page 1, which
    # ignored $page) and write every show's details, each followed by a
    # separator line, to the file.
    Get-PageList -page $page | ForEach-Object {
        Get-MainInfo -key $_
        '----------------------------------'
    } | Out-File $f
}
-
# Entry point: parse the first listing page and save its data.
Save-PageInfo 1
复制代码
|