本帖最后由 yu2n 于 2014-7-1 04:52 编辑
vbs 函数过程:
1. 调用wget: 下载网站所有页面到本脚本目录 ……
2. 扫描本脚本目录中所有文件 ……
3. 读取本脚本目录中的所有网页,匹配图片 URL 地址 ……
4. 保存所有图片 URL 地址到 url-img.txt 文件 ……
5. 调用wget: 下载 url-img.txt 指定的图片到本脚本 img 目录 …… | | | Call Main() | | Sub Main() | | | | | | If Not (LCase(Right(WScript.FullName,11)) = LCase("CScript.exe")) Then | | CreateObject("WScript.Shell").Run "cscript.exe //nologo """ & WScript.ScriptFullName & """", 1, False | | WScript.Quit(1) | | End If | | | | Dim wso, strMeDir | | Set wso = WScript.CreateObject("WScript.Shell") | | strMeDir = Left(WScript.ScriptFullName, InStrRev(WScript.ScriptFullName,"\")-1) | | | | WScript.Echo "1. 启动 wget下载网站所有页面到本脚本目录的 720.hao2046.net 文件夹 ……" | | wso.Run "wget -r -p -k -c -x -A=jpg,htm,html 720.hao2046.net -P """ & strMeDir & """", 1, True | | | | | | WScript.Echo "2. 扫描 720.hao2046.net 文件夹中所有文件 ……" | | Dim strFolderspec, strHTML, strURL | | Dim arr() : ReDim Preserve arr(0) | | strFolderspec = strMeDir & "\720.hao2046.net" | | Call ScanFolder(arr, strFolderspec) | | | | | | Dim regEx | | Set regEx = CreateObject("VBScript.RegExp") | | regEx.IgnoreCase = True | | regEx.Global = True | | regEx.MultiLine = True | | | | | | WScript.Echo "3. 读取 720.hao2046.net 文件夹中的所有网页,匹配图片 URL 地址 ……" | | For i = 0 To UBound(arr) | | If LCase(Right(arr(i),5)) = ".html" Or LCase(Right(arr(i),4)) = ".htm" Then | | | | strHTML = ReadPfile(arr(i), "gb2312") | | regEx.Pattern = "src=['""]http://\S+\.jpg['""]" | | Set Matches = regEx.Execute(strHTML) | | For Each Match in Matches | | If Not Match.Value = "" Then | | regEx.Pattern = "(src=['""])*(['""])*" | | strURL = strURL & regEx.Replace(Match.Value, "") & vbCrLf | | End If | | Next | | End If | | Next | | | | | | WScript.Echo "4. 保存所有图片 URL 地址到 url-img.txt 文件 ……" | | Call SavePfile(strMeDir & "\url-img.txt", "utf-8", strURL) | | | | | | WScript.Echo "5. 启动 wget 下载 url-img.txt 指定的图片到本脚本 img 目录 ……" | | wso.Run "wget -c -x -t 5 -i """ & strMeDir & "\url-img.txt"" -P """ & strMeDir & "\img""", 1, True | | | | Msgbox "完成!" | | End Sub | | | | | | | | Function ReadPfile(ByVal FileName, ByVal FileCode) | | Dim objStream | | Set objStream = CreateObject("ADODB.Stream") | | | | With objStream | | .Type = 2 | | .Mode = 3 | | .open | | .Charset = FileCode | | .LoadFromFile FileName | | ReadPfile = .ReadText | | .Close | | End With | | Set objStream = Nothing | | End Function | | | | | | | | Function SavePfile(ByVal FileName, ByVal FileCode, ByVal TextString) | | Dim objStream | | Set objStream = CreateObject("ADODB.Stream") | | With objStream | | .Type = 2 | | .Mode = 3 | | .Charset = FileCode | | .open | | .WriteText TextString | | .SaveToFile FileName, 2 | | .Close | | End With | | Set objStream = Nothing | | End Function | | | | | | | | Sub ScanFolder(ByRef arr, ByVal strFolderspec) | | On Error Resume Next | | Dim fso, objFolder | | Set fso = Createobject("Scripting.FileSystemObject") | | Set objFolder = fso.getfolder(strFolderspec) | | ReDim Preserve arr(UBound(arr)+1) | | arr(UBound(arr)) = strFolderspec & "\" | | For Each subFile In objFolder.files | | ReDim Preserve arr(UBound(arr)+1) | | arr(UBound(arr)) = subFile.path | | Next | | For Each subFolder In objFolder.subfolders | | ScanFolder arr, subFolder.path | | Next | | Set fso = NoThing | | Set objFolder = NoThing | | End Sub COPY |
附网页文件查找字符串代码(findstr_html.vbs): | | | Call Main() | | Sub Main() | | | | | | If Not (LCase(Right(WScript.FullName,11)) = LCase("CScript.exe")) Then | | CreateObject("WScript.Shell").Run "cscript.exe //nologo """ & WScript.ScriptFullName & """", 1, False | | WScript.Quit(1) | | End If | | | | Dim strMeDir | | strMeDir = Left(WScript.ScriptFullName, InStrRev(WScript.ScriptFullName,"\")-1) | | Dim regEx, strHTML, strURL | | | | | | Dim arr() : ReDim Preserve arr(0) | | Call ScanFolder(arr, strMeDir & "\720.hao2046.net") | | If UBound(arr) = 0 Then | | WScript.Echo strMeDir & "\720.hao2046.net" & ", Not Found!" | | Exit Sub | | End If | | | | | | Set regEx = CreateObject("VBScript.RegExp") | | regEx.IgnoreCase = True | | regEx.Global = True | | regEx.MultiLine = True | | | | | | Do | | strPattern = InputBox("请输入要匹配的正则表达式:","查找所有网页文件","123456") | | strInfo = strPattern & vbCrLf & "Not Found!" | | For i = 0 To UBound(arr) | | If LCase(Right(arr(i),5)) = ".html" Or LCase(Right(arr(i),4)) = ".htm" Then | | | | strHTML = ReadPfile(arr(i), "gb2312") | | If InStr(strHTML, strPattern)>0 Then | | strInfo = strPattern & vbCrLf & arr(i) & vbCrLf | | Exit For | | Else | | | | regEx.Pattern = strPattern | | Set Matches = regEx.Execute(strHTML) | | For Each Match in Matches | | If Not Match.Value = "" Then | | | | | | strInfo = strPattern & vbCrLf & arr(i) & vbCrLf | | Exit For | | End If | | Next | | End If | | End If | | Next | | WScript.Echo strInfo | | Loop | | End Sub | | | | | | | | | | Function ReadPfile(ByVal FileName, ByVal FileCode) | | Dim objStream | | Set objStream = CreateObject("ADODB.Stream") | | | | With objStream | | .Type = 2 | | .Mode = 3 | | .open | | .Charset = FileCode | | .LoadFromFile FileName | | ReadPfile = .ReadText | | .Close | | End With | | Set objStream = Nothing | | End Function | | | | | | | | Sub ScanFolder(ByRef arr, ByVal strFolderspec) | | On Error Resume Next | | Dim fso, objFolder | | Set fso = Createobject("Scripting.FileSystemObject") | | Set objFolder = fso.getfolder(strFolderspec) | | ReDim Preserve arr(UBound(arr)+1) | | arr(UBound(arr)) = strFolderspec & "\" | | For Each subFile In objFolder.files | | ReDim Preserve arr(UBound(arr)+1) | | arr(UBound(arr)) = subFile.path | | Next | | For Each subFolder In objFolder.subfolders | | ScanFolder arr, subFolder.path | | Next | | Set fso = NoThing | | Set objFolder = NoThing | | End Sub COPY |
提示:
1. 警告:请不要直接运行代码,这里的示范网址可能无法访问、或缺乏安全性,请改为其他网址再使用。
2. 请将 wget.exe 放置于脚本同一目录下,然后执行。文件结构如下:
..\wget.exe
..\wget_img.vbs
..\findstr_html.vbs |