[新手上路]批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程[批处理精品]批处理版照片整理器
[批处理精品]纯批处理备份&还原驱动[批处理精品]CMD命令50条不能说的秘密[在线下载]第三方命令行工具[在线帮助]VBScript / JScript 在线参考
返回列表 发帖
本帖最后由 flashercs 于 2018-9-21 09:51 编辑
  // Build the shared XMLHTTP automation object used for every page request.
  // The candidate ProgIDs are tried in the listed order and the first one that
  // can be instantiated is returned. When none can be created, the problem is
  // reported through showError and the script quits with exit code 1.
  var xhr = (function () {
      var progIds = ['MSXML2.XMLHTTP.6.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP', 'Microsoft.XMLHTTP'],
          idx = 0;
      while (idx < progIds.length) {
          try {
              return new ActiveXObject(progIds[idx]);
          } catch (ignored) {
              // This ProgID could not be created; move on to the next candidate.
          }
          idx++;
      }
      showError('Can\'t build XMLHTTP automation object.');
      WScript.Quit(1);
  })();
  // ---- Script-wide state (shared with getHtml / writePoem through globals) ----

  // Automation objects.
  var fso = new ActiveXObject('Scripting.FileSystemObject');
  var shell = new ActiveXObject('WScript.Shell');

  // Site layout: base URL and the index pages that link to the individual volumes.
  var url = 'http://www.chinapoesy.com/';
  var aTSIndex = ['TangShiAllIndex2.html', 'TangShiAllIndex1.html', 'TangShiAllIndex.html'];
  var aTSUrl = [];    // volume URLs harvested from the index pages

  // Output settings.
  var outFile = 'tangshi.txt';    // text file that receives the Tang poems
  var aFields = ['作者', '标题', '正文'];    // header row written before the poems
  var sSplit = '\t';    // column separator

  // Patterns used to scrape the pages.
  // Volume links on an index page; capture group 2 is the href.
  var reTSIndex = /<a[^>]+href=(['"])(.+?)\1[^>]*>\s*第\d+卷\s*<\/a>/gi;
  // One poem entry; capture groups 1-3 become the three output columns.
  var rePoem = /<li[^>]+class="LiTitle"[^>]*>\s*<a[^>]+class="Green"[^>]*>\s*(.+?)\s*<\/a>[\S\s]+?<\/li>\s*<li[^>]+class="LiContent[^"]*"[^>]*>\s*<b>(.+?)<\/b>([\S\s]+?)<\/li>/gi;
  // Tags and line breaks removed from the poem body.
  var rePB = /<[^>]*>|\r|\n/g;
  // "Next page" pager link; capture group 1 is the path without the leading slash.
  var reNextPage = /<a[^>]+href="\/([^"]+)"[^>]*>\s*<img\s+src="\/Images\/Pager\/nextn.gif"\s+border="0"[^>]*>\s*<\/a>/i;

  // Working variables assigned later by the main script and the helpers.
  var curDir;    // folder containing this script
  var arr;       // most recent regular-expression match
  var ts;        // output text stream
  var sHtml;     // most recently fetched HTML
  var i;         // loop index
  var l;         // cached loop bound
  var timer;     // start time, used for the elapsed-time report
  // Main routine: open the output file, collect the volume URLs from the index
  // pages, write every poem of every volume (following the pager links), then
  // report the elapsed time.
  timer = new Date();
  // Work from the script's own folder so the relative output path resolves there.
  curDir = shell.CurrentDirectory = fso.GetParentFolderName(WScript.ScriptFullName);
  try {
      // 2 = ForWriting, true = create the file when it does not exist.
      ts = fso.OpenTextFile(outFile, 2, true);
  } catch (err) {
      showError(err, 'Writing to ' + outFile);
      WScript.Quit(2);
  }
  // Harvest the volume links, walking aTSIndex from its last entry to its first.
  // The index must start at length - 1: starting at length reads an undefined
  // element and requests the bogus page url + 'undefined'.
  for (i = aTSIndex.length - 1; i >= 0; i--) {
      sHtml = getHtml(url + aTSIndex[i]);
      while (arr = reTSIndex.exec(sHtml)) {
          aTSUrl.push(arr[2]);
      }
  }
  // Header row.
  ts.WriteLine(aFields.join(sSplit));
  for (i = 0, l = aTSUrl.length; i < l; i++) {
      writePoem(url + aTSUrl[i]);
      // Follow the "next page" links of this volume. reNextPage has no g flag, so
      // exec always searches sHtml from the start; the loop advances because
      // writePoem stores each newly fetched page in sHtml.
      while (arr = reNextPage.exec(sHtml)) {
          writePoem(url + arr[1]);
      }
  }
  ts.Close();
  WScript.Echo('Mission complete.\nTime elapsed: ' + (new Date() - timer) / 1000 + 's');
  WScript.Quit();
  /**
   * Fetch a page synchronously through the shared `xhr` object.
   *
   * Any failure is reported through showError and yields an empty string, so
   * callers can keep going with the remaining pages.
   *
   * @param {string} URL Absolute address of the page to request.
   * @returns {string} The response text when the status is 200, otherwise ''.
   */
  function getHtml(URL) {
      try {
          xhr.open('GET', URL, false);    // false = synchronous request
          xhr.send();
      } catch (err) {
          // open/send can throw (for example when the host cannot be reached).
          // Treat that like any other failed fetch instead of aborting the run.
          showError(err, 'fetch URI "' + URL + '"');
          return '';
      }
      if (200 === xhr.status) {
          return xhr.responseText;
      }
      showError('fetch URI "' + URL + '" failed.\nstatus: ' + xhr.status);
      return '';
  }
  /**
   * Download one listing page and append every poem found on it to the output
   * stream, one line per poem with the columns joined by sSplit.
   *
   * The fetched HTML is stored in the global sHtml (the main loop reads it to
   * find the next-page link) and the global arr holds the current match.
   *
   * NOTE(review): the header row lists 作者, 标题, 正文; confirm that capture
   * groups 1 and 2 of rePoem really come out in that order.
   *
   * @param {string} URL Absolute address of the listing page.
   */
  function writePoem(URL) {
      var columns;
      sHtml = getHtml(URL);
      for (arr = rePoem.exec(sHtml); arr; arr = rePoem.exec(sHtml)) {
          // Tags and line breaks are stripped from the poem body only.
          columns = [arr[1], arr[2], arr[3].replace(rePB, '')];
          ts.WriteLine(columns.join(sSplit));
      }
  }
  /**
   * Report a problem through WScript.Echo.
   *
   * An Error object is expanded into a multi-line report: its name, the given
   * source, the error number in hexadecimal, and the facility code and error
   * code extracted from that number, followed by the message. Any other value
   * is echoed unchanged (in that case `source` is not used).
   *
   * @param {*} err Error object, or a ready-made message.
   * @param {string} [source] Description of what was being attempted.
   */
  function showError(err, source) {
      if ('[object Error]' !== Object.prototype.toString.call(err)) {
          WScript.Echo(err);
          return;
      }
      WScript.Echo([
          err.name,
          'source: ' + (undefined === source ? '' : source),
          // >>> 0 turns the signed number into its unsigned 32-bit form for hex output.
          'number: ' + (err.number >>> 0).toString(16),
          // The bits above the low word hold the facility code (was mislabelled "equipment").
          'facility: ' + (err.number >> 16 & 0x1FFF),
          // The low word holds the error code.
          'code: ' + (err.number & 0xFFFF),
          'Information: ' + err.message
      ].join('\n'));
  }
复制代码
2

评分人数

TOP

回复 3# lxh623


不明白楼主的意思,语言表达不清晰。
想要什么说不明白。
你不会发个截图吗?

TOP

本帖最后由 flashercs 于 2018-9-24 01:37 编辑
  // Minified downloader, written as an immediately-invoked function taking (e, t).
  // It sets the shell's current directory to the script's folder; when e is truthy
  // it calls n("全唐诗.txt"); it then calls r(x, oArc, []) starting from the base
  // URL x, echoes the elapsed time and quits. Pages are fetched by s(), errors are
  // reported by c(), and "title"-level entries are written to "<name>.txt" files.
  1. (function(e,t){function i(e,t,i,r,n){this.level=e,this.reChild=t,this.a2=r,this.next=n,this.tableText="",this.tableID=i}function r(e,t,i){var n,a,h,u=s(e);if(null===t.next)return n=t.getContent(u),void o.WriteLine([n[0],i[2],i[1],n[1]].join(f));for("author"===t.level&&(d[i[0]]&&(n=RegExp("<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>\\s*"+l(d[i[0]])+"\\s*</a>","i").exec(u))&&(u=s(x+n[1])),a=T[i[0]]);;){for(;n=t.getChildren(u,a);){if(i.push("author"===t.level?"作者:"+n[1]:n[1]),"title"===t.level){h=n[1]+".txt";try{o=p.OpenTextFile(h,2,!0)}catch(e){c(e,"Opening file "+h);continue}}r(x+n[0],t.next,i),i.pop(),"title"===t.level&&o.Close()}if(!(n=b.exec(u)))break;u=s(x+n[1])}}function n(e){var t,i,n,l,a=["TangShiAllIndex2.html","TangShiAllIndex1.html","TangShiAllIndex.html"],h=/<a[^>]+href=(['"])(.+?)\1[^>]*>\s*第\d+卷\s*<\/a>/gi,u=/<li[^>]+class="LiTitle"[^>]*>\s*<a[^>]+href=["']([^"']+)["'][^>]+?class="Green"[^>]*>\s*(.+?)\s*<\/a>[\S\s]+?<\/li>/gi,g={};try{o=p.OpenTextFile(e,2,!0)}catch(t){return c(t,"Writing to "+e),!1}for(l=a.length-1;l>=0;l--)for(t=s(x+a[l]);n=h.exec(t);)for(i=s(x+n[2]);;){for(;n=u.exec(i);)g[n[1]]||(r(x+n[1],oArc.next.next,["全唐诗","作者:"+n[2]]),g[n[1]]=!0);if(!(n=b.exec(i)))break;i=s(x+n[1])}o.Close()}function l(e){return e.replace(/[()]/g,"\\$&")}function a(e){return e.replace(/^\s+|\s+$|/g,"")}function s(e){return u.open("GET",e,!1),u.send(),200===u.status?u.responseText:(c('fetch URI "'+e+'" failed.\nstatus: '+u.status),"")}function c(e,t){WScript.Echo("[object Error]"===Object.prototype.toString.call(e)?[e.name,"source: "+(void 0===t?"":t),"number: "+(e.number>>>0).toString(16),"equipment: "+(e.number>>16&8191),"code: "+(65535&e.number),"Information: "+e.message].join("\n"):e)}var o,h,u=function(){for(var e=["MSXML2.XMLHTTP.6.0","MSXML2.XMLHTTP.3.0","MSXML2.XMLHTTP","Microsoft.XMLHTTP"],t=0;t<e.length;t++)try{return new ActiveXObject(e[t])}catch(e){}c("Can't build XMLHTTP automation object."),WScript.Quit(1)}(),p=new 
ActiveXObject("Scripting.FileSystemObject"),g=new ActiveXObject("WScript.Shell"),x="http://www.chinapoesy.com/",b=/<a[^>]+href="\/([^"]+)"[^>]*>\s*<img\s+src="\/Images\/Pager\/nextn.gif"\s+border="0"[^>]*>\s*<\/a>/i,f="\t",d={"近现代诗":"近现代诗人全集","唐诗":"更多唐诗诗人","宋词":"更多宋词词人","元曲":"更多元曲名家","外国诗歌":"更多外国诗人"},T={"宋词":'id="DDlSongCi"',"元曲":'id="DDlSongCi"',"唐诗":'id="DDlTang"'};i.prototype.getChildren=function(e){var t;return""===this.tableText&&(this.tableText=RegExp("<table[^>]+"+l(this.tableID)+"[^>]*>[\\S\\s]+?</table>","i").exec(e)),(t=this.reChild.exec(this.tableText))?[t[this.a2[0]],t[this.a2[1]]]:(this.tableText="",null)},oArc=new i("content",null,"",null,null),oArc.getContent=function(e,t,i){var r;r=e.split(/<script\s+type="text\/javascript"[^>]*>\s*\$\("#loading"\)\.css\("display","none"\);\s*<\/script>|<(\w+)[^>]+id="Author\d*"[^>]*>[\S\s]*?<\/\1>|<script\s+type="text\/javascript">\s*<!--\s*google_ad_client[^<>]+?google_ad_slot[^<>]+?-->\s*<\/script>/gi);try{return[a(r[1].replace(/(?:<[^<>]*>|&nbsp;)+/g,"")).replace(/[\r\n]/g," "),r[2].replace(/(?:<[^<>]*>|&nbsp;)+/g,function(e){return/(?:<br\s*\/?>|<\/br>)+/i.test(e)?"<br />":""}).replace(/(?:<br \/>|\s)+/gi,"<br />")]}catch(e){return[]}},oArc=new i("article",/<a[^>]+href=["']([^"']+)["'][^>]*>([^<>]+)<\/a>/gi,'id="DLFeelingChina"',[1,2],oArc),oArc.getChildren=function(e,t){var i;return""===this.tableText&&(this.tableText=RegExp("<table[^>]+"+l(t||this.tableID)+"[^>]*>[\\S\\s]+?</table>","i").exec(e)),(i=this.reChild.exec(this.tableText))?[i[this.a2[0]],/\((\d+)\)/.exec(i[this.a2[1]])[1]]:(this.tableText="",null)},oArc=new i("author",/<a[^>]+href=["']([^"']+)["'][^>]*>([^<>]+)<\/a>/gi,'id="DLAtuhor"',[1,2],oArc),oArc.getChildren=function(e,t){var i;return""===this.tableText&&(this.tableText=RegExp("<table[^>]+"+l(t||this.tableID)+"[^>]*>[\\S\\s]+?</table>","i").exec(e)),(i=this.reChild.exec(this.tableText))?[i[this.a2[0]],i[this.a2[1]].replace(/\s+|\(\d*\)/g,"")]:(this.tableText="",null)},oArc=new 
i("title",RegExp("<a[^>]+href=[\"']/([^\"']+)[\"'][^>]*>("+t.join("|")+")</a>","gi"),'class="top"',[1,2],oArc),h=new Date,g.CurrentDirectory=p.GetParentFolderName(WScript.ScriptFullName),e&&n("全唐诗.txt"),r(x,oArc,[]),WScript.Echo("Mission complete.\nTime elapsed: "+(new Date-h)/1e3+"s"),WScript.Quit()})(!0,["唐诗","宋词","全宋词","元曲","诗经","楚辞","乐府诗集","近现代诗","外国诗歌"]);
  2. // First argument: whether to download the Complete Tang Poems (全唐诗): 1 = yes, 0 = no (the call above passes !0, i.e. true).
  3. // Second argument: the list of other site sections to download, apart from the Complete Tang Poems.
复制代码
1

评分人数

TOP

本帖最后由 flashercs 于 2018-9-23 18:07 编辑

回复 18# lxh623


    四个字段分别是:作品名 人气热度 作者:作者名 正文

第二个数字是人气

47行 blQT=false;改为blQT=true;就是下载全唐诗
  1. blQT = true; // Per the accompanying post: changing blQT from false to true makes the script download the Complete Tang Poems. blQT belongs to a script revision not shown here.
复制代码
这个网站有很多页面已经被删除,但站长没有更新统计数字,所以访问这些页面时就会出现 404(资源不存在)错误。
因此,网站上显示的全唐诗四万三千多首,恐怕是假数据。
外国诗歌哪有那么多?你看下下载的外国诗歌的最后一条是哪个诗人的什么诗歌,然后去网站最后一页去看看是否一致再说吧。
注:下载完成后,记得给已下载的文件改名(例如“全唐诗.txt”),否则再次运行时会重新下载,浪费时间。
个人认为 作品名跟热度是两个字段,要分割开的好。
1

评分人数

TOP

回复 20# lxh623


    修改了下可以下载完整外国诗人了。

TOP

回复 23# lxh623

TOP

返回列表