// 本帖最后由 WHY 于 2018-9-30 19:19 编辑
// Windows Script Host (JScript) scraper.
//
// For every section listed in `map`, walks the paginated listing under
// `home`, follows each article link found in the listing, strips the article
// HTML down to plain text and appends it to "<section name>.Log" in the
// current directory.
//
// Written in ES3-level syntax on purpose: the WSH JScript engine does not
// support let/const, arrow functions or template literals.

var fso = new ActiveXObject('Scripting.FileSystemObject');
var http = new ActiveXObject('Microsoft.XMLHTTP');

// Section display name (also used as the log file name) -> URL path segment.
var map = {'散文精选': 'sanwenjingxuan'};
var home = 'http://www.ceasm.com/';

// `key` is deliberately a script-level variable: writeToFile() reads it to
// decide which log file to append to.
for (var key in map) {
    if (!map.hasOwnProperty(key)) continue;

    var url = home + map[key] + '/';
    // Captures: [1] article path without the leading slash, [2] article title.
    var reg = /<h4><a href="\/([^"]*\.html)" target="_blank">([^<>]*)<\/a><\/h4>/g;
    var arr;

    while (url !== '') {
        var txt = getText(url);

        // Work out the next listing page before `txt` is narrowed down, so
        // the `continue` below still advances (or terminates) the loop.
        var m = txt.match(/<a href='([^']*\.html)'>下一页/);
        // Accept both "list_2.html" and "/list_2.html" style hrefs; the
        // original concatenation only produced a valid URL for the latter.
        url = m ? home + map[key] + '/' + m[1].replace(/^\//, '') : '';

        txt = txt.split('<ul class="dedelist">')[1];
        if (!txt) continue;
        txt = txt.split('</ul>')[0];

        // The regex is global and reused across pages; start every page
        // from the beginning of the string.
        reg.lastIndex = 0;
        while ((arr = reg.exec(txt)) !== null) {
            writeToFile('A' + arr[2] + '\r\n' + getSentence(home + arr[1]));
        }
    }
}

/**
 * Downloads a page synchronously and decodes its body as GB2312.
 *
 * @param {string} url Absolute URL to fetch.
 * @returns {string} Decoded page text, or '' when the server does not
 *     answer with HTTP 200 (callers treat '' as "nothing to parse").
 */
function getText(url) {
    http.open('GET', url, false);
    http.send();
    if (http.status !== 200) return '';

    // responseText would decode with the wrong charset, so the raw bytes
    // are round-tripped through an ADODB.Stream to decode them explicitly.
    var stream = new ActiveXObject('ADODB.Stream');
    try {
        stream.Mode = 3;              // read/write
        stream.Type = 1;              // binary
        stream.Open();
        stream.Write(http.responseBody);
        stream.Position = 0;
        stream.Type = 2;              // text
        stream.Charset = 'GB2312';
        return stream.ReadText(-1);   // -1 = read everything
    } finally {
        // State 0 means closed; Close() on a closed stream raises an error.
        if (stream.State !== 0) stream.Close();
    }
}

/**
 * Fetches an article, following its "下一页" (next page) links, and returns
 * the body of all pages as plain text.
 *
 * @param {string} url Absolute URL of the article's first page.
 * @returns {string} Text of every page that contained a
 *     <div class="text"> body, joined with CRLF.
 */
function getSentence(url) {
    var stc = [];
    while (url !== '') {
        var s = getText(url);

        // Next-page hrefs are resolved against the current page's directory.
        var m = s.match(/<a href='([^']*\.html)'>下一页/);
        url = m ? url.replace(/[^\/]*$/, '') + m[1] : '';

        s = s.split('<div class="text">')[1];
        if (!s) continue;
        s = s.split('</table>')[0];

        // Paragraph ends become line breaks, then all remaining markup goes.
        s = s.replace(/(?:<\/strong>)?<\/p>/g, '\r\n');
        s = s.replace(/<[^>]*>|&nbsp;/g, '');
        s = s.replace(/^[ ]+/gm, '');
        // Collapse runs of line breaks and trim one from each end.
        s = s.replace(/(\r?\n)+/g, '$1').replace(/^(?:\r?\n)|(?:\r?\n)$/g, '');

        // NOTE(review): the entity names below (and &nbsp; above) are
        // reconstructed from their replacement characters. The posted listing
        // had them already HTML-decoded, which turned every one of these
        // calls into a no-op. Confirm against the site's actual markup.
        s = s.replace(/&#39;/g, "'");
        s = s.replace(/&lsquo;/g, '‘').replace(/&rsquo;/g, '’').replace(/&hellip;/g, '…');
        s = s.replace(/&middot;/g, '·').replace(/&ldquo;/g, '“').replace(/&rdquo;/g, '”');
        s = s.replace(/&mdash;/g, '—').replace(/&quot;/g, '"');

        stc.push(s);
    }
    return stc.join('\r\n');
}

/**
 * Appends one line to "<key>.Log", creating the file if needed.
 * Reads the script-level `key` set by the section loop.
 *
 * @param {string} str Text to append; a line terminator is added.
 */
function writeToFile(str) {
    // 8 = ForAppending, true = create the file when it does not exist.
    var objFile = fso.OpenTextFile(key + '.Log', 8, true);
    try {
        objFile.WriteLine(str);
    } finally {
        objFile.Close();
    }
}

WSH.Echo('Done');