Python科幻小说下载器(页 1) - Python - 批处理之家 BAT,CMD,批处理,PowerShell,VBS,DOS

wujunkai 发表于 2019-5-12 15:54

Python科幻小说下载器

使用 Python 2.7.13：
[code]
# encoding:utf-8
import urllib2
import re

beginning="http://www.kehuan.net.cn"

# Parse a table-of-contents page or a chapter (text) page.

# Chapter / sub-page links: the href value of every `<dd><a href="...">`.
_CHAPTER_LINK_RE = re.compile(r'(?<=<dd><a href=").+?(?=">)')
# Page title: the text between <title> and </title>.
_TITLE_RE = re.compile(r'(?<=<title>).+?(?=</title>)')
# NOTE(review): both lookarounds are empty here, so this matches one
# character at a time.  The delimiting HTML tags were presumably stripped
# when the script was posted -- restore them before relying on the output.
_PARAGRAPH_RE = re.compile(r'(?<=).+?(?=)')

# HTML entities the downloader understands, with their replacement text.
# `&nbsp;` is deliberately mapped to a newline, as in the original script.
_ENTITIES = (
    ('&hellip;', '…'),
    ('&middot;', '·'),
    ('&ldquo;', '“'),
    ('&rdquo;', '”'),
    ('&lsquo;', '‘'),
    ('&rsquo;', '’'),
    ('&mdash;', '—'),
    ('&quot;', '"'),
    ('&nbsp;', '\n'),
    ('&lt;', '<'),
    ('&gt;', '>'),
)


def _decode_entities(text):
    """Return *text* with the entities listed in ``_ENTITIES`` replaced.

    An ``&`` that does not start a known entity is dropped and the seven
    characters starting at it are printed so the missing entity can be added
    to the table.  ``str.startswith`` with an offset is used so that an ``&``
    close to the end of *text* can never index past the end of the string.
    """
    pieces = []
    pos = 0
    length = len(text)
    while pos < length:
        if text[pos] != '&':
            pieces.append(text[pos])
            pos += 1
            continue
        for entity, replacement in _ENTITIES:
            if text.startswith(entity, pos):
                pieces.append(replacement)
                pos += len(entity)
                break
        else:
            # Unknown entity: report it and skip only the '&' itself.
            print(text[pos:pos + 7])
            pos += 1
    return ''.join(pieces)


def get(address, name):
    """Download the page at ``beginning + address`` and save it under *name*.

    If the page contains ``<dd><a href="...">`` links, it is treated as a
    table of contents: *name* is appended to ``<name>.log`` and every linked
    page is fetched recursively with the same *name*.

    Otherwise the page is treated as text: its title (or ``'runtime wrong'``
    when no ``<title>`` is found) and every paragraph matched by
    ``_PARAGRAPH_RE`` are appended to ``<name>.doc`` with the known HTML
    entities decoded.

    address -- path appended to the module-level ``beginning`` URL prefix.
    name    -- UTF-8 encoded byte string used as the output file base name.
    """
    response = urllib2.urlopen(beginning + address)
    try:
        web = response.read()
    finally:
        response.close()

    links = _CHAPTER_LINK_RE.findall(web)
    if links:
        with open(name.decode('utf-8') + '.log', "a+") as fout:
            fout.write(" " + name + '\n\n')
        for link in links:
            get(link, name)
        return

    found = _TITLE_RE.search(web)
    title = found.group(0) if found else 'runtime wrong'
    # Fall back to a single blank "paragraph" when nothing matched.
    paragraphs = _PARAGRAPH_RE.findall(web) or ' '

    with open(name.decode('utf-8') + '.doc', "a+") as fout:
        fout.write(" " + title + '\n')
        for paragraph in paragraphs:
            fout.write(" ")
            fout.write(_decode_entities(paragraph))
            fout.write('\n')
        fout.write('\n')

# Main script: fetch the author page and download every listed work.

# href value of every `<li><a href="...">` link on the author page.
_WORK_LINK_RE = re.compile(r'(?<=<li><a href=").+?(?=">)')
# Anchor text of every link whose href ends in `.html`.
_WORK_NAME_RE = re.compile(r'(?<=.html">).+?(?=</a>)')
# NOTE(review): both lookarounds are empty here, so this matches only the
# first character of a name.  The delimiting HTML tags were presumably
# stripped when the script was posted -- restore them before use.
_INNER_NAME_RE = re.compile(r'(?<=).+?(?=)')
# Index of the first link that is downloaded; earlier links are skipped
# (presumably site navigation -- TODO confirm against the live page).
_FIRST_WORK_INDEX = 9

web = urllib2.urlopen("http://www.kehuan.net.cn/author/liucixin.html").read()
result = _WORK_LINK_RE.findall(web)
name = _WORK_NAME_RE.findall(web)

# Replace each name by its inner text when the name is wrapped in markup.
for i, raw in enumerate(name):
    inner = _INNER_NAME_RE.search(raw)
    if inner:
        name[i] = inner.group(0)

# NOTE(review): assumes result[i] and name[i] describe the same link and
# that `name` is at least as long as `result` -- verify against the page.
for i in range(_FIRST_WORK_INDEX, len(result)):
    get(result[i], name[i])

[/code]
速度有点慢，请谅解。

页: [1]

批处理之家's Archiver

Python科幻小说下载器