标题: [原创代码] Python科幻小说下载器 [打印本页]
作者: wujunkai 时间: 2019-5-12 15:54 标题: Python科幻小说下载器
使用 Python 2.7.13
- # encoding:utf-8
- import urllib2
- import re
-
# Base URL of the site; get() prepends it to the relative address it is given.
beginning = "http://www.kehuan.net.cn"
-
# Parse a table-of-contents page or a text (chapter) page.

# Replacement text for the HTML entities this script recognises.  Note that
# &nbsp; is written out as a line break, exactly as the original script did.
_ENTITY_TEXT = {
    'hellip': '…',
    'middot': '·',
    'ldquo': '“',
    'rdquo': '”',
    'lsquo': '‘',
    'rsquo': '’',
    'mdash': '—',
    'quot': '"',
    'nbsp': '\n',
    'lt': '<',
    'gt': '>',
}

# Matches "&name;" for a recognised name, or otherwise a lone "&".
_ENTITY_RE = re.compile(r'&(?:(' + '|'.join(sorted(_ENTITY_TEXT)) + r');)?')

_TOC_LINK_RE = re.compile(r'(?<=<dd><a href=").+?(?=">)')
_TITLE_RE = re.compile(r'(?<=<title>).+?(?=</title>)')
_PARAGRAPH_RE = re.compile(r'(?<=<p>).+?(?=</p>)')


def _decode_entities(text):
    """Return *text* with the recognised HTML entities replaced."""
    def _replace(match):
        entity = match.group(1)
        if entity is not None:
            return _ENTITY_TEXT[entity]
        # Unrecognised (or truncated) entity: report a short preview on
        # stdout and drop the ampersand; the characters after it are kept.
        start = match.start()
        print(text[start:start + 7])
        return ''

    return _ENTITY_RE.sub(_replace, text)


def get(address, name):
    """Download the page at ``beginning + address`` and save its contents.

    If the page contains ``<dd><a href="...">`` links it is treated as a
    table of contents: ``name`` is appended to ``<name>.log`` and every
    linked page is processed recursively with the same ``name``.

    Otherwise the page is treated as text: its ``<title>`` and every
    ``<p>...</p>`` paragraph (with the recognised HTML entities decoded) are
    appended to ``<name>.doc`` as plain text.

    Parameters:
        address -- path appended to the module-level ``beginning`` URL.
        name    -- UTF-8 encoded byte string used as the output file name
                   (without extension) and written into the log file.

    Returns None.  Errors raised by ``urllib2.urlopen`` or by opening the
    output files are not caught here.
    """
    response = urllib2.urlopen(beginning + address)
    try:
        web = response.read()
    finally:
        response.close()

    links = _TOC_LINK_RE.findall(web)
    if links:
        with open(name.decode('utf-8') + '.log', "a+") as fout:
            fout.write(" " + name + '\n\n')
        for link in links:
            get(link, name)
        return

    title_match = _TITLE_RE.search(web)
    title = title_match.group(0) if title_match else 'runtime wrong'
    # A page without paragraphs still produces one line holding a space.
    paragraphs = _PARAGRAPH_RE.findall(web) or [' ']

    with open(name.decode('utf-8') + '.doc', "a+") as fout:
        fout.write(" " + title + '\n')
        for paragraph in paragraphs:
            # The regex substitution cannot run past the end of the
            # paragraph, so a trailing "&" no longer raises IndexError.
            fout.write(" " + _decode_entities(paragraph) + '\n')
        # Blank line separating this page from the next one appended.
        fout.write('\n')
-
-
# Main script: fetch the author's index page and download every listed work.
web = urllib2.urlopen("http://www.kehuan.net.cn/author/liucixin.html").read()
# Relative addresses of the <li> links, and the link texts, in page order.
result = re.findall(r'(?<=<li><a href=").+?(?=">)', web)
name = re.findall(r'(?<=.html">).+?(?=</a>)', web)

# Some link texts are wrapped in <strong>...</strong>; keep only the inner text.
_strong_re = re.compile(r'(?<=<strong>).+?(?=</strong>)')
for i, _entry in enumerate(name):
    _strong = _strong_re.search(_entry)
    if _strong:
        name[i] = _strong.group(0)

# The first nine entries are skipped, as in the original script
# (presumably navigation links rather than works -- TODO confirm).
# result[i] is paired with name[i]; stopping at the shorter list avoids an
# IndexError when the two patterns match a different number of times.
for i in range(9, min(len(result), len(name))):
    get(result[i], name[i])
复制代码
速度有点慢，请谅解。
欢迎光临 批处理之家 (http://bbs.bathome.net/) |
Powered by Discuz! 7.2 |