# Author's note (translated): I don't really know Python; this was cobbled
# together after a lot of searching.
import os
import re
from http.client import HTTPException
from pprint import pprint
from time import sleep
from urllib import request

# Gallery downloader: fetch the index page `url_find`, collect every gallery
# page link found on it, then download each page's .jpg images into a folder
# named after the index page (created under `path_save`).

url_imgs = 'http://img.xgyw.cc/'
url_root = 'http://www.xgyw.cc'
url_find = url_root + '/neiyiyouwu/neiyiyouwu2035.html'
path_save = '.'

# Seconds to wait on a stalled page request before giving up on it.
fetch_timeout = 30

# Failures that mean "skip this item and carry on": network/file errors
# (URLError and timeouts are OSError subclasses), malformed URLs (ValueError)
# and truncated HTTP responses (HTTPException).
fetch_errors = (OSError, ValueError, HTTPException)

# Site-relative links of the form /<dir>/<dir><digits>[_<digits>].html
re_page = re.compile(r'/(\w+)/\1\d+_?\d*\.html')
# Site-relative .jpg paths under /uploadfile referenced by src="..."
re_img = re.compile(r'src="(/uploadfile.*?\d+/\w+\.jpg)"')


def unique_in_order(items):
    """Return the items as a list with duplicates dropped, keeping first-seen order."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def fetch_html(url):
    """Download `url` and return its body decoded as GBK text.

    Undecodable bytes are replaced rather than raising, since the patterns
    searched for afterwards are plain ASCII.
    Raises one of `fetch_errors` when the request fails.
    """
    with request.urlopen(url, timeout=fetch_timeout) as resp:
        return resp.read().decode('gbk', errors='replace')


def find_page_paths(html):
    """Return the distinct gallery page paths linked from `html`, in order."""
    return unique_in_order(m.group(0) for m in re_page.finditer(html))


def find_image_paths(html):
    """Return the distinct /uploadfile .jpg paths referenced in `html`, in order."""
    return unique_in_order(re_img.findall(html))


def gallery_dir(page_url, root_dir):
    """Return the folder for a gallery: `root_dir` joined with the page's file stem."""
    stem = os.path.splitext(os.path.basename(page_url))[0]
    return os.path.join(root_dir, stem)


def image_url(img_path):
    """Join the image host and a site-relative path with exactly one slash."""
    return url_imgs.rstrip('/') + '/' + img_path.lstrip('/')


def download_image(url, dest):
    """Save `url` to the file `dest`; return True on success, False on failure.

    On failure any partially written file is removed so that a later run
    does not mistake it for a finished download.
    """
    try:
        request.urlretrieve(url, dest)
    except fetch_errors as err:
        print('\t\t### Error:0x3', err)
        try:
            os.remove(dest)
        except OSError:
            # Nothing was written (or it cannot be removed); nothing to clean up.
            pass
        sleep(1)
        return False
    print('\t\t### [OK]')
    return True


def main():
    """Run the whole download: index page -> gallery pages -> images."""
    try:
        index_html = fetch_html(url_find)
    except fetch_errors as err:
        # Without the index there is nothing to iterate over.
        print('\n### Error:0x1', url_find, err)
        return

    rs_page = find_page_paths(index_html)
    print('\n### rs_page::\n', rs_page)

    # Every image goes into one folder named after the index page.
    dp = gallery_dir(url_find, path_save)

    for nPage, page_path in enumerate(rs_page, start=1):
        url_page = url_root + page_path
        print('\n### Page:{}/{}'.format(nPage, len(rs_page)))
        print('\t### ' + url_page)

        try:
            html = fetch_html(url_page)
        except fetch_errors as err:
            print('\t### Error:0x2', err)
            sleep(1)
            # Skip this page instead of reusing the previous page's image list.
            continue

        rs_img = find_image_paths(html)
        if rs_img:
            os.makedirs(dp, exist_ok=True)

        for nImgs, h in enumerate(rs_img, start=1):
            ff = os.path.join(dp, os.path.basename(h))
            url_img = image_url(h)

            print('\n### Page:{}/{} \tImgs:{}/{}'.format(
                nPage, len(rs_page), nImgs, len(rs_img)))
            print('\t### ' + url_page)
            print('\t### ', url_img)
            print('\t### ->', ff)

            if os.path.exists(ff):
                print('\t### [EXIST]')
                continue

            print('\t### Download ...')
            download_image(url_img, ff)

    print('\n### Done.')


if __name__ == '__main__':
    main()
复制代码
|