回复 1# 2652boy
- import argparse
- from glob import glob
- from os.path import exists, join
- from PyPDF2 import PdfFileReader
-
def get_total_pages(folder, recursive=False):
    """Return the combined page count of every PDF file found in *folder*.

    Args:
        folder: Path of the directory to search for ``*.pdf`` files.
        recursive: When true, sub-directories of *folder* are searched too.

    Returns:
        The summed page count as an ``int`` (``0`` when no PDF file matches).
        If *folder* does not exist, an error message string is returned
        instead, so callers must check the result type before treating it
        as a number.
    """
    if not exists(folder):
        # Reported as a returned message rather than an exception so that
        # existing callers relying on this contract keep working.
        return "Error: No such file or directory: {}".format(folder)

    # "**" only expands across directory levels when glob runs recursively.
    pattern = join(folder, "**/*.pdf") if recursive else join(folder, "*.pdf")
    pdf_list = glob(pattern, recursive=recursive)

    return sum(PdfFileReader(pdf).getNumPages() for pdf in pdf_list)
-
if __name__ == "__main__":
    # Command-line entry point: print the total page count of the PDF files
    # in the given folder (optionally including its sub-folders).
    parser = argparse.ArgumentParser()
    parser.add_argument('folder', type=str, help='path to the folder where PDF files are stored.')
    parser.add_argument('--recursive', '-r', action='store_true', help='search PDF files in the <folder> recursively.')
    args = parser.parse_args()
    total_pages = get_total_pages(args.folder, recursive=args.recursive)
    if isinstance(total_pages, str):
        # get_total_pages signals a missing folder with a message string;
        # feeding that to the "%d" format below would raise TypeError, so
        # show the message and exit with a non-zero status instead.
        raise SystemExit(total_pages)
    print("===============================\nTotal pages in '%s': %d" % (args.folder, total_pages))
复制代码
将以上代码以 UTF-8 编码保存为 count_pdf_pages.py。假设要统计 "D:\tmp\pdf" 文件夹(含子文件夹)下所有 PDF 文档的总页数,可执行以下命令:
python count_pdf_pages.py "D:\tmp\pdf" -r