![]() |
|
PDF file split and copy - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: General (https://python-forum.io/forum-1.html) +--- Forum: Code sharing (https://python-forum.io/forum-5.html) +--- Thread: PDF file split and copy (/thread-38728.html) |
PDF file split and copy - rob101 - Nov-17-2022 Mostly in the interest of learning, but also for a practical application, I've been looking for a way to copy pages from a PDF file and write said pages to a new PDF file. Having done that, I've further developed the idea so that pages from different PDF files can be combined into a new PDF file. I used the pdfrw 0.4 project, for no other reason than it's the first project I came to. I may (if I find the time and motivation) at some point look at other ways to do this, but right now, this does what I need it to do. I hope you find it of use. If you find any bugs, please report back (maybe with a fix?) and I'll update the code. Of note: This code does not traverse directories; simply run it from a location in which you have a copy of your PDF files. I take no responsibility for any loss of your files, so please work with copies, so that you know that you have a backup. Thanks.
#!/usr/bin/python3
# Install with: pip3 install pdfrw
from pdfrw import PdfReader, PdfWriter
import os
def read_file(opp):
    """Prompt the user to pick a PDF file from the current directory.

    Every regular file in the working directory whose extension is ``.pdf``
    (case-insensitive) is listed, numbered from 1 in sorted order. The user
    is then prompted repeatedly until a number from that list is entered.

    Args:
        opp: Label shown before the colon in the prompt, e.g. "Read from".

    Returns:
        The name of the chosen file.

    Raises:
        SystemExit: If the current directory contains no PDF files, because
            no answer to the prompt could ever be valid.
    """
    head = "PDF File List"
    width = 2 * len(head)
    print(head.center(width))
    print(("-" * len(head)).center(width))

    # splitext inspects the *last* dot, so names such as "report.v2.pdf" are
    # recognised; sorting keeps the numbering stable between runs.
    file_list = sorted(
        name
        for name in os.listdir()
        if os.path.splitext(name)[1].lower() == ".pdf" and os.path.isfile(name)
    )
    if not file_list:
        raise SystemExit("No PDF files found in the current directory.")

    for number, name in enumerate(file_list, start=1):
        print("{:2d}: {}".format(number, name))

    while True:
        try:
            choice = int(input(f"\n{opp}: "))
        except ValueError:
            print("Pick a file by its number.")
            continue
        # Reject zero and negatives explicitly: a negative index would
        # otherwise silently select a file counted from the end of the list.
        if 1 <= choice <= len(file_list):
            break
        print("Pick a file by its number.")

    file_name = file_list[choice - 1]
    print(f"Reading from {file_name}")
    return file_name
#==============<end of read file>=============#
def page_select(file_name):
    """Ask the user which pages of a PDF file should be copied.

    A file with exactly one page is selected automatically without
    prompting. Otherwise the user enters 1-based page numbers one at a time
    and finishes with an empty line; invalid entries are reported and
    skipped rather than aborting the program.

    Args:
        file_name: Name of the PDF file to read.

    Returns:
        A list whose first item is ``file_name`` and whose remaining items
        are the zero-based indices of the chosen pages, in the order entered.
    """
    read_from = PdfReader(file_name)
    pages = len(read_from.pages)
    print(f"\nNumber of pages in {file_name}: {pages}")

    process_list = [file_name]
    if pages == 1:
        process_list.append(0)
    else:
        print()
        print("For each page you want to copy, enter the page")
        print("number and press enter.\n")
        print("To finish, simply press enter.")
        print()
        while True:
            answer = input("Page number to copy: ").strip()
            if not answer:
                break
            try:
                # Users count pages from 1; the reader indexes them from 0.
                page_index = int(answer) - 1
            except ValueError:
                print(f"'{answer}' is not a page number.")
                continue
            if 0 <= page_index < pages:
                process_list.append(page_index)
            else:
                print(
                    f"Page {page_index+1} does not exist.\nPlease check your PDF file and page number.")

    # Everything after the file name is a selected page index.
    copy_list = process_list[1:]
    if copy_list:
        print("\nPages to copy:")
        for page in copy_list:
            print(page + 1, end=' ')
        print("\n")
    return process_list
#=============<end of page select>============#
def file_select():
    """Ask whether another PDF file should be added to the selection.

    The answer is matched case-insensitively with surrounding whitespace
    ignored, and both the full words and their initials are accepted. The
    question is repeated until a recognised answer is given.

    Returns:
        True if the user answered yes, False if the user answered no.
    """
    answers = {"yes": True, "y": True, "no": False, "n": False}
    while True:
        reply = input("Select another pdf file? ").strip().lower()
        if reply in answers:
            return answers[reply]
        print("Please answer yes or no")
#=============<end of file select>============#
def write_file(process_list):
    """Write the selected pages to a new PDF file named by the user.

    Args:
        process_list: A list of entries, each a list whose first item is a
            source PDF file name and whose remaining items are zero-based
            page indices to copy from that file.

    Returns:
        True if the output file was written. False if no pages were
        selected or if the user declined to overwrite an existing file.
    """
    # Nothing to do when no entry carries a page index after its file name.
    if not any(len(entry) > 1 for entry in process_list):
        print("No pages were selected; nothing to write.")
        return False

    file_name = ""
    while not file_name:
        file_name = input("Write to: ").strip()

    # os.path.exists also catches targets given with a directory component,
    # which a plain listing of the current directory would miss.
    if os.path.exists(file_name):
        print(f"\nWARNING!\n{file_name} already exists.")
        print("Do you want to overwrite that file?")
        answer = input("(y)es or (n)o: ").strip().lower()
        # The prompt advertises both spellings, so accept both.
        if answer not in ("y", "yes"):
            return False

    print(f"Writing to file: {file_name}")
    write_to = PdfWriter()
    for source_name, *page_indices in process_list:
        read_from = PdfReader(source_name)
        for page_index in page_indices:
            print(f"Adding page {page_index+1} from file {source_name}")
            write_to.addpage(read_from.pages[page_index])
    write_to.write(file_name)
    return True
#============<end of write file>=============#
# Script body: keep collecting page selections (one entry per chosen PDF)
# until the user is done, then write them all to a single output file.
process_list = []
pdf_select = True
while pdf_select:
    read_from = read_file("Read from")
    process_list.append(page_select(read_from))
    pdf_select = file_select()

opp = write_file(process_list)
# The final input() only holds the window open until the user presses enter.
if opp:
    input("\nFile successfully written.\nPress the enter key to exit.")
else:
    input("\nOperation aborted.\nPress the enter key to exit.")

# Footnote: I've not done any extensive testing, but I have found one PDF
# file that this will not read. As it's a bank account statement, there
# could be some kind of security issue, but I can't say for sure, as I don't
# know enough about the topic.
|