//Larry
Version with Comments
# Python 3 program to bulk-convert image files to PDF
# Mass-converts TIFF (.tif) and JPEG (.jpeg) files to PDF
# Larry Billinghurst - 14 Aug 2023
# using img2pdf library
# Note: existing PDF files with the same name in the output directory will be overwritten
# importing necessary libraries
import img2pdf
from PIL import Image
import os
import datetime
import argparse
import time
# Set up the parser and add arguments
parser = argparse.ArgumentParser(description="Convert image files to PDF")
parser.add_argument("--source_dir", default="C:/support/exadocs",
                    help="Directory of the source images")
parser.add_argument("--output_dir", default="C:/support/exadocs-pdf",
                    help="Directory to save the converted PDFs")
# The log location used to be hard-coded; the default keeps the same path
parser.add_argument("--log_dir", default="C:/support",
                    help="Directory to write the error log to")
# Parse the command line arguments
args = parser.parse_args()
# Start the timer
start_time = time.time()
# Initialize the file counter
file_counter = 0
# Use the arguments
source_dir = args.source_dir
output_dir = args.output_dir
print(f"Using source directory: {source_dir}")
print(f"Saving to output directory: {output_dir}")
# Create the output directory up front; without it every PDF write would
# fail with FileNotFoundError. exist_ok=True leaves an existing one alone.
os.makedirs(output_dir, exist_ok=True)
# Create a timestamp so each run gets its own log file
current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Path for the log file with timestamp
log_file_path = f"{args.log_dir}/exadocs-log_{current_timestamp}.txt"
# File extensions to look for (keep these lower-case)
file_extension_list = ('.tif', '.jpeg')
# Get logfile ready to write if needed
def log_error(message):
    """Append an error message to the log file.

    The file named by the module-level ``log_file_path`` is opened in
    append mode on every call, so it is only created once there is
    something to report, and earlier entries are preserved.

    Args:
        message: Text to record. A newline is appended automatically.

    Raises:
        OSError: If the log file cannot be opened or written.
    """
    # Explicit UTF-8: messages contain file names, which may include
    # characters the platform's default encoding cannot represent.
    with open(log_file_path, 'a', encoding='utf-8') as log_file:
        log_file.write(message + "\n")
# Normalise the wanted extensions once so matching is case-insensitive
# (".TIF" and ".tif" are both converted)
wanted_extensions = tuple(ext.lower() for ext in file_extension_list)
# loop through all files in source directory
for target_file in os.listdir(source_dir):
    # Skip anything that is not one of the wanted image types
    if not target_file.lower().endswith(wanted_extensions):
        continue
    # Set source filename with path
    source_file = os.path.join(source_dir, target_file)
    # Store the filename without extension using the [0]
    filename = os.path.splitext(target_file)[0]
    # Build the pdf target filename with output directory
    pdf_target_file = os.path.join(output_dir, filename + '.pdf')
    print(source_file)
    print(pdf_target_file)
    try:
        # Open and verify the image
        with Image.open(source_file) as img:
            img.verify()
        # Convert in memory BEFORE opening the target file: opening with
        # "wb" truncates it, so a failed conversion would otherwise leave
        # an empty PDF behind (or wipe a good one from an earlier run).
        pdf_bytes = img2pdf.convert(source_file)
        # Open pdf target file as "wb" write binary
        with open(pdf_target_file, "wb") as out_file:
            out_file.write(pdf_bytes)
    except Exception as e:
        # Per-file boundary: report the failure and carry on with the rest
        error_message = ("Verification or conversion failed for "
                         f"{source_file}. Error: {e}")
        print(error_message)
        try:
            log_error(error_message)
        except OSError as log_exc:
            # A broken log location must not abort the whole batch
            print(f"Could not write to log file: {log_exc}")
    else:
        # Count only files that were actually converted
        file_counter += 1
# Print the number of files processed
print(f"Number of files processed: {file_counter}")
# Calculate and print the elapsed time
end_time = time.time()
elapsed_time = end_time - start_time
hours, remainder = divmod(elapsed_time, 3600)
minutes, seconds = divmod(remainder, 60)
print(f"Script executed in: {int(hours)} hours, "
      f"{int(minutes)} minutes, {seconds:.2f} seconds")
# End of Code
# Python 3 program to bulk-convert image files to PDF
import argparse
import datetime
import os
import time
from PIL import Image
import img2pdf
def log_error(message):
    """Append an error message to the log file.

    The file named by the module-level ``log_file_path`` is opened in
    append mode on every call, so it is only created once there is
    something to report, and earlier entries are preserved.

    Args:
        message: Text to record. A newline is appended automatically.

    Raises:
        OSError: If the log file cannot be opened or written.
    """
    # Explicit UTF-8: messages contain file names, which may include
    # characters the platform's default encoding cannot represent.
    with open(log_file_path, 'a', encoding='utf-8') as log_file:
        log_file.write(message + "\n")
# Command-line options; every default matches the original fixed locations
parser = argparse.ArgumentParser(description="Convert image files to PDF")
parser.add_argument("--source_dir", default="C:/support/exadocs",
                    help="Directory of source images")
parser.add_argument("--output_dir", default="C:/support/exadocs-pdf",
                    help="Directory for PDFs")
# The log location used to be hard-coded; the default keeps the same path
parser.add_argument("--log_dir", default="C:/support",
                    help="Directory for the error log")
args = parser.parse_args()
start_time = time.time()
file_counter = 0
# Create the output directory up front; without it every PDF write would
# fail with FileNotFoundError. exist_ok=True leaves an existing one alone.
os.makedirs(args.output_dir, exist_ok=True)
# Timestamped name so each run writes to its own log file
run_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
log_file_path = f"{args.log_dir}/exadocs-log_{run_timestamp}.txt"
# Convert every .tif/.jpeg file in the source directory to a PDF of the
# same base name in the output directory.
for target_file in os.listdir(args.source_dir):
    # Lower-case the name first so ".TIF" / ".JPEG" are matched too
    if not target_file.lower().endswith(('.tif', '.jpeg')):
        continue
    source_file = os.path.join(args.source_dir, target_file)
    pdf_target_file = os.path.join(
        args.output_dir, os.path.splitext(target_file)[0] + '.pdf')
    try:
        # Verify the image before attempting the conversion
        with Image.open(source_file) as img:
            img.verify()
        # Convert in memory BEFORE opening the target file: opening with
        # "wb" truncates it, so a failed conversion would otherwise leave
        # an empty PDF behind (or wipe a good one from an earlier run).
        pdf_bytes = img2pdf.convert(source_file)
        with open(pdf_target_file, "wb") as out_file:
            out_file.write(pdf_bytes)
    except Exception as e:
        # Per-file boundary: record the failure and carry on with the rest
        error_message = f"Failed for {source_file}. Error: {e}"
        try:
            log_error(error_message)
        except OSError as log_exc:
            # A broken log location must not abort the whole batch;
            # fall back to the console so the failure is still visible.
            print(error_message)
            print(f"Could not write to log file: {log_exc}")
    else:
        # Count only files that were actually converted
        file_counter += 1
# Report how many files were converted and the wall-clock time taken
end_time = time.time()
hours, remainder = divmod(end_time - start_time, 3600)
minutes, seconds = divmod(remainder, 60)
print(f"Processed: {file_counter} files in {int(hours)}h {int(minutes)}m {seconds:.2f}s.")
No comments:
Post a Comment