Saturday, October 1, 2022

Index DICOM files to CSV with Python

#!/usr/bin/env python

# This script will read DICOM Files and output headers to csv

import os
import sys
import os.path
import csv
import pydicom   # Load dicom tools.  Install with: pip install pydicom
import time
# Timestamp (local time) embedded in the output file name, so every run
# writes to its own CSV file.
timestr = time.strftime("%Y%m%d-%H%M%S")

# File extensions that are skipped rather than parsed as DICOM.  Every entry
# carries its leading dot so that only a real extension matches (a bare
# 'csv' would also match any file whose name merely ends in "csv").
file__ex_list = ['.xml', '.gz', '.zip', '.txt', '.doc', '.csv']

# Output CSV file name.
csvfilename = 'c:\\support\\DicomFileList-' + timestr + '.csv'

# Running counters, updated while the directory tree is scanned.
success_count = 0
total_file_count = 0
skipped_file_count = 0

# Root directory that is scanned for files.
# Commented-out sub-path kept from the original:
# \\20200130\\1.2.840.113619.2.226.6067236.1580392212.0.5\\1.2.840.113619.2.226.6067236.1580392212.0.6
tpath = 'E:\\PACS-EXPORT'

# Comma-separated column names.
# NOTE(review): not referenced by the visible script; the header row that is
# actually written comes from the DictWriter field names, which also include
# PatientBirthDate and FileName.  Confirm whether this is still needed.
csvheader = "SOPInstanceUID,StudyInstanceUID,AccessionNumber,PatientID,SeriesDescription,StudyDate,PatientName,Modality"

# newline='' lets the csv module control line endings itself.
csvfile = open(csvfilename, 'w', newline='')

#print(tpath)
# Recursively walk the directory tree and index every file found

# Walk the tree under tpath and write one CSV row per file that can be read
# as DICOM.  The `with` block closes the output file when the scan ends,
# including when an unexpected error interrupts it.
with csvfile:
    # Column order of the CSV.  Every name except "FileName" is looked up as
    # an attribute on the dataset returned by pydicom.
    fnames = ['SOPInstanceUID','StudyInstanceUID','AccessionNumber','PatientID','SeriesDescription','StudyDate','PatientName','Modality','PatientBirthDate',"FileName"]
    csvwriter = csv.DictWriter(csvfile, fieldnames=fnames)
    csvwriter.writeheader()

    # str.endswith() needs a tuple; build it once instead of once per file.
    skip_extensions = tuple(file__ex_list)
    # Attribute names read from each dataset (all columns but the file path).
    dicom_keywords = [name for name in fnames if name != 'FileName']

    for root, dirs, files in os.walk(tpath):
        for f in files:
            total_file_count += 1
            myfile = os.path.join(root, f)
            print("Looking at file -", myfile)

            if f.lower().endswith(skip_extensions):
                sys.stdout.write(" => skipping " + os.path.splitext(f)[1] + " file\n")
                skipped_file_count += 1
                continue

            try:
                # Only header elements are indexed, so stop before the pixel
                # data instead of loading every image into memory.
                dicomdata = pydicom.dcmread(myfile, stop_before_pixels=True)
            except (pydicom.errors.InvalidDicomError, OSError) as err:
                # A non-DICOM or unreadable file must not abort the whole
                # scan; report it and move on.  It is counted in
                # total_file_count but not in success_count.
                sys.stdout.write(" => not indexed: " + str(err) + "\n")
                continue

            # Missing attributes are written as empty strings.
            row = {keyword: getattr(dicomdata, keyword, '') for keyword in dicom_keywords}
            row['FileName'] = myfile
            csvwriter.writerow(row)
            # Count the file only after it was read and written successfully.
            success_count += 1


# Report the outcome of the scan: a short message when every file counted
# was processed successfully, otherwise the success, total and skipped counts.
all_succeeded = success_count == total_file_count
if not all_succeeded:
    summary = "\nSummary: %d out of %d files have been processed successfully as DICOM, with %d files skipped" % (
        success_count, total_file_count, skipped_file_count)
else:
    summary = "\nSummary: all %d DICOM file(s) have been processed successfully" % success_count
print(summary)