# scripts/anonymize_dicom.py

#!/usr/bin/env python3
"""
Anonymize DICOM files by removing or replacing Protected Health Information (PHI).

Usage:
    python anonymize_dicom.py input.dcm output.dcm
    python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
"""

import argparse
import sys
from pathlib import Path

try:
    import pydicom
except ImportError:
    print("Error: pydicom is not installed. Install it with: pip install pydicom")
    sys.exit(1)


# DICOM keywords whose values commonly carry Protected Health Information.
# Order matters: anonymize_dicom() walks this list front to back, so the
# report it returns lists fields in this same order.
PHI_TAGS = [
    # Patient identity and demographics
    'PatientName',
    'PatientID',
    'PatientBirthDate',
    'PatientBirthTime',
    'PatientSex',
    'PatientAge',
    'PatientSize',
    'PatientWeight',
    'PatientAddress',
    'PatientTelephoneNumbers',
    'PatientMotherBirthName',
    'MilitaryRank',
    'EthnicGroup',
    'Occupation',
    'PatientComments',
    # Institution
    'InstitutionName',
    'InstitutionAddress',
    'InstitutionalDepartmentName',
    # Physicians and operators
    'ReferringPhysicianName',
    'ReferringPhysicianAddress',
    'ReferringPhysicianTelephoneNumbers',
    'ReferringPhysicianIdentificationSequence',
    'PerformingPhysicianName',
    'PerformingPhysicianIdentificationSequence',
    'OperatorsName',
    'PhysiciansOfRecord',
    'PhysiciansOfRecordIdentificationSequence',
    'NameOfPhysiciansReadingStudy',
    'PhysiciansReadingStudyIdentificationSequence',
    # Free-text descriptions and request/procedure details
    'StudyDescription',
    'SeriesDescription',
    'AdmittingDiagnosesDescription',
    'DerivationDescription',
    'RequestingPhysician',
    'RequestingService',
    'RequestedProcedureDescription',
    'ScheduledPerformingPhysicianName',
    'PerformedLocation',
    'PerformedStationName',
]


def _scrub_dataset(ds, patient_id, patient_name, anonymized, location=''):
    """Remove or replace every PHI_TAGS element in ``ds`` and in its nested sequence items.

    Args:
        ds: Dataset to modify in place.
        patient_id: Replacement value for PatientID.
        patient_name: Replacement value for PatientName.
        anonymized: List that receives one human-readable entry per changed element.
        location: Prefix identifying where ``ds`` sits inside the file
            (empty for the top-level dataset, e.g. ``'SomeSequence[0].'`` for an item).
    """
    for tag in PHI_TAGS:
        if tag not in ds:
            continue
        label = f"{location}{tag}"
        if tag == 'PatientName':
            ds.PatientName = patient_name
            anonymized.append(f"{label}: replaced with '{patient_name}'")
        elif tag == 'PatientID':
            ds.PatientID = patient_id
            anonymized.append(f"{label}: replaced with '{patient_id}'")
        elif tag == 'PatientBirthDate':
            # A fixed placeholder date is written instead of deleting the element.
            ds.PatientBirthDate = '19000101'
            anonymized.append(f"{label}: replaced with '19000101'")
        else:
            delattr(ds, tag)
            anonymized.append(f"{label}: removed")

    # Sequences that survived the pass above hold nested datasets that can
    # carry the same PHI keywords, so each item is scrubbed as well.
    for elem in ds:
        if elem.VR != 'SQ':
            continue
        seq_name = elem.keyword or str(elem.tag)
        for index, item in enumerate(elem.value):
            _scrub_dataset(item, patient_id, patient_name, anonymized,
                           f"{location}{seq_name}[{index}].")


def anonymize_dicom(input_path, output_path, patient_id='ANONYMOUS', patient_name='ANONYMOUS'):
    """
    Anonymize a DICOM file by removing or replacing PHI.

    PatientName and PatientID are replaced with the supplied values,
    PatientBirthDate is replaced with '19000101', and every other keyword in
    PHI_TAGS is deleted. The same treatment is applied to datasets nested
    inside sequences. UIDs are left untouched, and private tags are not
    removed.

    Args:
        input_path: Path to input DICOM file
        output_path: Path to output anonymized DICOM file
        patient_id: Replacement patient ID (default: 'ANONYMOUS')
        patient_name: Replacement patient name (default: 'ANONYMOUS')

    Returns:
        A ``(success, result)`` tuple. On success ``result`` is a list of
        strings describing each changed field (top-level fields first, in
        PHI_TAGS order, then fields found inside sequences). On failure
        ``result`` is the error message as a string. No exception is raised.
    """
    try:
        ds = pydicom.dcmread(input_path)

        anonymized = []
        _scrub_dataset(ds, patient_id, patient_name, anonymized)

        # NOTE: Study/Series/SOP Instance UIDs are intentionally kept so that
        # references between files stay valid. If they must change too, assign
        # pydicom.uid.generate_uid() to them here before saving.

        ds.save_as(output_path)

        return True, anonymized

    except Exception as e:
        # Boundary for the CLI: every failure is reported through the return value.
        return False, str(e)


def main():
    """Command-line entry point.

    Parses the input/output paths and the optional replacement values,
    checks that the input path exists, runs anonymize_dicom() and prints
    the outcome. Exits with status 1 when the input is missing or the
    anonymization reports a failure.
    """
    usage_examples = """
Examples:
  python anonymize_dicom.py input.dcm output.dcm
  python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
  python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001 --patient-name "Anonymous^Patient"
        """
    cli = argparse.ArgumentParser(
        description='Anonymize DICOM files by removing or replacing PHI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument('input', type=str, help='Input DICOM file')
    cli.add_argument('output', type=str, help='Output anonymized DICOM file')
    cli.add_argument(
        '--patient-id', type=str, default='ANONYMOUS',
        help='Replacement patient ID (default: ANONYMOUS)',
    )
    cli.add_argument(
        '--patient-name', type=str, default='ANONYMOUS',
        help='Replacement patient name (default: ANONYMOUS)',
    )
    cli.add_argument(
        '-v', '--verbose', action='store_true',
        help='Show detailed anonymization information',
    )
    opts = cli.parse_args()

    # Bail out early when there is nothing to read.
    if not Path(opts.input).exists():
        print(f"Error: Input file '{opts.input}' not found")
        sys.exit(1)

    print(f"Anonymizing: {opts.input}")
    ok, outcome = anonymize_dicom(
        opts.input, opts.output, opts.patient_id, opts.patient_name
    )

    # On failure `outcome` is the error message rather than the field list.
    if not ok:
        print(f"✗ Error: {outcome}")
        sys.exit(1)

    print(f"✓ Successfully anonymized DICOM file: {opts.output}")
    if opts.verbose:
        print(f"\nAnonymized {len(outcome)} fields:")
        for entry in outcome:
            print(f"  - {entry}")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
# ← Back to pydicom