scripts/molecular_properties.py

#!/usr/bin/env python3
"""
Molecular Properties Calculator

Calculate comprehensive molecular properties and descriptors for molecules.
Supports single molecules or batch processing from files.

Usage:
    python molecular_properties.py "CCO"
    python molecular_properties.py --file molecules.smi --output properties.csv
"""

import argparse
import sys
from pathlib import Path

try:
    from rdkit import Chem
    from rdkit.Chem import Descriptors, Lipinski
except ImportError:
    print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit")
    sys.exit(1)


def calculate_properties(mol):
    """Calculate comprehensive molecular properties."""
    if mol is None:
        return None

    properties = {
        # Basic properties
        'SMILES': Chem.MolToSmiles(mol),
        'Molecular_Formula': Chem.rdMolDescriptors.CalcMolFormula(mol),

        # Molecular weight
        'MW': Descriptors.MolWt(mol),
        'ExactMW': Descriptors.ExactMolWt(mol),

        # Lipophilicity
        'LogP': Descriptors.MolLogP(mol),
        'MR': Descriptors.MolMR(mol),

        # Polar surface area
        'TPSA': Descriptors.TPSA(mol),
        'LabuteASA': Descriptors.LabuteASA(mol),

        # Hydrogen bonding
        'HBD': Descriptors.NumHDonors(mol),
        'HBA': Descriptors.NumHAcceptors(mol),

        # Atom counts
        'Heavy_Atoms': Descriptors.HeavyAtomCount(mol),
        'Heteroatoms': Descriptors.NumHeteroatoms(mol),
        'Valence_Electrons': Descriptors.NumValenceElectrons(mol),

        # Ring information
        'Rings': Descriptors.RingCount(mol),
        'Aromatic_Rings': Descriptors.NumAromaticRings(mol),
        'Saturated_Rings': Descriptors.NumSaturatedRings(mol),
        'Aliphatic_Rings': Descriptors.NumAliphaticRings(mol),
        'Aromatic_Heterocycles': Descriptors.NumAromaticHeterocycles(mol),

        # Flexibility
        'Rotatable_Bonds': Descriptors.NumRotatableBonds(mol),
        'Fraction_Csp3': Descriptors.FractionCsp3(mol),

        # Complexity
        'BertzCT': Descriptors.BertzCT(mol),

        # Drug-likeness
        'QED': Descriptors.qed(mol),
    }

    # Lipinski's Rule of Five
    properties['Lipinski_Pass'] = (
        properties['MW'] <= 500 and
        properties['LogP'] <= 5 and
        properties['HBD'] <= 5 and
        properties['HBA'] <= 10
    )

    # Lead-likeness
    properties['Lead-like'] = (
        250 <= properties['MW'] <= 350 and
        properties['LogP'] <= 3.5 and
        properties['Rotatable_Bonds'] <= 7
    )

    return properties


def process_single_molecule(smiles):
    """Process a single SMILES string."""
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"Error: Failed to parse SMILES: {smiles}")
        return None

    props = calculate_properties(mol)
    return props


def process_file(input_file, output_file=None):
    """Process molecules from a file."""
    input_path = Path(input_file)

    if not input_path.exists():
        print(f"Error: File not found: {input_file}")
        return

    # Determine file type
    if input_path.suffix.lower() in ['.sdf', '.mol']:
        suppl = Chem.SDMolSupplier(str(input_path))
    elif input_path.suffix.lower() in ['.smi', '.smiles', '.txt']:
        suppl = Chem.SmilesMolSupplier(str(input_path), titleLine=False)
    else:
        print(f"Error: Unsupported file format: {input_path.suffix}")
        return

    results = []
    for idx, mol in enumerate(suppl):
        if mol is None:
            print(f"Warning: Failed to parse molecule {idx+1}")
            continue

        props = calculate_properties(mol)
        if props:
            props['Index'] = idx + 1
            results.append(props)

    # Output results
    if output_file:
        write_csv(results, output_file)
        print(f"Results written to: {output_file}")
    else:
        # Print to console
        for props in results:
            print("\n" + "="*60)
            for key, value in props.items():
                print(f"{key:25s}: {value}")

    return results


def write_csv(results, output_file):
    """Write results to CSV file."""
    import csv

    if not results:
        print("No results to write")
        return

    with open(output_file, 'w', newline='') as f:
        fieldnames = results[0].keys()
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)


def print_properties(props):
    """Print properties in formatted output."""
    print("\nMolecular Properties:")
    print("="*60)

    # Group related properties
    print("\n[Basic Information]")
    print(f"  SMILES:              {props['SMILES']}")
    print(f"  Formula:             {props['Molecular_Formula']}")

    print("\n[Size & Weight]")
    print(f"  Molecular Weight:    {props['MW']:.2f}")
    print(f"  Exact MW:            {props['ExactMW']:.4f}")
    print(f"  Heavy Atoms:         {props['Heavy_Atoms']}")
    print(f"  Heteroatoms:         {props['Heteroatoms']}")

    print("\n[Lipophilicity]")
    print(f"  LogP:                {props['LogP']:.2f}")
    print(f"  Molar Refractivity:  {props['MR']:.2f}")

    print("\n[Polarity]")
    print(f"  TPSA:                {props['TPSA']:.2f}")
    print(f"  Labute ASA:          {props['LabuteASA']:.2f}")
    print(f"  H-bond Donors:       {props['HBD']}")
    print(f"  H-bond Acceptors:    {props['HBA']}")

    print("\n[Ring Systems]")
    print(f"  Total Rings:         {props['Rings']}")
    print(f"  Aromatic Rings:      {props['Aromatic_Rings']}")
    print(f"  Saturated Rings:     {props['Saturated_Rings']}")
    print(f"  Aliphatic Rings:     {props['Aliphatic_Rings']}")
    print(f"  Aromatic Heterocycles: {props['Aromatic_Heterocycles']}")

    print("\n[Flexibility & Complexity]")
    print(f"  Rotatable Bonds:     {props['Rotatable_Bonds']}")
    print(f"  Fraction Csp3:       {props['Fraction_Csp3']:.3f}")
    print(f"  Bertz Complexity:    {props['BertzCT']:.1f}")

    print("\n[Drug-likeness]")
    print(f"  QED Score:           {props['QED']:.3f}")
    print(f"  Lipinski Pass:       {'Yes' if props['Lipinski_Pass'] else 'No'}")
    print(f"  Lead-like:           {'Yes' if props['Lead-like'] else 'No'}")
    print("="*60)


def main():
    parser = argparse.ArgumentParser(
        description='Calculate molecular properties for molecules',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Single molecule
  python molecular_properties.py "CCO"

  # From file
  python molecular_properties.py --file molecules.smi

  # Save to CSV
  python molecular_properties.py --file molecules.sdf --output properties.csv
        """
    )

    parser.add_argument('smiles', nargs='?', help='SMILES string to analyze')
    parser.add_argument('--file', '-f', help='Input file (SDF or SMILES)')
    parser.add_argument('--output', '-o', help='Output CSV file')

    args = parser.parse_args()

    if not args.smiles and not args.file:
        parser.print_help()
        sys.exit(1)

    if args.smiles:
        # Process single molecule
        props = process_single_molecule(args.smiles)
        if props:
            print_properties(props)
    elif args.file:
        # Process file
        process_file(args.file, args.output)


if __name__ == '__main__':
    main()