scripts/example_queries.py

#!/usr/bin/env python3
"""
ChEMBL Database Query Examples

This script demonstrates common query patterns for the ChEMBL database
using the chembl_webresource_client Python library.

Requirements:
    pip install chembl_webresource_client
    pip install pandas (optional, for data manipulation)
"""

from chembl_webresource_client.new_client import new_client


def get_molecule_info(chembl_id):
    """
    Retrieve detailed information about a molecule by ChEMBL ID.

    Args:
        chembl_id: ChEMBL identifier (e.g., 'CHEMBL25')

    Returns:
        Dictionary containing molecule information
    """
    molecule = new_client.molecule
    return molecule.get(chembl_id)


def search_molecules_by_name(name_pattern):
    """
    Search for molecules by name pattern.

    Args:
        name_pattern: Name or pattern to search for

    Returns:
        List of matching molecules
    """
    molecule = new_client.molecule
    results = molecule.filter(pref_name__icontains=name_pattern)
    return list(results)


def find_molecules_by_properties(max_mw=500, min_logp=None, max_logp=None):
    """
    Find molecules based on physicochemical properties.

    Args:
        max_mw: Maximum molecular weight
        min_logp: Minimum LogP value
        max_logp: Maximum LogP value

    Returns:
        List of matching molecules
    """
    molecule = new_client.molecule

    filters = {
        'molecule_properties__mw_freebase__lte': max_mw
    }

    if min_logp is not None:
        filters['molecule_properties__alogp__gte'] = min_logp
    if max_logp is not None:
        filters['molecule_properties__alogp__lte'] = max_logp

    results = molecule.filter(**filters)
    return list(results)


def get_target_info(target_chembl_id):
    """
    Retrieve information about a biological target.

    Args:
        target_chembl_id: ChEMBL target identifier (e.g., 'CHEMBL240')

    Returns:
        Dictionary containing target information
    """
    target = new_client.target
    return target.get(target_chembl_id)


def search_targets_by_name(target_name):
    """
    Search for targets by name or keyword.

    Args:
        target_name: Target name or keyword (e.g., 'kinase', 'EGFR')

    Returns:
        List of matching targets
    """
    target = new_client.target
    results = target.filter(
        target_type='SINGLE PROTEIN',
        pref_name__icontains=target_name
    )
    return list(results)


def get_bioactivity_data(target_chembl_id, activity_type='IC50', max_value=100):
    """
    Retrieve bioactivity data for a specific target.

    Args:
        target_chembl_id: ChEMBL target identifier
        activity_type: Type of activity (IC50, Ki, EC50, etc.)
        max_value: Maximum activity value in nM

    Returns:
        List of activity records
    """
    activity = new_client.activity
    results = activity.filter(
        target_chembl_id=target_chembl_id,
        standard_type=activity_type,
        standard_value__lte=max_value,
        standard_units='nM'
    )
    return list(results)


def find_similar_compounds(smiles, similarity_threshold=85):
    """
    Find compounds similar to a query structure.

    Args:
        smiles: SMILES string of query molecule
        similarity_threshold: Minimum similarity percentage (0-100)

    Returns:
        List of similar compounds
    """
    similarity = new_client.similarity
    results = similarity.filter(
        smiles=smiles,
        similarity=similarity_threshold
    )
    return list(results)


def substructure_search(smiles):
    """
    Search for compounds containing a specific substructure.

    Args:
        smiles: SMILES string of substructure

    Returns:
        List of compounds containing the substructure
    """
    substructure = new_client.substructure
    results = substructure.filter(smiles=smiles)
    return list(results)


def get_drug_info(molecule_chembl_id):
    """
    Retrieve drug information including indications and mechanisms.

    Args:
        molecule_chembl_id: ChEMBL molecule identifier

    Returns:
        Tuple of (drug_info, mechanisms, indications)
    """
    drug = new_client.drug
    mechanism = new_client.mechanism
    drug_indication = new_client.drug_indication

    try:
        drug_info = drug.get(molecule_chembl_id)
    except:
        drug_info = None

    mechanisms = list(mechanism.filter(molecule_chembl_id=molecule_chembl_id))
    indications = list(drug_indication.filter(molecule_chembl_id=molecule_chembl_id))

    return drug_info, mechanisms, indications


def find_kinase_inhibitors(max_ic50=100):
    """
    Find potent kinase inhibitors.

    Args:
        max_ic50: Maximum IC50 value in nM

    Returns:
        List of kinase inhibitor activities
    """
    target = new_client.target
    activity = new_client.activity

    # Find kinase targets
    kinase_targets = target.filter(
        target_type='SINGLE PROTEIN',
        pref_name__icontains='kinase'
    )

    # Get target IDs
    target_ids = [t['target_chembl_id'] for t in kinase_targets[:10]]  # Limit to first 10

    # Find activities
    results = activity.filter(
        target_chembl_id__in=target_ids,
        standard_type='IC50',
        standard_value__lte=max_ic50,
        standard_units='nM'
    )

    return list(results)


def get_compound_bioactivities(molecule_chembl_id):
    """
    Get all bioactivity data for a specific compound.

    Args:
        molecule_chembl_id: ChEMBL molecule identifier

    Returns:
        List of all activity records for the compound
    """
    activity = new_client.activity
    results = activity.filter(
        molecule_chembl_id=molecule_chembl_id,
        pchembl_value__isnull=False
    )
    return list(results)


def export_to_dataframe(data):
    """
    Convert ChEMBL data to pandas DataFrame (requires pandas).

    Args:
        data: List of ChEMBL records

    Returns:
        pandas DataFrame
    """
    try:
        import pandas as pd
        return pd.DataFrame(data)
    except ImportError:
        print("pandas not installed. Install with: pip install pandas")
        return None


# Example usage
if __name__ == "__main__":
    print("ChEMBL Database Query Examples")
    print("=" * 50)

    # Example 1: Get information about aspirin
    print("\n1. Getting information about aspirin (CHEMBL25)...")
    aspirin = get_molecule_info('CHEMBL25')
    print(f"Name: {aspirin.get('pref_name')}")
    print(f"Formula: {aspirin.get('molecule_properties', {}).get('full_molformula')}")

    # Example 2: Search for EGFR inhibitors
    print("\n2. Searching for EGFR targets...")
    egfr_targets = search_targets_by_name('EGFR')
    if egfr_targets:
        print(f"Found {len(egfr_targets)} EGFR-related targets")
        print(f"First target: {egfr_targets[0]['pref_name']}")

    # Example 3: Find potent activities for a target
    print("\n3. Finding potent compounds for EGFR (CHEMBL203)...")
    activities = get_bioactivity_data('CHEMBL203', 'IC50', max_value=10)
    print(f"Found {len(activities)} compounds with IC50 <= 10 nM")

    print("\n" + "=" * 50)
    print("Examples completed successfully!")
← Back to chembl-database