Script de Python para buscar en ChEMBL si un compuesto se une a una diana


Queremos saber si hay datos de interacción de un compuesto químico dado sobre una o varias dianas proteicas. En nuestro ejemplo, el compuesto es la metil-bardoxolona, cuyo código en ChEMBL es CHEMBL1762621, y queremos comprobar si existen datos de interacción con dianas relacionadas con Keap1/Nrf2, de las que debemos disponer de sus códigos ChEMBL: 'CHEMBL3038498', 'CHEMBL2069156', 'CHEMBL1075094', 'CHEMBL4106129', 'CHEMBL4296123', 'CHEMBL4296124', 'CHEMBL4296122', 'CHEMBL3562164', 'CHEMBL1075141'.

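Para la metil-bardoxolona, que interacciona con Nuclear factor erythroid 2-related factor 2 con EC50 = 60.0 nM, el parámetro pChEMBL se calcula como el logaritmo decimal negativo de la concentración expresada en molar (60.0 nM = 0.000000060 M): pChEMBL = -log10(0.000000060) = 7.22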


from chembl_webresource_client.new_client import new_client
import pandas as pd

# ChEMBL target IDs making up the Keap1/Nrf2-related panel, grouped by kind.
KEAP1_NRF2_PANEL = [
    # Core human targets
    'CHEMBL3038498',
    'CHEMBL2069156',
    'CHEMBL1075094',
    # Complexes / protein-protein interactions (PPI)
    'CHEMBL4106129',
    'CHEMBL4296123',
    'CHEMBL4296124',
    # Orthologs (mouse/rat)
    'CHEMBL4296122',
    'CHEMBL3562164',
    'CHEMBL1075141',
]

def analyze_molecule_profile(molecule_id, target_list=None, standard_types=None):
    """
    Query ChEMBL activity records for one molecule, optionally limited to a target panel.

    Args:
        molecule_id: ChEMBL ID of the molecule (e.g. 'CHEMBL1762621').
        target_list: Optional collection of target ChEMBL IDs. When it is
            None or empty, no target filter is applied and the search covers
            every target with data for the molecule.
        standard_types: Optional iterable of `standard_type` values to keep.
            Defaults to ['IC50', 'Ki', 'EC50', 'Kd'].

    Returns:
        A pandas DataFrame with the columns listed in `cols`, sorted by
        `pchembl_value` in descending order (rows without a pChEMBL value
        end up last), or None when the query returns no records.

    Side effects:
        Prints a banner describing the search mode, and a message when no
        data is found.
    """
    # Default measurement types; kept identical to the previously hard-coded list.
    if standard_types is None:
        standard_types = ['IC50', 'Ki', 'EC50', 'Kd']

    # Filters shared by both search modes; the target filter is added only
    # when a non-empty panel is given.
    filters = {
        'molecule_chembl_id': molecule_id,
        'standard_type__in': list(standard_types),
    }

    if target_list:
        print(f"--- ANALIZANDO PERFIL DE {molecule_id} EN PANEL SELECCIONADO ({len(target_list)} dianas) ---")
        filters['target_chembl_id__in'] = target_list
    else:
        print(f"--- BÚSQUEDA GLOBAL: PERFIL DE PROMISCUIDAD PARA {molecule_id} ---")

    # Columns needed for the analysis
    cols = ['molecule_chembl_id', 'target_chembl_id', 'target_pref_name',
            'standard_type', 'standard_value', 'standard_units', 'pchembl_value']

    query = new_client.activity.filter(**filters).only(cols)
    # Materialise the lazy result set into a DataFrame.
    df = pd.DataFrame(list(query))

    if df.empty:
        print("No se encontraron datos de actividad para los criterios seleccionados.")
        return None

    # Keep only the analysis columns, in a fixed order. Unlike df[cols],
    # reindex tolerates a column missing from the response (it is filled
    # with NaN) instead of raising KeyError.
    df = df.reindex(columns=cols)

    # Clean-up: make sure numeric fields are numeric; unparseable values become NaN.
    for numeric_col in ('standard_value', 'pchembl_value'):
        df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

    # Sort by pChEMBL so the most potent records come first.
    return df.sort_values(by='pchembl_value', ascending=False)

# ==========================================
# EXAMPLE RUNS
# ==========================================

# Compound of interest (example: methyl bardoxolone is CHEMBL1762621)
compound_id = 'CHEMBL1762621'

# OPTION A: test the compound against the specific Keap1/Nrf2 panel
print("\n[EJECUTANDO OPCIÓN A: PANEL ESPECÍFICO]")
results_panel = analyze_molecule_profile(compound_id, target_list=KEAP1_NRF2_PANEL)
if results_panel is not None:
    print(results_panel.to_string(index=False))

# Visual separator between the two reports
print(f"\n{'=' * 80}\n")

# OPTION B: global search with no target filter (promiscuity profile)
print("[EJECUTANDO OPCIÓN B: BÚSQUEDA GLOBAL]")
results_global = analyze_molecule_profile(compound_id)
if results_global is not None:
    # Show only the first 40 rows in case there are many
    print(results_global.iloc[:40].to_string(index=False))

El resultado de ejecutar este script a fecha de edición de este artículo es el siguiente:


[EJECUTANDO OPCIÓN A: PANEL ESPECÍFICO]
--- ANALIZANDO PERFIL DE CHEMBL1762621 EN PANEL SELECCIONADO (9 dianas) ---
molecule_chembl_id target_chembl_id                            target_pref_name standard_type  standard_value standard_units  pchembl_value
     CHEMBL1762621    CHEMBL1075094 Nuclear factor erythroid 2-related factor 2          EC50            60.0             nM           7.22

================================================================================

[EJECUTANDO OPCIÓN B: BÚSQUEDA GLOBAL]
--- BÚSQUEDA GLOBAL: PERFIL DE PROMISCUIDAD PARA CHEMBL1762621 ---
molecule_chembl_id target_chembl_id                                          target_pref_name standard_type  standard_value standard_units  pchembl_value
     CHEMBL1762621        CHEMBL375                                              Mus musculus          IC50            0.10             nM          10.00
     CHEMBL1762621        CHEMBL375                                              Mus musculus          IC50            0.10             nM          10.00
     CHEMBL1762621        CHEMBL375                                              Mus musculus          IC50            0.11             nM           9.96
     CHEMBL1762621    CHEMBL3879801                                        NON-PROTEIN TARGET          IC50            0.20             nM           9.70
     CHEMBL1762621        CHEMBL394                                                   HCT-116          IC50            0.25             nM           9.60
     CHEMBL1762621     CHEMBL612545                                                 Unchecked          EC50            8.71             nM           8.06
     CHEMBL1762621    CHEMBL1255134                                 Ghrelin O-acyltransferase          IC50           35.00             nM           7.46
     CHEMBL1762621        CHEMBL387                                                      MCF7          IC50           50.00             nM           7.30
     CHEMBL1762621    CHEMBL1075094               Nuclear factor erythroid 2-related factor 2          EC50           60.00             nM           7.22
     CHEMBL1762621    CHEMBL4303835                                                SARS-CoV-2          EC50          200.00             nM           6.70
     CHEMBL1762621        CHEMBL395                                                     HepG2          IC50          260.00             nM           6.58
     CHEMBL1762621        CHEMBL384                                                     HT-29          IC50          280.00             nM           6.55
     CHEMBL1762621     CHEMBL613510                                                     HCT-8          IC50          290.00             nM           6.54
     CHEMBL1762621    CHEMBL4303835                                                SARS-CoV-2          EC50          290.00             nM           6.54
     CHEMBL1762621     CHEMBL612558                                                     ADMET          IC50          316.00             nM           6.50
     CHEMBL1762621        CHEMBL387                                                      MCF7          IC50          350.00             nM           6.46
     CHEMBL1762621    CHEMBL3879801                                        NON-PROTEIN TARGET          IC50          363.00             nM           6.44
     CHEMBL1762621        CHEMBL392                                                      A549          IC50          360.00             nM           6.44
     CHEMBL1762621     CHEMBL612545                                                 Unchecked          IC50          377.40             nM           6.42
     CHEMBL1762621    CHEMBL4296415                                                      GES1          IC50          400.00             nM           6.40
     CHEMBL1762621    CHEMBL3879801                                        NON-PROTEIN TARGET          IC50          399.00             nM           6.40
     CHEMBL1762621    CHEMBL4296457                                                       L02          IC50          400.00             nM           6.40
     CHEMBL1762621     CHEMBL614526                                                   BGC-823          IC50          500.00             nM           6.30
     CHEMBL1762621        CHEMBL395                                                     HepG2          IC50          500.00             nM           6.30
     CHEMBL1762621     CHEMBL614526                                                   BGC-823          IC50          500.00             nM           6.30
     CHEMBL1762621        CHEMBL395                                                     HepG2          IC50          520.00             nM           6.28
     CHEMBL1762621        CHEMBL392                                                      A549          IC50          520.00             nM           6.28
     CHEMBL1762621        CHEMBL400                                                MDA-MB-231          IC50          560.00             nM           6.25
     CHEMBL1762621     CHEMBL614909                                                  SGC-7901          IC50          600.00             nM           6.22
     CHEMBL1762621        CHEMBL392                                                      A549          IC50          630.00             nM           6.20
     CHEMBL1762621     CHEMBL614736                                                       HOS          IC50          660.00             nM           6.18
     CHEMBL1762621     CHEMBL615023                                                      U2OS          IC50          740.00             nM           6.13
     CHEMBL1762621     CHEMBL614321                                                   MCF-10A          IC50          800.00             nM           6.10
     CHEMBL1762621     CHEMBL614197                                                   NCI-N87          IC50          800.00             nM           6.10
     CHEMBL1762621        CHEMBL394                                                   HCT-116          IC50          840.00             nM           6.08
     CHEMBL1762621        CHEMBL387                                                      MCF7          IC50          850.00             nM           6.07
     CHEMBL1762621    CHEMBL3706566                                                   MGC-803          IC50          900.00             nM           6.05
     CHEMBL1762621        CHEMBL387                                                      MCF7          IC50         1200.00             nM           5.92
     CHEMBL1762621     CHEMBL612545                                                 Unchecked          IC50         1200.00             nM           5.92
     CHEMBL1762621     CHEMBL612558                                                     ADMET          IC50         1400.00             nM           5.85
     CHEMBL1762621    CHEMBL4483226                                                   A549/TR          IC50         1703.00             nM           5.77
     CHEMBL1762621        CHEMBL392                                                      A549          IC50         2074.00             nM           5.68
     CHEMBL1762621     CHEMBL612545                                                 Unchecked          IC50         2100.00             nM           5.68
     CHEMBL1762621     CHEMBL614818                                                    HEK293          IC50         2200.00             nM           5.66
     CHEMBL1762621       CHEMBL4026        Signal transducer and activator of transcription 3          IC50         2380.00             nM           5.62
     CHEMBL1762621     CHEMBL612557                                                  RAW264.7          IC50         4000.00             nM           5.40
     CHEMBL1762621        CHEMBL384                                                     HT-29          EC50         4340.00             nM           5.36
     CHEMBL1762621        CHEMBL395                                                     HepG2          IC50         4990.00             nM           5.30
     CHEMBL1762621     CHEMBL614576                                                      H9c2          IC50         5200.00             nM           5.28
     CHEMBL1762621    CHEMBL4523582                                 Replicase polyprotein 1ab          IC50         5810.00             nM           5.24
     CHEMBL1762621    CHEMBL3879801                                        NON-PROTEIN TARGET          IC50         5850.00             nM           5.23
     CHEMBL1762621    CHEMBL3588729                                 Ghrelin O-acyltransferase          IC50         6000.00             nM           5.22
     CHEMBL1762621    CHEMBL2157850                   Ubiquitin carboxyl-terminal hydrolase 7          IC50        14080.00             nM           4.85
     CHEMBL1762621    CHEMBL1293227                   Ubiquitin carboxyl-terminal hydrolase 2          IC50        22000.00             nM           4.66
     CHEMBL1762621    CHEMBL3588729                                 Ghrelin O-acyltransferase          IC50        23000.00             nM           4.64
     CHEMBL1762621        CHEMBL240 Voltage-gated inwardly rectifying potassium channel KCNH2          IC50       200000.00             nM            NaN
     CHEMBL1762621    CHEMBL4303835                                                SARS-CoV-2          IC50        20000.00             nM            NaN
     CHEMBL1762621    CHEMBL4303835                                                SARS-CoV-2          IC50        19952.62             nM            NaN
     CHEMBL1762621     CHEMBL612267                                                      L929          EC50        10000.00             nM            NaN