Dibuja moléculas 2D dentro de un fichero Excel (similar a ChemDraw).


El siguiente script de Python lee un fichero SDF (en el ejemplo, 1000_best.sdf, con 1000 moléculas en 3D y un campo Name con el nombre de cada molécula), genera la estructura 2D de cada molécula y la inserta en una celda de un fichero XLSX (5OJF-open-MP8-noQ_best-docked.xlsx), tal y como lo haría ChemDraw. Ese fichero XLSX tiene varias columnas; entre ellas debe haber una llamada Name, con el mismo nombre de cada molécula que en el fichero SDF, y otra llamada 2D_structure, que es donde se insertan las imágenes. Como salida genera el fichero 5OJF-open-MP8-noQ_best-docked_2D-structures.xlsx.

Recuerda que debes tener activado el entorno de conda donde hayas instalado rdkit, openpyxl, Cairo, etc.

Puedes instalarlo todo (mejor con permisos de Administrador en un equipo Windows) con una sola línea:
conda install -c conda-forge rdkit openpyxl pillow cairosvg cairo



import os
import re
from rdkit import Chem
from rdkit.Chem import AllChem, rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from openpyxl import load_workbook
from openpyxl.drawing.image import Image as XLImage
from PIL import Image as PILImage
import cairosvg

# =========================
# CONFIG
# =========================
# Input structures, the workbook to annotate, and the annotated copy to write.
sdf_file = "1000_best.sdf"
excel_file = "5OJF-open-MP8-noQ_best-docked.xlsx"
output_excel = "5OJF-open-MP8-noQ_best-docked_2D-structures.xlsx"

# Folders that receive the intermediate drawings.
svg_dir, png_dir = "svg_structures", "png_structures"

# Width and height used for every drawing and for the embedded picture.
IMG_SIZE = 350

# Header texts searched for in the first worksheet row.
NAME_COLUMN_HEADER, STRUCTURE_COLUMN_HEADER = "Name", "2D_structure"

# =========================
# SETUP
# =========================
# Create both output folders up front; existing folders are left untouched.
for _out_dir in (svg_dir, png_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Tell RDKit's depictor to prefer CoordGen when generating 2D coordinates.
rdDepictor.SetPreferCoordGen(True)

# =========================
# HELPERS
# =========================
def get_mol_name(mol, idx):
    """Return the name to use for ``mol``.

    The ``Name`` property is checked first and ``_Name`` second. The first one
    that exists and is non-empty after stripping whitespace is returned in its
    stripped form. If neither qualifies, ``Mol_<idx>`` is returned.
    """
    for prop in ("Name", "_Name"):
        if not mol.HasProp(prop):
            continue
        value = mol.GetProp(prop).strip()
        if value:
            return value
    return f"Mol_{idx}"

def clean_name(name):
    """Return ``name`` with characters unsuitable for file names replaced.

    Each backslash, slash, asterisk, question mark, colon, double quote,
    angle bracket and pipe character becomes a single underscore; every other
    character is kept as is.
    """
    forbidden = re.compile(r'[\\/*?:"<>|]')
    return forbidden.sub("_", name)

# =========================
# DRAW FUNCTION (SVG)
# =========================
def draw_svg(mol, svg_path):
    """Render ``mol`` as an SVG drawing and write it to ``svg_path``.

    The canvas is ``IMG_SIZE`` x ``IMG_SIZE`` (module-level constant). The
    molecule is drawn in kekulized form when kekulization succeeds; otherwise
    it is drawn as received. The caller's molecule is never modified.

    Args:
        mol: RDKit molecule to depict. It is drawn with whatever coordinates
            it already carries; this function does not compute any.
        svg_path: Destination file path; an existing file is overwritten.

    Raises:
        OSError: If ``svg_path`` cannot be opened for writing.
    """
    drawer = rdMolDraw2D.MolDraw2DSVG(IMG_SIZE, IMG_SIZE)
    opts = drawer.drawOptions()

    # --- ChemDraw-like tuning ---
    opts.fixedBondLength = 30
    opts.bondLineWidth = 1.8
    opts.padding = 0.02  # small margin so the drawing uses most of the canvas
    opts.useDefaultAtomPalette()

    # No stereo annotation labels in the drawing.
    opts.addStereoAnnotation = False

    # Force kekulization (ChemDraw-like alternating bonds). Work on a copy:
    # clearAromaticFlags=True rewrites the molecule in place, and the caller's
    # object must not be altered as a side effect of drawing it.
    mol_to_draw = Chem.Mol(mol)
    try:
        Chem.Kekulize(mol_to_draw, clearAromaticFlags=True)
    except Exception:
        # Best effort: if the molecule cannot be kekulized, draw it as given
        # rather than aborting. KeyboardInterrupt/SystemExit still propagate.
        mol_to_draw = mol

    drawer.DrawMolecule(mol_to_draw)
    drawer.FinishDrawing()

    svg = drawer.GetDrawingText()

    with open(svg_path, "w") as f:
        f.write(svg)

# =========================
# STEP 1: SDF → SVG + PNG
# =========================
print("Generating SVG + PNG structures...")

supplier = Chem.SDMolSupplier(sdf_file, removeHs=False)

# Maps each molecule name (unsanitized) to the PNG file drawn for it.
name_to_png = {}

for index, entry in enumerate(supplier):
    # The supplier yields None for some entries; those are skipped.
    if entry is not None:
        mol_name = get_mol_name(entry, index)
        file_stem = clean_name(mol_name)

        # Drop hydrogens, then lay the molecule out in 2D.
        flat_mol = Chem.RemoveHs(entry)
        AllChem.Compute2DCoords(flat_mol)

        svg_file = os.path.join(svg_dir, f"{file_stem}.svg")
        png_file = os.path.join(png_dir, f"{file_stem}.png")

        draw_svg(flat_mol, svg_file)

        # Rasterize the SVG; the PNG is what gets embedded in the workbook.
        cairosvg.svg2png(
            url=svg_file,
            write_to=png_file,
            output_width=IMG_SIZE,
            output_height=IMG_SIZE,
        )

        name_to_png[mol_name] = png_file

print(f"Generated: {len(name_to_png)} molecules")

# =========================
# STEP 2: LOAD EXCEL
# =========================
wb = load_workbook(excel_file)
ws = wb.active

# 1-based column indexes of the two headers; None until found in row 1.
name_col = structure_col = None

for col in range(1, ws.max_column + 1):
    header = ws.cell(row=1, column=col).value
    if header == NAME_COLUMN_HEADER:
        name_col = col
        continue
    if header == STRUCTURE_COLUMN_HEADER:
        structure_col = col

# Both headers must already exist in the sheet; stop otherwise.
if None in (name_col, structure_col):
    raise ValueError("Columns not found")

# =========================
# STEP 3: INSERT PNG
# =========================
# Conversion factors applied to IMG_SIZE for the row height and column width.
# NOTE(review): presumably 0.75 pt per pixel and ~7 px per width unit — confirm.
px_to_points = 0.75
col_width_factor = 7

print("Inserting into Excel...")

for row in range(2, ws.max_row + 1):
    val = ws.cell(row=row, column=name_col).value
    if val:
        # Look the row's name up among the molecules drawn in step 1.
        png_for_row = name_to_png.get(str(val).strip())
        if png_for_row is not None:
            img = XLImage(png_for_row)
            img.width = img.height = IMG_SIZE

            # Anchor the picture at this row's cell in the structure column.
            anchor = ws.cell(row=row, column=structure_col).coordinate
            ws.add_image(img, anchor)

            # Make the row tall enough for the picture.
            ws.row_dimensions[row].height = IMG_SIZE * px_to_points

# Adjust column width
structure_header_cell = ws.cell(row=1, column=structure_col)
ws.column_dimensions[structure_header_cell.column_letter].width = (
    IMG_SIZE / col_width_factor
)

# =========================
# SAVE
# =========================
# Write to a separate file so the input workbook stays as it was.
wb.save(output_excel)

print("Done.")