El siguiente script de Python lee un fichero SDF (en el ejemplo, 1000_best.sdf, con 1000 moléculas en 3D y un campo Name con el nombre de cada molécula), genera la estructura 2D de cada molécula y la inserta, tal y como lo haría ChemDraw, en una celda de un fichero XLSX (5OJF-open-MP8-noQ_best-docked.xlsx). Ese fichero XLSX tiene varias columnas; entre ellas debe haber una llamada Name, con los mismos nombres de molécula que el fichero SDF, y otra llamada 2D_structure, donde se insertan las imágenes. Como salida genera el fichero 5OJF-open-MP8-noQ_best-docked_2D-structures.xlsx.
Recuerda que debes tener activado el entorno de conda donde hayas instalado rdkit, openpyxl, Cairo, etc.
Puedes instalarlo todo (mejor con privilegios de Administrador en un equipo Windows) con una sola línea: conda install -c conda-forge rdkit openpyxl pillow cairosvg cairo
import os
import re
from rdkit import Chem
from rdkit.Chem import AllChem, rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from openpyxl import load_workbook
from openpyxl.drawing.image import Image as XLImage
from PIL import Image as PILImage
import cairosvg
# =========================
# CONFIG
# =========================
# Input SDF file read by Chem.SDMolSupplier in step 1.
sdf_file = "1000_best.sdf"
# Existing workbook that is loaded in step 2 and receives the images.
excel_file = "5OJF-open-MP8-noQ_best-docked.xlsx"
# Path the modified workbook is saved to at the end of the script.
output_excel = "5OJF-open-MP8-noQ_best-docked_2D-structures.xlsx"
# Output directories for the intermediate SVG drawings and the PNG files
# that are embedded in the workbook.
svg_dir = "svg_structures"
png_dir = "png_structures"
# Edge length used for the square drawing canvas, the PNG conversion and the
# embedded image size (presumably pixels -- see px_to_points in step 3).
IMG_SIZE = 350
# Header texts looked up in row 1 of the active worksheet.
NAME_COLUMN_HEADER = "Name"
STRUCTURE_COLUMN_HEADER = "2D_structure"
# =========================
# SETUP
# =========================
# Create the output directories if they do not exist yet.
os.makedirs(svg_dir, exist_ok=True)
os.makedirs(png_dir, exist_ok=True)
# Ask RDKit's depictor to prefer CoordGen when computing 2D coordinates.
rdDepictor.SetPreferCoordGen(True)
# =========================
# HELPERS
# =========================
def get_mol_name(mol, idx):
    """Return a display name for ``mol``.

    The ``Name`` property is tried first, then ``_Name``; the first one that
    is present and non-blank after stripping whitespace is used.

    Args:
        mol: Object exposing ``HasProp(key)`` and ``GetProp(key)``.
        idx: Index used to build the fallback name.

    Returns:
        The stripped property value, or ``"Mol_<idx>"`` when neither property
        holds a non-blank value.
    """
    for prop in ("Name", "_Name"):
        if mol.HasProp(prop):
            # Read the property once instead of once to test and once to return.
            value = mol.GetProp(prop).strip()
            if value:
                return value
    return f"Mol_{idx}"
# Characters replaced by "_" when a molecule name is used as a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/*?:"<>|]')


def clean_name(name):
    """Return ``name`` with file-name-unsafe characters replaced by ``_``.

    Each of the characters ``\\ / * ? : " < > |`` is replaced individually, so
    the result has the same length as the input.

    Args:
        name: Molecule name to sanitize.

    Returns:
        The sanitized string; unchanged if it contains none of the characters.
    """
    return _UNSAFE_FILENAME_CHARS.sub("_", name)
# =========================
# DRAW FUNCTION (SVG)
# =========================
def draw_svg(mol, svg_path):
    """Render ``mol`` as an SVG drawing and write it to ``svg_path``.

    The drawing uses a square ``IMG_SIZE`` x ``IMG_SIZE`` canvas with a fixed
    bond length, a thin padding and stereo annotations switched off. The
    molecule is kekulized on a private copy so that aromatic rings are drawn
    with alternating bonds; the molecule passed in is left untouched.

    Args:
        mol: RDKit molecule to draw.
        svg_path: Destination path of the SVG file.
    """
    drawer = rdMolDraw2D.MolDraw2DSVG(IMG_SIZE, IMG_SIZE)
    opts = drawer.drawOptions()
    # --- ChemDraw-like tuning ---
    opts.fixedBondLength = 30
    opts.bondLineWidth = 1.8
    opts.padding = 0.02  # maximize usage of space
    opts.useDefaultAtomPalette()
    opts.addStereoAnnotation = False
    # Force kekulization (ChemDraw-like alternating bonds) on a copy:
    # clearAromaticFlags=True rewrites the aromatic flags, which would
    # otherwise leak back into the caller's molecule.
    mol_to_draw = Chem.Mol(mol)
    try:
        Chem.Kekulize(mol_to_draw, clearAromaticFlags=True)
    except (ValueError, RuntimeError):
        # Best effort: if the structure cannot be kekulized, draw the
        # molecule exactly as it was passed in.
        mol_to_draw = mol
    drawer.DrawMolecule(mol_to_draw)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    with open(svg_path, "w") as f:
        f.write(svg)
# =========================
# STEP 1: SDF → SVG + PNG
# =========================
# Step 1: read every record of the SDF file, depict it in 2D and store the
# drawing as SVG and PNG. name_to_png maps the molecule name (as returned by
# get_mol_name) to the PNG path and is consumed when filling the workbook.
print("Generating SVG + PNG structures...")
supplier = Chem.SDMolSupplier(sdf_file, removeHs=False)
name_to_png = {}
skipped_records = 0
used_safe_names = set()
for i, mol in enumerate(supplier):
    if mol is None:
        # The supplier yields None for records it could not turn into a
        # molecule; count them so the loss is visible in the summary.
        skipped_records += 1
        continue
    name = get_mol_name(mol, i)
    safe_name = clean_name(name)
    if safe_name in used_safe_names:
        # Duplicate names (or names that only differ in replaced characters)
        # share the same output files; the later molecule wins.
        print(f"Warning: files for {name!r} overwrite an earlier molecule ({safe_name})")
    used_safe_names.add(safe_name)
    # Remove hydrogens
    mol = Chem.RemoveHs(mol)
    # 2D coords
    AllChem.Compute2DCoords(mol)
    svg_path = os.path.join(svg_dir, f"{safe_name}.svg")
    png_path = os.path.join(png_dir, f"{safe_name}.png")
    draw_svg(mol, svg_path)
    # Convert SVG -> PNG (for Excel)
    cairosvg.svg2png(
        url=svg_path,
        write_to=png_path,
        output_width=IMG_SIZE,
        output_height=IMG_SIZE,
    )
    name_to_png[name] = png_path
print(f"Generated: {len(name_to_png)} molecules")
if skipped_records:
    print(f"Skipped: {skipped_records} unreadable SDF records")
# =========================
# STEP 2: LOAD EXCEL
# =========================
# Step 2: open the workbook and locate, in row 1 of the active sheet, the
# column holding the molecule names and the column that receives the images.
wb = load_workbook(excel_file)
ws = wb.active
name_col = None
structure_col = None
for col in range(1, ws.max_column + 1):
    val = ws.cell(row=1, column=col).value
    # Ignore stray leading/trailing whitespace in text headers so that a
    # header such as "Name " is still recognised.
    header = val.strip() if isinstance(val, str) else val
    if header == NAME_COLUMN_HEADER:
        name_col = col
    elif header == STRUCTURE_COLUMN_HEADER:
        structure_col = col
# Report exactly which header(s) could not be found.
missing_headers = [
    header_name
    for header_name, found_col in (
        (NAME_COLUMN_HEADER, name_col),
        (STRUCTURE_COLUMN_HEADER, structure_col),
    )
    if found_col is None
]
if missing_headers:
    raise ValueError(f"Columns not found: {', '.join(missing_headers)}")
# =========================
# STEP 3: INSERT PNG
# =========================
# Step 3: anchor each generated PNG in the structure column of the row whose
# name cell matches, and size rows/column so the image fits.
# NOTE(review): 0.75 looks like the pixel -> point factor at 96 dpi, and 7 an
# approximate pixel width of one column-width unit -- confirm.
px_to_points = 0.75
col_width_factor = 7
print("Inserting into Excel...")
inserted_count = 0
unmatched_names = []
for row in range(2, ws.max_row + 1):
    val = ws.cell(row=row, column=name_col).value
    if val is None:
        continue
    name = str(val).strip()
    if name not in name_to_png:
        # Keep track of non-empty names without a structure so the mismatch
        # is reported instead of passing silently.
        if name:
            unmatched_names.append(name)
        continue
    img = XLImage(name_to_png[name])
    img.width = IMG_SIZE
    img.height = IMG_SIZE
    cell = ws.cell(row=row, column=structure_col).coordinate
    ws.add_image(img, cell)
    ws.row_dimensions[row].height = IMG_SIZE * px_to_points
    inserted_count += 1
print(f"Inserted: {inserted_count} images")
if unmatched_names:
    print(f"No structure found for {len(unmatched_names)} names, e.g. {unmatched_names[:5]}")
# Adjust column width
col_letter = ws.cell(row=1, column=structure_col).column_letter
ws.column_dimensions[col_letter].width = IMG_SIZE / col_width_factor
# =========================
# SAVE
# =========================
# Write the modified workbook to the path given by output_excel.
wb.save(output_excel)
print("Done.")