Setting this up was a nightmare of broken dependencies, but I finally found the magic formula (macOS and Linux).
1. Install miniconda from:
https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation
# Create the new environment with Python 3.10
conda create --name chemoenv_py310 python=3.10 -y
conda activate chemoenv_py310
# Install RDKit and Pin NumPy with Conda
conda install -c conda-forge rdkit "numpy<2" -y
# Install ML Packages with Pip
pip install torch tensorflow deepchem tf_keras
# Install the remainder from requirements.txt (create the file first; its contents are listed in step 2)
pip install -r requirements.txt
2. Put the following lines into requirements.txt:
# Web server and utilities
fastapi
uvicorn==0.24.0.post1
python-multipart==0.0.6
# Database
SQLAlchemy==2.0.23
alembic==1.13.0
# Other utilities
plotly==5.18.0
pandas>=2.0.3
celery==5.3.6
redis==5.0.1
py3Dmol==2.0.4
tqdm==4.66.4
# Requires Python >= 3.10
dimorphite-dl==2.0.2
3. Once everything is installed, activate the environment:
conda activate chemoenv_py310
...and run this Python 3 test script:
# --- test_chemoinformatics_environment.py ---
import os
import sys
# --- Helper Functions for Colored Output ---
def print_success(message):
    """Write *message* to stdout in green, tagged with ``[SUCCESS]``."""
    # \033[92m switches the terminal to green; \033[0m resets the color.
    colored_line = "\033[92m[SUCCESS] {}\033[0m".format(message)
    print(colored_line)
def print_failure(message):
    """Write *message* to stdout in red, tagged with ``[FAILURE]``."""
    # \033[91m switches the terminal to red; \033[0m resets the color.
    colored_line = "\033[91m[FAILURE] {}\033[0m".format(message)
    print(colored_line)
def print_info(message):
    """Write an untagged informational *message* to stdout in blue."""
    # \033[94m switches the terminal to blue; \033[0m resets the color.
    colored_line = "\033[94m{}\033[0m".format(message)
    print(colored_line)
def _check_numpy():
    """Import NumPy and report its version.

    Returns:
        bool: True when NumPy imported cleanly, False otherwise. The later
        DeepChem check needs NumPy, so the caller aborts on False.
    """
    try:
        import numpy as np
        print_info(f"NumPy version detected: {np.__version__}")
        if not np.__version__.startswith('1.'):
            print_info("Warning: NumPy version is 2.x or higher. This may cause issues with older packages.")
    except ImportError as e:
        print_failure(f"NumPy Check: Could not import NumPy: {e}")
        print_info("Cannot proceed without NumPy. Aborting.")
        return False
    except Exception as e:
        print_failure(f"NumPy Check: An unexpected error occurred: {e}")
        return False
    return True


def _check_rdkit():
    """Import RDKit and verify it computes caffeine's exact molecular weight.

    Returns:
        bool: True when the import and the functional test both succeed.
    """
    try:
        import rdkit
        from rdkit import Chem
        from rdkit.Chem import Descriptors
        # Report the package-level version string.
        print_success(f"RDKit Import: Version {rdkit.__version__} imported successfully.")
        # Functional Test
        caffeine_smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
        mol = Chem.MolFromSmiles(caffeine_smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None;
            # fail with a clear message instead of an obscure downstream error.
            raise ValueError(f"Could not parse SMILES: {caffeine_smiles}")
        mol_wt = Descriptors.ExactMolWt(mol)
        expected_wt = 194.080376
        if abs(mol_wt - expected_wt) >= 0.01:
            raise ValueError(f"Incorrect MolWt calculated: {mol_wt}")
        print_success(f"RDKit Functionality: Calculated MolWt for Caffeine: {mol_wt:.2f}")
    except ImportError as e:
        print_failure(f"RDKit: Could not be imported. Error: {e}")
        return False
    except Exception as e:
        print_failure(f"RDKit: An error occurred: {e}")
        return False
    return True


def _check_dimorphite():
    """Import Dimorphite-DL and protonate glycine at pH 7.0.

    Returns:
        bool: True when the import and the functional test both succeed.
    """
    from importlib import metadata

    try:
        from dimorphite_dl import protonate_smiles
        # Report the version that is actually installed rather than a
        # hard-coded one, so a mismatched install is visible in the log.
        try:
            installed_version = metadata.version("dimorphite-dl")
        except metadata.PackageNotFoundError:
            installed_version = "unknown"
        print_success(f"Dimorphite-DL Import: Version {installed_version} imported successfully.")
        # Functional Test
        glycine_smiles = "NCC(=O)O"
        protonated_forms = protonate_smiles(glycine_smiles, ph_min=7.0, ph_max=7.0)
        # NOTE(review): only the first returned form is inspected, and the
        # expected substring has a neutral amine - confirm this matches
        # what Dimorphite-DL actually returns for glycine at pH 7.
        if protonated_forms and "NCC(=O)[O-]" in protonated_forms[0]:
            print_success(f"Dimorphite-DL Functionality: Protonated Glycine at pH 7.0: {protonated_forms[0]}")
        else:
            raise ValueError(f"Unexpected protonation result: {protonated_forms}")
    except ImportError as e:
        print_failure(f"Dimorphite-DL: Could not be imported. Error: {e}")
        return False
    except Exception as e:
        print_failure(f"Dimorphite-DL: An error occurred: {e}")
        return False
    return True


def _check_deepchem_tensorflow():
    """Import DeepChem, TensorFlow and tf_keras, then train a tiny model.

    Returns:
        bool: True when all imports succeed and a one-epoch fit completes.
    """
    try:
        # Set environment variable BEFORE importing deepchem/tensorflow
        os.environ['TF_USE_LEGACY_KERAS'] = 'True'
        import numpy as np
        import deepchem as dc
        print_success(f"DeepChem Import: Version {dc.__version__} imported successfully.")
        import tensorflow as tf
        print_success(f"TensorFlow Import: Version {tf.__version__} imported successfully.")
        # Check for the legacy keras adapter
        import tf_keras
        print_success(f"tf_keras Import: Version {tf_keras.__version__} imported successfully.")
        # Integration Test
        smiles = ["CC(=O)Oc1ccccc1C(=O)O", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"]
        labels = [1, 0]  # Example labels (e.g., active/inactive)
        featurizer = dc.feat.CircularFingerprint(size=1024)
        X = featurizer.featurize(smiles)
        dataset = dc.data.NumpyDataset(X=X, y=np.array(labels))
        print_success("DeepChem Data Handling: Successfully featurized and created dataset.")
        # Use a simple model compatible with TF legacy Keras
        model = dc.models.MultitaskClassifier(
            n_tasks=1,
            n_features=1024,
            layer_sizes=[512],
            model_dir=None,  # Use a temporary directory
            tensorflow_optimizer=tf_keras.optimizers.Adam(learning_rate=0.001)
        )
        # A simple fit test for one epoch
        model.fit(dataset, nb_epoch=1)
        print_success("DeepChem-TensorFlow Integration: Successfully initialized and trained a TF-backend model for one epoch.")
    except ImportError as e:
        print_failure(f"DeepChem/TensorFlow: A required library could not be imported. Error: {e}")
        return False
    except Exception as e:
        print_failure(f"DeepChem/TensorFlow: An integration error occurred: {e}")
        return False
    return True


def run_checks():
    """Run all environment verification checks and print a colored report.

    The NumPy check runs first and aborts the run on failure. The RDKit,
    Dimorphite-DL and DeepChem/TensorFlow checks then each run regardless
    of whether an earlier one failed, so the log shows every problem.

    Returns:
        bool: True when every check passed; False when any check failed or
        the run was aborted because NumPy could not be imported.
    """
    print_info("--- Starting Chemoinformatics Environment Integration Test ---")

    # --- NumPy Check ---
    if not _check_numpy():
        return False
    print_info("-" * 20)

    results = []
    # --- 1. RDKit Check / 2. Dimorphite-DL Check ---
    for check in (_check_rdkit, _check_dimorphite):
        results.append(check())
        print_info("-" * 20)
    # --- 3. DeepChem and TensorFlow Integration Check ---
    results.append(_check_deepchem_tensorflow())

    all_passed = all(results)
    print_info("\n--- Verification Complete ---")
    if all_passed:
        print_success("\n All checks passed successfully! Your environment is correctly configured.")
    else:
        print_failure("\n Some checks failed. Please review the log above and check your installation.")
    return all_passed


if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and CI
    # pipelines can detect a broken environment.
    sys.exit(0 if run_checks() else 1)