Python: Chemoinformatics Setup

Setting this up was a nightmare of broken dependencies, but I finally found the magic formula (macOS and Linux).

1. Install miniconda from:
https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation

# Create the new environment with Python 3.10
# (dimorphite-dl, listed in requirements.txt, requires Python >= 3.10)
conda create --name chemoenv_py310 python=3.10 -y
conda activate chemoenv_py310

# Install RDKit and Pin NumPy with Conda
# (NumPy 2.x may cause issues with older packages, hence the "numpy<2" pin)
conda install -c conda-forge rdkit "numpy<2" -y

# Install ML Packages with Pip
# (tf_keras is the legacy Keras adapter the test script imports alongside TensorFlow)
pip install torch tensorflow deepchem tf_keras

# Install the Remainder from requirements.txt
# (requirements.txt must already exist; its contents are listed in step 2)
pip install -r requirements.txt

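2. Create requirements.txt with the following contents (the file must exist before you run the final `pip install -r requirements.txt` command from step 1):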

# Web server and utilities

fastapi
uvicorn==0.24.0.post1
python-multipart==0.0.6

# Database

SQLAlchemy==2.0.23
alembic==1.13.0

# Other utilities

plotly==5.18.0
pandas>=2.0.3
celery==5.3.6
redis==5.0.1
py3Dmol==2.0.4
tqdm==4.66.4

# Requires Python >= 3.10

dimorphite-dl==2.0.2

3. Once everything is installed, activate the environment...

conda activate chemoenv_py310

...and run this test script with python3.

# --- test_chemoinformatics_environment.py ---

import os
import sys

# --- Helper Functions for Colored Output ---
def print_success(message):
    """Write *message* to stdout as a green ``[SUCCESS]`` line."""
    green, reset = "\033[92m", "\033[0m"  # ANSI: bright green / reset attributes
    print(f"{green}[SUCCESS] {message}{reset}")

def print_failure(message):
    """Write *message* to stdout as a red ``[FAILURE]`` line."""
    red, reset = "\033[91m", "\033[0m"  # ANSI: bright red / reset attributes
    print(f"{red}[FAILURE] {message}{reset}")

def print_info(message):
    """Write *message* to stdout in blue, with no prefix tag."""
    blue, reset = "\033[94m", "\033[0m"  # ANSI: bright blue / reset attributes
    print(f"{blue}{message}{reset}")

def _check_numpy():
    """Report the installed NumPy version; return False if NumPy is unusable."""
    try:
        import numpy as np
        print_info(f"NumPy version detected: {np.__version__}")
        if not np.__version__.startswith('1.'):
            print_info("Warning: NumPy version is 2.x or higher. This may cause issues with older packages.")
    except ImportError as e:
        print_failure(f"NumPy Check: Could not import NumPy: {e}")
        print_info("Cannot proceed without NumPy. Aborting.")
        return False
    except Exception as e:
        print_failure(f"NumPy Check: An unexpected error occurred: {e}")
        return False
    return True


def _check_rdkit():
    """Import RDKit and verify the exact molecular weight of caffeine."""
    try:
        from rdkit import Chem
        from rdkit.Chem import Descriptors
        print_success(f"RDKit Import: Version {Chem.rdVersion} imported successfully.")

        # Functional Test
        caffeine_smiles = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
        mol = Chem.MolFromSmiles(caffeine_smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None; report
            # that directly instead of letting the descriptor call fail.
            raise ValueError(f"Could not parse SMILES: {caffeine_smiles}")
        mol_wt = Descriptors.ExactMolWt(mol)
        expected_wt = 194.080376
        if abs(mol_wt - expected_wt) >= 0.01:
            raise ValueError(f"Incorrect MolWt calculated: {mol_wt}")
        print_success(f"RDKit Functionality: Calculated MolWt for Caffeine: {mol_wt:.2f}")
    except ImportError as e:
        print_failure(f"RDKit: Could not be imported. Error: {e}")
        return False
    except Exception as e:
        print_failure(f"RDKit: An error occurred: {e}")
        return False
    return True


def _check_dimorphite():
    """Import Dimorphite-DL and protonate glycine at pH 7.0."""
    try:
        from dimorphite_dl import protonate_smiles

        # Report the version that is actually installed rather than assuming
        # the one pinned in requirements.txt.
        from importlib import metadata
        try:
            dimorphite_version = metadata.version("dimorphite-dl")
        except metadata.PackageNotFoundError:
            dimorphite_version = "unknown"
        print_success(f"Dimorphite-DL Import: Version {dimorphite_version} imported successfully.")

        # Functional Test
        glycine_smiles = "NCC(=O)O"
        protonated_forms = protonate_smiles(glycine_smiles, ph_min=7.0, ph_max=7.0)
        if not (protonated_forms and "NCC(=O)[O-]" in protonated_forms[0]):
            raise ValueError(f"Unexpected protonation result: {protonated_forms}")
        print_success(f"Dimorphite-DL Functionality: Protonated Glycine at pH 7.0: {protonated_forms[0]}")
    except ImportError as e:
        print_failure(f"Dimorphite-DL: Could not be imported. Error: {e}")
        return False
    except Exception as e:
        print_failure(f"Dimorphite-DL: An error occurred: {e}")
        return False
    return True


def _check_deepchem_tensorflow():
    """Import DeepChem/TensorFlow/tf_keras and train a tiny model for one epoch."""
    try:
        # Set environment variable BEFORE importing deepchem/tensorflow
        os.environ['TF_USE_LEGACY_KERAS'] = 'True'

        import numpy as np
        import deepchem as dc
        print_success(f"DeepChem Import: Version {dc.__version__} imported successfully.")

        import tensorflow as tf
        print_success(f"TensorFlow Import: Version {tf.__version__} imported successfully.")

        # Check for the legacy keras adapter
        import tf_keras
        print_success(f"tf_keras Import: Version {tf_keras.__version__} imported successfully.")

        # Integration Test
        smiles = ["CC(=O)Oc1ccccc1C(=O)O", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"]
        labels = [1, 0]  # Example labels (e.g., active/inactive)

        featurizer = dc.feat.CircularFingerprint(size=1024)
        X = featurizer.featurize(smiles)
        dataset = dc.data.NumpyDataset(X=X, y=np.array(labels))
        print_success("DeepChem Data Handling: Successfully featurized and created dataset.")

        # Use a simple model compatible with TF legacy Keras
        model = dc.models.MultitaskClassifier(
            n_tasks=1,
            n_features=1024,
            layer_sizes=[512],
            model_dir=None,  # Use a temporary directory
            tensorflow_optimizer=tf_keras.optimizers.Adam(learning_rate=0.001),
        )

        # A simple fit test for one epoch
        model.fit(dataset, nb_epoch=1)
        print_success("DeepChem-TensorFlow Integration: Successfully initialized and trained a TF-backend model for one epoch.")
    except ImportError as e:
        print_failure(f"DeepChem/TensorFlow: A required library could not be imported. Error: {e}")
        return False
    except Exception as e:
        print_failure(f"DeepChem/TensorFlow: An integration error occurred: {e}")
        return False
    return True


def run_checks():
    """Run all environment verification checks and print a colored report.

    The NumPy check runs first; if it fails, the run is aborted immediately
    because the later checks depend on NumPy. The RDKit, Dimorphite-DL and
    DeepChem/TensorFlow checks are then each run regardless of whether an
    earlier one failed, so a single run reports every problem.

    Returns:
        bool: True if every check passed, False if any check failed or the
        run was aborted by the NumPy check.
    """
    print_info("--- Starting Chemoinformatics Environment Integration Test ---")

    if not _check_numpy():
        return False

    results = []
    for check in (_check_rdkit, _check_dimorphite, _check_deepchem_tensorflow):
        print_info("-" * 20)
        # Collect results instead of short-circuiting so all checks execute.
        results.append(check())
    all_passed = all(results)

    print_info("\n--- Verification Complete ---")
    if all_passed:
        print_success("\n All checks passed successfully! Your environment is correctly configured.")
    else:
        print_failure("\n Some checks failed. Please review the log above and check your installation.")
    return all_passed


if __name__ == "__main__":
    # Exit non-zero on failure so shells and CI can detect a broken environment.
    sys.exit(0 if run_checks() else 1)

"Donny, you're out of your element!"