RUST: Access the PubChem API

// Cargo.toml

[dependencies]
axum = "0.7"
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
reqwest = { version = "0.11", features = ["json", "rustls-tls"] }
serde_json = "1.0" 
regex = "1.11.1"
// main.rs

use axum::{extract::Form, response::Html, routing::post, Router};
use serde::Deserialize;
use tokio::net::TcpListener;
use std::net::SocketAddr;
use reqwest;
use serde_json::Value;
use regex::Regex;
use std::collections::HashSet;

/// Form payload accepted by the `POST /info` handler.
#[derive(Deserialize)]
struct SmilesInput {
    /// SMILES string submitted under the `smiles` form field.
    smiles: String,
}

/// Builds the HTML fragment describing the compound identified by the submitted SMILES.
///
/// The SMILES is first resolved to a CID; the CID is then used to look up the
/// compound properties and the CAS Registry Numbers. The fragment consists of
/// the CID link, the property lines and the CASRN line, separated by `<br>`.
///
/// # Errors
///
/// Returns the error string of the first lookup step that fails.
async fn get_compound_info(Form(input): Form<SmilesInput>) -> Result<String, String> {
    let compound_id = get_cid(&input.smiles).await?;
    let property_lines = get_properties(&compound_id).await?;
    let casrn_line = get_casrns(&compound_id).await?;

    let sections = [
        format!(
            r#"<b>CID:</b> <a href="https://pubchem.ncbi.nlm.nih.gov/compound/{}">{}</a>"#,
            compound_id, compound_id
        ),
        // Joined separately so the property lines always occupy exactly one section.
        property_lines.join("<br>"),
        casrn_line,
    ];

    Ok(sections.join("<br>"))
}

/// Resolves a SMILES string to a PubChem compound ID (CID).
///
/// The SMILES is sent as a URL-encoded `smiles` query argument rather than
/// being spliced into the URL path. SMILES routinely contain characters such
/// as `#` (triple bond), `/` and `\` (stereo bonds), `+` and `%`, which would
/// otherwise be interpreted as URL syntax and truncate or corrupt the request.
///
/// # Errors
///
/// Returns the transport error text if the request or body read fails, and
/// `"Error|Invalid SMILES"` if the response body is not a single unsigned
/// integer.
async fn get_cid(smiles: &str) -> Result<String, String> {
    let response = reqwest::Client::new()
        .get("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/cids/TXT")
        // `query` percent-encodes the value, so any SMILES is transmitted intact.
        .query(&[("smiles", smiles)])
        .send()
        .await
        .map_err(|e| e.to_string())?;
    let text = response.text().await.map_err(|e| e.to_string())?;
    let cid = text.trim();

    // An empty body fails to parse as well, so no separate emptiness check is needed.
    if cid.parse::<u32>().is_ok() {
        Ok(cid.to_string())
    } else {
        Err("Error|Invalid SMILES".to_string())
    }
}

async fn get_properties(cid: &str) -> Result<Vec<String>, String> {
    let properties = vec!["MolecularFormula", "IUPACName", "CanonicalSMILES"];
    let mut results = Vec::new();

    for prop in properties {
        let url = format!("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/property/{}/JSON", cid, prop);
        let response = reqwest::get(&url).await.map_err(|e| e.to_string())?;
        let json: Value = response.json().await.map_err(|e| e.to_string())?;

        let value = json.get("PropertyTable")
            .and_then(|table| table.get("Properties"))
            .and_then(|props| props.as_array().and_then(|arr| arr.get(0)))
            .and_then(|prop_data| prop_data.get(prop))
            .and_then(|v| v.as_str())
            .map(|s| format!("<b>{}:</b> {}", prop, s))
            .unwrap_or_else(|| format!("<b>{}:</b> Not Available", prop));

        results.push(value);
    }
    Ok(results)
}

async fn get_casrns(cid: &str) -> Result<String, String> {
    let url = format!("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/xrefs/RegistryID/JSON", cid);
    let response = reqwest::get(&url).await.map_err(|e| e.to_string())?;
    let json: Value = response.json().await.map_err(|e| e.to_string())?;

    let casrn_regex = Regex::new(r"^\d{2,7}-\d{2}-\d$").unwrap();
    let mut casrns = HashSet::new();

    if let Some(registry_ids) = json.get("InformationList")
        .and_then(|list| list.get("Information"))
        .and_then(|info| info.as_array().and_then(|arr| arr.get(0)))
        .and_then(|info| info.get("RegistryID"))
        .and_then(|ids| ids.as_array())
    {
        for id in registry_ids {
            if let Some(s) = id.as_str() {
                let clean_s = s.replace("CAS-", "");
                if casrn_regex.is_match(&clean_s) && clean_s.split('-').all(|part| part.chars().all(|c| c.is_digit(10))) {
                    if let Some(cas) = validate_cas(&clean_s) {
                        casrns.insert(cas);
                    }
                }
            } else if let Some(n) = id.as_number() {
                let num_str = n.to_string();
                if casrn_regex.is_match(&num_str) && num_str.split('-').all(|part| part.chars().all(|c| c.is_digit(10))) {
                    if let Some(cas) = validate_cas(&num_str) {
                        casrns.insert(cas);
                    }
                }
            }
        }
    }

    if casrns.is_empty() {
        Ok("<b>CASRN:</b> Not Available".to_string())
    } else {
        Ok(format!("<b>CASRN:</b> {}", casrns.into_iter().collect::<Vec<String>>().join(", ")))
    }
}

/// Validates a CAS Registry Number written as `NN..N-NN-N`.
///
/// The input must consist of exactly three hyphen-separated fields of ASCII
/// digits: 2 to 7 digits, then 2 digits, then a single check digit. The check
/// digit must equal the value computed by [`calculate_cas_check_digit`] over
/// the first two fields concatenated.
///
/// Returns `Some(cas.to_string())` when the number is valid and `None`
/// otherwise. Malformed input (wrong field count or length, non-digit
/// characters) yields `None` rather than a panic.
fn validate_cas(cas: &str) -> Option<String> {
    let mut fields = cas.split('-');
    let (head, middle, check) =
        match (fields.next(), fields.next(), fields.next(), fields.next()) {
            (Some(head), Some(middle), Some(check), None) => (head, middle, check),
            _ => return None,
        };

    // Byte lengths equal digit counts once every byte is known to be an ASCII digit.
    let well_formed = (2..=7).contains(&head.len())
        && middle.len() == 2
        && check.len() == 1
        && [head, middle, check]
            .iter()
            .all(|field| field.bytes().all(|b| b.is_ascii_digit()));
    if !well_formed {
        return None;
    }

    let expected = check.chars().next()?.to_digit(10)?;
    let body = format!("{}{}", head, middle);
    (calculate_cas_check_digit(&body) == expected).then(|| cas.to_string())
}

/// Computes the CAS check digit for a string of decimal digits.
///
/// Digits are weighted by their position counted from the right (the
/// rightmost digit has weight 1, the next one weight 2, and so on); the result
/// is the weighted sum modulo 10. An empty input yields 0.
///
/// # Panics
///
/// Panics if `input` contains a character that is not a decimal digit.
fn calculate_cas_check_digit(input: &str) -> u32 {
    let weighted_sum: u32 = input
        .chars()
        .rev()
        .zip(1u32..)
        .map(|(c, weight)| {
            let digit = c
                .to_digit(10)
                .expect("CAS check-digit input must contain only decimal digits");
            digit * weight
        })
        .sum();
    weighted_sum % 10
}

/// Serves the single-page UI as a static HTML document.
///
/// The page contains a form with one SMILES text input. Its submit handler
/// suppresses the default form submission, POSTs the URL-encoded value to
/// `/info` as `application/x-www-form-urlencoded`, and writes the response
/// text into the `#info-display` element via `innerHTML`.
// NOTE(review): the response is inserted as HTML, so whatever `/info` returns
// (including error text) is interpreted as markup by the browser.
async fn index() -> Html<&'static str> {
    Html(r#"<!DOCTYPE html><html><head><title>Chemical Information</title></head><body><h1>Chemical Information</h1><form id="smiles-form"><input type="text" name="smiles" id="smiles-input" placeholder="Enter SMILES string" /><button type="submit">Get Information</button></form><div id="info-display"></div><script>document.getElementById('smiles-form').addEventListener('submit', async (event) => {event.preventDefault();const smiles = document.getElementById('smiles-input').value;const response = await fetch('/info', {method: 'POST',headers: {'Content-Type': 'application/x-www-form-urlencoded',},body: `smiles=${encodeURIComponent(smiles)}`,});const responseText = await response.text();document.getElementById('info-display').innerHTML = responseText;});</script></body></html>"#)
}

/// Starts the web server on port 8000 of all IPv4 interfaces.
///
/// `GET /` serves the form page and `POST /info` returns the compound
/// information for the submitted SMILES.
#[tokio::main]
async fn main() {
    // Results and lookup errors are both returned as the plain response body.
    let info_handler = |input: Form<SmilesInput>| async move {
        get_compound_info(input)
            .await
            .unwrap_or_else(|message| message)
    };

    let app = Router::new()
        .route("/", axum::routing::get(index))
        .route("/info", post(info_handler));

    let bind_address = SocketAddr::from(([0, 0, 0, 0], 8000));
    let listener = TcpListener::bind(bind_address).await.unwrap();
    println!("listening on {}", listener.local_addr().unwrap());

    axum::serve(listener, app.into_make_service())
        .await
        .unwrap();
}

"How are you gonna keep ‘em down on the farm once they’ve seen Karl Hungus?"