// Cargo.toml
[dependencies]
axum = "0.7"
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
reqwest = { version = "0.11", features = ["json", "rustls-tls"] }
serde_json = "1.0"
regex = "1.11.1"
// main.rs
use axum::{extract::Form, response::Html, routing::post, Router};
use serde::Deserialize;
use tokio::net::TcpListener;
use std::net::SocketAddr;
use reqwest;
use serde_json::Value;
use regex::Regex;
use std::collections::HashSet;
/// Form payload accepted by the `/info` route.
#[derive(Deserialize)]
struct SmilesInput {
/// SMILES string submitted by the client; passed on unchanged to the CID lookup.
smiles: String,
}
async fn get_compound_info(Form(input): Form<SmilesInput>) -> Result<String, String> {
let cid = get_cid(&input.smiles).await?;
let properties = get_properties(&cid).await?;
let casrns = get_casrns(&cid).await?;
let cid_link = format!(r#"<b>CID:</b> <a href="https://pubchem.ncbi.nlm.nih.gov/compound/{}">{}</a>"#, cid, cid);
let results = format!("{}<br>{}", cid_link, properties.join("<br>"));
Ok(format!("{}<br>{}", results, casrns))
}
/// Resolves a SMILES string to a PubChem compound ID (CID).
///
/// The SMILES is sent as a URL-encoded `smiles` query argument instead of
/// being spliced into the URL path. SMILES routinely contains characters that
/// are URL syntax (`#` for triple bonds, `/` and `\` for double-bond geometry,
/// `%`, `+`, `?`); placed raw in the path they truncate or re-route the
/// request, so otherwise valid structures could not be looked up.
///
/// Returns the CID as a decimal string.
///
/// # Errors
///
/// Returns `"Error|Invalid SMILES"` when the input is blank or the response
/// body is not a single unsigned integer, and the transport error text when
/// the request itself fails.
async fn get_cid(smiles: &str) -> Result<String, String> {
    // Nothing to look up: answer without a network round trip.
    if smiles.trim().is_empty() {
        return Err("Error|Invalid SMILES".to_string());
    }

    let response = reqwest::Client::new()
        .get("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/cids/TXT")
        // `query` percent-encodes the value, so reserved characters survive intact.
        .query(&[("smiles", smiles)])
        .send()
        .await
        .map_err(|e| e.to_string())?;
    let text = response.text().await.map_err(|e| e.to_string())?;

    // Anything that is not a bare integer (empty body, fault text, several
    // lines) is reported as an invalid SMILES.
    let cid = text.trim();
    if cid.parse::<u32>().is_ok() {
        Ok(cid.to_string())
    } else {
        Err("Error|Invalid SMILES".to_string())
    }
}
async fn get_properties(cid: &str) -> Result<Vec<String>, String> {
let properties = vec!["MolecularFormula", "IUPACName", "CanonicalSMILES"];
let mut results = Vec::new();
for prop in properties {
let url = format!("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/property/{}/JSON", cid, prop);
let response = reqwest::get(&url).await.map_err(|e| e.to_string())?;
let json: Value = response.json().await.map_err(|e| e.to_string())?;
let value = json.get("PropertyTable")
.and_then(|table| table.get("Properties"))
.and_then(|props| props.as_array().and_then(|arr| arr.get(0)))
.and_then(|prop_data| prop_data.get(prop))
.and_then(|v| v.as_str())
.map(|s| format!("<b>{}:</b> {}", prop, s))
.unwrap_or_else(|| format!("<b>{}:</b> Not Available", prop));
results.push(value);
}
Ok(results)
}
async fn get_casrns(cid: &str) -> Result<String, String> {
let url = format!("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/xrefs/RegistryID/JSON", cid);
let response = reqwest::get(&url).await.map_err(|e| e.to_string())?;
let json: Value = response.json().await.map_err(|e| e.to_string())?;
let casrn_regex = Regex::new(r"^\d{2,7}-\d{2}-\d$").unwrap();
let mut casrns = HashSet::new();
if let Some(registry_ids) = json.get("InformationList")
.and_then(|list| list.get("Information"))
.and_then(|info| info.as_array().and_then(|arr| arr.get(0)))
.and_then(|info| info.get("RegistryID"))
.and_then(|ids| ids.as_array())
{
for id in registry_ids {
if let Some(s) = id.as_str() {
let clean_s = s.replace("CAS-", "");
if casrn_regex.is_match(&clean_s) && clean_s.split('-').all(|part| part.chars().all(|c| c.is_digit(10))) {
if let Some(cas) = validate_cas(&clean_s) {
casrns.insert(cas);
}
}
} else if let Some(n) = id.as_number() {
let num_str = n.to_string();
if casrn_regex.is_match(&num_str) && num_str.split('-').all(|part| part.chars().all(|c| c.is_digit(10))) {
if let Some(cas) = validate_cas(&num_str) {
casrns.insert(cas);
}
}
}
}
}
if casrns.is_empty() {
Ok("<b>CASRN:</b> Not Available".to_string())
} else {
Ok(format!("<b>CASRN:</b> {}", casrns.into_iter().collect::<Vec<String>>().join(", ")))
}
}
/// Validates a CAS Registry Number and returns it unchanged when it is valid.
///
/// A valid number has exactly three hyphen-separated segments of ASCII digits
/// with lengths 2–7, 2 and 1, and its final digit equals the check digit
/// computed from the first two segments.
///
/// Returns `None` for anything else. Malformed input (wrong segment count,
/// empty or over-long segments, non-digit characters) is rejected up front
/// rather than reaching the checksum arithmetic, so this function never panics.
fn validate_cas(cas: &str) -> Option<String> {
    let parts: Vec<&str> = cas.split('-').collect();
    let &[head, middle, check] = parts.as_slice() else {
        return None;
    };

    let lengths_ok = (2..=7).contains(&head.len()) && middle.len() == 2 && check.len() == 1;
    let digits_only = [head, middle, check]
        .iter()
        .all(|segment| segment.bytes().all(|b| b.is_ascii_digit()));
    if !(lengths_ok && digits_only) {
        return None;
    }

    let expected = check.chars().next()?.to_digit(10)?;
    let body = format!("{}{}", head, middle);
    (calculate_cas_check_digit(&body) == expected).then(|| cas.to_string())
}

/// Computes the CAS check digit for a string of decimal digits.
///
/// Each digit is weighted by its 1-based position counted from the right; the
/// check digit is the weighted sum modulo 10. An empty input yields `0`.
///
/// # Panics
///
/// Panics if `input` contains a character that is not a decimal digit.
fn calculate_cas_check_digit(input: &str) -> u32 {
    let weighted_sum: u32 = input
        .chars()
        .rev()
        .zip(1u32..)
        .map(|(c, weight)| c.to_digit(10).expect("input must contain only decimal digits") * weight)
        .sum();
    weighted_sum % 10
}
/// Serves the single-page front end.
///
/// The page holds one form; its script POSTs the entered SMILES to `/info` as
/// URL-encoded form data and writes the response text into the page as HTML.
async fn index() -> Html<&'static str> {
    const PAGE: &str = r#"<!DOCTYPE html><html><head><title>Chemical Information</title></head><body><h1>Chemical Information</h1><form id="smiles-form"><input type="text" name="smiles" id="smiles-input" placeholder="Enter SMILES string" /><button type="submit">Get Information</button></form><div id="info-display"></div><script>document.getElementById('smiles-form').addEventListener('submit', async (event) => {event.preventDefault();const smiles = document.getElementById('smiles-input').value;const response = await fetch('/info', {method: 'POST',headers: {'Content-Type': 'application/x-www-form-urlencoded',},body: `smiles=${encodeURIComponent(smiles)}`,});const responseText = await response.text();document.getElementById('info-display').innerHTML = responseText;});</script></body></html>"#;
    Html(PAGE)
}
/// Starts the HTTP server on `0.0.0.0:8000`.
///
/// Routes: `GET /` serves the page from `index`; `POST /info` runs the
/// compound lookup. Panics if the listener cannot be bound or the server
/// stops with an error.
#[tokio::main]
async fn main() {
    // The lookup's success text and its error text are both sent back as the
    // plain response body, so the two outcomes collapse into one `String`.
    async fn info(form: Form<SmilesInput>) -> String {
        get_compound_info(form).await.unwrap_or_else(|message| message)
    }

    let app = Router::new()
        .route("/", axum::routing::get(index))
        .route("/info", post(info));

    let listener = TcpListener::bind(SocketAddr::from(([0, 0, 0, 0], 8000)))
        .await
        .unwrap();
    println!("listening on {}", listener.local_addr().unwrap());

    axum::serve(listener, app.into_make_service()).await.unwrap();
}