RUST: Parse XML stream

// Cargo.toml

[dependencies]
quick-xml = "0.31.0"
// XML sample: extract the CAS number, name, and molecular formula to a pipe-delimited file

<Chemical id="0000034742" displayFormula="C12-H14-O4" displayName="Monobutyl phthalate">
<NameList><NameOfSubstance>Monobutyl phthalate<SourceList><Source>MeSH</Source></SourceList></NameOfSubstance>
<Synonyms>Mono-n-butyl-phthalate<SourceList><Source>NLM</Source></SourceList></Synonyms></NameList><NumberList>
<CASRegistryNumber>34-74-2<SourceList/></CASRegistryNumber></NumberList><ClassificationList/>
<FormulaList><MolecularFormula>C12-H14-O4<SourceList><Source>NLM</Source></SourceList></MolecularFormula></FormulaList>
<FormulaFragmentList/><NoteList/><LocatorList><FileLocator url="https://meshb.nlm.nih.gov/record/ui?name=monobutyl phthalate">MeSH</FileLocator>
<FileLocator url="https://pubchem.ncbi.nlm.nih.gov/#query=34-74-2">PubChem</FileLocator>
<FileLocator url="https://pubmed.ncbi.nlm.nih.gov/?term=monobutyl+phthalate">PubMed</FileLocator>
<FileLocator url="https://www.ncbi.nlm.nih.gov/pmc/?term=%22monobutyl+phthalate%22">PubMed Central</FileLocator>
<InternetLocator url="https://search.usa.gov/search?utf8=?&amp;m=false&amp;affiliate=usagov&amp;query=&quot;monobutyl+phthalate&quot;">USA.gov</InternetLocator></LocatorList></Chemical>
use quick_xml::Reader;
use quick_xml::events::Event;
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};

/// Streams the XML file at `xml_file_path` and writes one pipe-delimited line
/// (`CAS|name|formula`) to `output_file_path` for every closing `</Chemical>` tag.
///
/// The three columns are taken from the text found directly inside the
/// `CASRegistryNumber`, `NameOfSubstance` and `MolecularFormula` elements.
/// Text inside nested children (for example `SourceList`/`Source`) is ignored.
/// If an element occurs more than once within a `Chemical`, the last
/// occurrence wins; a column with no captured text is written as an empty string.
///
/// # Errors
///
/// Returns an error if the input file cannot be opened, the output file cannot
/// be created, the XML reader reports an error, a text node cannot be
/// unescaped, or writing/flushing the output fails.
fn extract_chemical_data(
    xml_file_path: &str,
    output_file_path: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    /// The extracted column whose element the parser is currently directly inside.
    #[derive(Clone, Copy)]
    enum Field {
        Cas,
        Name,
        Formula,
    }

    let mut xml_reader = Reader::from_reader(BufReader::new(File::open(xml_file_path)?));
    // Drop whitespace-only text nodes and trim surrounding whitespace from the rest.
    xml_reader.trim_text(true);

    let mut writer = BufWriter::new(File::create(output_file_path)?);

    let mut buf = Vec::new();
    let mut cas_registry_number = String::new();
    let mut name_of_substance = String::new();
    let mut molecular_formula = String::new();
    // `None` whenever the most recent tag was not the opening tag of a tracked element.
    let mut current: Option<Field> = None;

    loop {
        match xml_reader.read_event_into(&mut buf)? {
            Event::Start(e) => {
                // Opening any other element (e.g. a nested `SourceList`) stops
                // capture so that its text is not attributed to the parent.
                current = match e.name().as_ref() {
                    b"CASRegistryNumber" => {
                        cas_registry_number.clear();
                        Some(Field::Cas)
                    }
                    b"NameOfSubstance" => {
                        name_of_substance.clear();
                        Some(Field::Name)
                    }
                    b"MolecularFormula" => {
                        molecular_formula.clear();
                        Some(Field::Formula)
                    }
                    _ => None,
                };
            }
            Event::Text(e) => {
                if let Some(field) = current {
                    let text = e.unescape()?;
                    let target = match field {
                        Field::Cas => &mut cas_registry_number,
                        Field::Name => &mut name_of_substance,
                        Field::Formula => &mut molecular_formula,
                    };
                    // Replace rather than append: the latest text node wins.
                    target.clear();
                    target.push_str(&text);
                }
            }
            Event::End(e) => {
                // Leaving an element ends capture; otherwise stray text after a
                // closing tag would overwrite the value that was just read.
                current = None;
                if e.name().as_ref() == b"Chemical" {
                    writeln!(
                        writer,
                        "{}|{}|{}",
                        cas_registry_number, name_of_substance, molecular_formula
                    )?;
                    cas_registry_number.clear();
                    name_of_substance.clear();
                    molecular_formula.clear();
                }
            }
            Event::Eof => break,
            _ => {}
        }
        buf.clear();
    }

    // Flush explicitly: dropping a `BufWriter` discards any error from the final write.
    writer.flush()?;
    Ok(())
}

/// Entry point: converts the fixed input XML dump into the pipe-delimited
/// output file and prints a confirmation once the extraction has succeeded.
/// Any error from the extraction is returned to the runtime unchanged.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    const INPUT_XML: &str = "chemid-2023-02-22.xml";
    const OUTPUT_TXT: &str = "output.txt";

    extract_chemical_data(INPUT_XML, OUTPUT_TXT)?;
    println!("Data extracted successfully!");
    Ok(())
}

"I’m here to fix deine kabel"