Source code for autoemxsp.runners.Batch_Fit_Spectra

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Batch fitting of multiple X-ray spectra for fitting parameter extraction.

This module allows running the fitting step for multiple spectra across multiple samples.
It uses the `fit_and_quantify_spectrum` function internally with quantification disabled,
then extracts the values of desired fitting parameters.

Features
---------
    - Accepts a list of sample IDs and spectrum IDs.
    - Supports 'all' spectra mode for each sample.

Example
--------
>>> from autoemxsp.runners import batch_fit_spectra
>>> batch_fit_spectra(
...     sample_IDs=["Sample1", "Sample2"],
...     spectrum_IDs="all",
...     is_standard=False,
...     plot_signal=False
... )

Created on Fri Aug 20 09:34:34 2025

@author: Andrea
"""

import os
import logging
import pandas as pd
from datetime import datetime
from typing import List, Optional

import autoemxsp.utils.constants as cnst
import autoemxsp.config.defaults as dflt
from autoemxsp.utils import get_sample_dir, print_double_separator
from autoemxsp.runners.Fit_and_Quantify_Spectrum_fromDatacsv import fit_and_quantify_spectrum_fromDatacsv

# Configure logging (same style as fit_and_quantify_spectrum).
# NOTE: this runs at import time and configures the *root* logger, because the
# functions in this module log through the module-level `logging.*` helpers.
# `logging.basicConfig` is a no-op if the root logger already has handlers, so
# an application that configured logging before importing this module keeps
# its own settings.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Public API of this module: only the batch entry point is exported.
__all__ = ["batch_fit_spectra"]

def batch_fit_spectra(
    sample_IDs,
    spectrum_IDs,
    is_standard: bool,
    fit_params_vals_to_extract: Optional[List[str]] = None,
    spectrum_lims: Optional[tuple] = None,
    samples_path: Optional[str] = None,
    output_path: str = 'Fitting output',
    use_instrument_background: bool = dflt.use_instrument_background,
    quantify_plot: bool = True,
    plot_signal: bool = True,
    zoom_plot: bool = False,
    line_to_plot: str = '',
    els_substrate: Optional[list] = None,
    fit_tol: float = 1e-4,
    is_particle: bool = True,
    max_undetectable_w_fr: float = 0,
    force_single_iteration: bool = False,
    interrupt_fits_bad_spectra: bool = False,
    print_results: bool = True,
    quant_verbose: bool = True,
    fitting_verbose: bool = True
) -> dict:
    """
    Run fitting for multiple spectra across multiple samples to extract values
    of fitting parameters.

    Each spectrum is fitted through `fit_and_quantify_spectrum_fromDatacsv`.
    When `fit_params_vals_to_extract` is given, the requested parameter values
    are collected per spectrum, mean and standard deviation rows are appended,
    and one CSV file per sample is written to `output_path`.

    Parameters
    ----------
    sample_IDs : list of str
        List of sample identifiers.
    spectrum_IDs : list of int or str
        List of spectrum IDs to process (values reported in the spectrum-ID
        column of the sample's data file), or 'all' (also accepted as
        ``['all']``) to process all spectra in each sample.
    is_standard : bool
        Defines whether measurement is of a standard (i.e., well defined
        composition) or not. Selects which data file is read for each sample
        (`cnst.STDS_MEAS_FILENAME` vs. `cnst.DATA_FILENAME`).
    fit_params_vals_to_extract : list of str, optional
        List of fitting parameter names whose value to extract and save.
        If None or empty, spectra are fitted but nothing is extracted or saved.
    spectrum_lims : tuple, optional
        Forwarded unchanged to `fit_and_quantify_spectrum_fromDatacsv`.
    samples_path : str, optional
        Base directory where results are stored. Default: the
        `cnst.RESULTS_DIR` folder located in the parent of this module's
        directory.
    output_path : str, optional
        Directory where the extracted values of fitted parameters are saved.
        Created if missing. Default: 'Fitting output' (relative to the current
        working directory).
    use_instrument_background : bool, optional
        Whether to use instrument background if present.
    quantify_plot : bool, optional
        Accepted for signature compatibility only: this runner always calls
        the fitting routine with ``quantify_plot=False``.
    plot_signal : bool, optional
        Whether to plot the signal.
    zoom_plot : bool, optional
        Whether to zoom on a specific line.
    line_to_plot : str, optional
        Line to zoom in plot.
    els_substrate : list, optional
        List of substrate elements.
    fit_tol : float, optional
        Fit tolerance.
    is_particle : bool, optional
        If True, treat sample as particle (powder).
    max_undetectable_w_fr : float, optional
        Maximum allowed weight fraction for undetectable elements (default: 0).
    force_single_iteration : bool, optional
        If True, quantification will be run for a single iteration only
        (default: False).
    interrupt_fits_bad_spectra : bool, optional
        If True, interrupt fitting if bad spectra are detected (default: False).
    print_results : bool, optional
        If True, prints all fitted parameters and their values (default: True).
    quant_verbose : bool, optional
        If True, prints quantification operations.
    fitting_verbose : bool, optional
        If True, prints fitting operations.

    Returns
    -------
    extracted_par_vals : dict
        Maps each processed sample ID to:

        - a list with one entry per processed spectrum, each either a dict
          ``{'sp_id': <spectrum ID>, <param name>: <value or pd.NA>, ...}`` or
          ``None`` when the fit raised or the quantifier's `bad_quant_flag`
          was set, followed by two summary dicts with ``'sp_id'`` equal to
          ``'mean'`` and ``'std'``; or
        - ``None`` when `fit_params_vals_to_extract` is None or empty.

        Samples that are skipped (sample directory or data file not found,
        unreadable file, missing spectrum-ID column) have no entry.
    """
    if samples_path is None:
        parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        samples_path = os.path.join(parent_dir, cnst.RESULTS_DIR)

    print_double_separator()
    logging.info("Starting batch fitting process...")
    logging.info(f"Results path: {samples_path}")

    # Loop-invariant: decide once whether every spectrum of each sample is requested.
    # The isinstance guards avoid comparing non-string containers against 'all'.
    process_all = (
        (isinstance(spectrum_IDs, str) and spectrum_IDs == 'all')
        or (
            isinstance(spectrum_IDs, list)
            and len(spectrum_IDs) == 1
            and spectrum_IDs[0] == 'all'
        )
    )
    data_filename = cnst.STDS_MEAS_FILENAME if is_standard else cnst.DATA_FILENAME

    extracted_par_vals = {}
    for sample_ID in sample_IDs:
        print_double_separator()
        logging.info(f"Processing sample '{sample_ID}'...")

        try:
            sample_dir = get_sample_dir(samples_path, sample_ID)
        except Exception as e:
            logging.warning("Failed to get sample directory for %s: %s", sample_ID, e)
            continue

        data_path = os.path.join(sample_dir, f"{data_filename}.csv")
        if not os.path.exists(data_path):
            logging.warning(f"Data file not found for sample '{sample_ID}'. Skipping.")
            continue

        try:
            df = pd.read_csv(data_path)
        except Exception as e:
            logging.warning(f"Could not read {data_path} for sample '{sample_ID}': {e}")
            continue

        if cnst.SP_ID_DF_KEY not in df.columns:
            logging.warning(
                f"Column '{cnst.SP_ID_DF_KEY}' not found in {data_path}. "
                f"Skipping sample '{sample_ID}'."
            )
            continue

        # Determine spectra to process
        if process_all:
            spectra_to_process = df[cnst.SP_ID_DF_KEY].unique()
            logging.info(f"Found {len(spectra_to_process)} spectra for sample '{sample_ID}'.")
        else:
            spectra_to_process = spectrum_IDs
            logging.info(
                f"Processing specified spectra for sample '{sample_ID}': {spectra_to_process}"
            )

        # One entry per spectrum: a dict of extracted values, or None as a
        # placeholder when the fit failed / was flagged / nothing was requested.
        sample_fit_results = []
        for sp_id in spectra_to_process:
            print_double_separator()
            logging.info(
                f"Fitting Sample '{sample_ID}', Spectrum {sp_id} (fit only, no quantification)"
            )
            try:
                quantifier = fit_and_quantify_spectrum_fromDatacsv(
                    sample_ID=sample_ID,
                    spectrum_ID=sp_id,
                    is_standard=is_standard,
                    results_path=samples_path,
                    spectrum_lims=spectrum_lims,
                    use_instrument_background=use_instrument_background,
                    quantify_plot=False,  # fit-only run: quantification plots are never produced
                    plot_signal=plot_signal,
                    zoom_plot=zoom_plot,
                    line_to_plot=line_to_plot,
                    els_substrate=els_substrate,
                    fit_tol=fit_tol,
                    is_particle=is_particle,
                    max_undetectable_w_fr=max_undetectable_w_fr,
                    force_single_iteration=force_single_iteration,
                    interrupt_fits_bad_spectra=interrupt_fits_bad_spectra,
                    print_results=print_results,
                    quant_verbose=quant_verbose,
                    fitting_verbose=fitting_verbose
                )
            except Exception as e:
                # Best effort: one failing spectrum must not abort the batch.
                logging.exception(
                    f"Error fitting spectrum {sp_id} for sample '{sample_ID}': {e}"
                )
                sample_fit_results.append(None)
                continue

            if fit_params_vals_to_extract and quantifier.bad_quant_flag is None:
                params = quantifier.fit_result.params
                extracted_vals = {
                    param_name: (
                        params[param_name].value if param_name in params else pd.NA
                    )
                    for param_name in fit_params_vals_to_extract
                }
                sample_fit_results.append({'sp_id': sp_id, **extracted_vals})
            else:
                sample_fit_results.append(None)

        if not fit_params_vals_to_extract:
            extracted_par_vals[sample_ID] = None
            continue

        # Statistics and the saved table only use spectra that produced values.
        successful_rows = [row for row in sample_fit_results if row is not None]
        if successful_rows:
            param_df = pd.DataFrame(successful_rows).drop(columns=['sp_id'])
            mean_vals = param_df.mean(numeric_only=True).to_dict()
            std_vals = param_df.std(numeric_only=True).to_dict()
        else:
            # Every spectrum failed or was flagged: there is nothing to average.
            mean_vals, std_vals = {}, {}

        # Append mean and std rows so they are part of the returned list.
        mean_row = {'sp_id': 'mean', **mean_vals}
        std_row = {'sp_id': 'std', **std_vals}
        sample_fit_results.append(mean_row)
        sample_fit_results.append(std_row)

        if successful_rows:
            # Build the table from dict rows only; the None placeholders stay
            # in the returned list but cannot be rows of the DataFrame.
            results_df = pd.DataFrame(successful_rows + [mean_row, std_row])

            os.makedirs(output_path, exist_ok=True)
            now_formatted = datetime.now().strftime("%Y%m%d_%Hh%Mm")
            file_path = os.path.join(
                output_path, f"{now_formatted}_{sample_ID}_FitParamVals.csv"
            )
            # Save without index
            results_df.to_csv(file_path, index=False)
        else:
            logging.warning(
                f"No fitting parameter values could be extracted for sample "
                f"'{sample_ID}'. No output file written."
            )

        extracted_par_vals[sample_ID] = sample_fit_results

    logging.info("Batch fitting process completed.")
    return extracted_par_vals