Source code for autoemxsp.runners.Batch_Fit_Spectra

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Batch fitting of multiple X-ray spectra for fitting parameter extraction.

This module allows running the fitting step for multiple spectra across multiple samples.
It uses the `fit_and_quantify_spectrum_fromDatacsv` function internally with quantification disabled,
then extracts the values of desired fitting parameters.

Features
---------
    - Accepts a list of sample IDs and spectrum IDs.
    - Supports 'all' spectra mode for each sample.

Example
--------
>>> from autoemxsp.runners import batch_fit_spectra
>>> batch_fit_spectra(
...     sample_IDs=["Sample1", "Sample2"],
...     spectrum_IDs="all",
...     is_standard=False,
...     plot_signal=False
... )

Created on Fri Aug 20 09:34:34 2025

@author: Andrea
"""

import os
import logging
import pandas as pd
from datetime import datetime
from typing import List, Optional

import autoemxsp.utils.constants as cnst
import autoemxsp.config.defaults as dflt
from autoemxsp.utils import get_sample_dir, print_double_separator
from autoemxsp.runners.Fit_and_Quantify_Spectrum_fromDatacsv import fit_and_quantify_spectrum_fromDatacsv

# Configure logging (same style as fit_and_quantify_spectrum).
# NOTE: this executes at import time and targets the root logger; per the
# standard-library docs, basicConfig() is a no-op when the root logger
# already has handlers, so an application-level configuration takes precedence.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Public API: only the batch runner is exported by `from <module> import *`.
__all__ = ["batch_fit_spectra"]


[docs]
def batch_fit_spectra(sample_IDs,
                      spectrum_IDs,
                      is_standard: bool,
                      fit_params_vals_to_extract: Optional[List[str]] = None,
                      spectrum_lims: Optional[tuple] = None,
                      samples_path: Optional[str] = None,
                      output_path: str = 'Fitting output',
                      use_instrument_background: bool = dflt.use_instrument_background,
                      quantify_plot: bool = True,
                      plot_signal: bool = True,
                      zoom_plot: bool = False,
                      line_to_plot: str = '',
                      els_substrate: Optional[list] = None,
                      fit_tol: float = 1e-4,
                      is_particle: bool = True,
                      max_undetectable_w_fr: float = 0,
                      force_single_iteration: bool = False,
                      interrupt_fits_bad_spectra: bool = False,
                      print_results: bool = True,
                      quant_verbose: bool = True,
                      fitting_verbose: bool = True
):
    """
    Run fitting for multiple spectra across multiple samples to extract values of fitting parameters.

    For every sample, each selected spectrum is fitted through
    `fit_and_quantify_spectrum_fromDatacsv`. When `fit_params_vals_to_extract`
    is given, the requested parameter values are collected per spectrum,
    'mean' and 'std' rows are appended, and the table is written to
    ``<output_path>/<YYYYmmdd_HHhMMm>_<sample_ID>_FitParamVals.csv``.

    A failure on one spectrum or one sample is logged and does not stop the batch.

    Parameters
    ----------
    sample_IDs : list of str
        List of sample identifiers. A single string is treated as a one-element list.
    spectrum_IDs : list of int or str
        List of spectrum IDs to process (values reported in 'Spectrum #' column in Data.csv),
        or 'all' (also ``['all']``) to process all spectra in each sample.
        A single int or str ID is treated as a one-element list.
    is_standard : bool
        Defines whether measurement is of a standard (i.e., well defined composition) or not.
        Selects which data file is read (standards measurement file vs. regular data file).
    fit_params_vals_to_extract : list of str, optional
        List of fitting parameter names whose value to extract and save.
        If None or empty, spectra are fitted but nothing is extracted or saved.
    spectrum_lims : tuple, optional
        Forwarded unchanged to `fit_and_quantify_spectrum_fromDatacsv`
        (presumably the limits of the fitted spectral range - confirm against that function).
    samples_path : str, optional
        Base directory where results are stored. Default: autoemxsp/Results
    output_path : str, optional
        Directory where the extracted values of fitted parameters are saved.
        Created if missing. Default: 'Fitting output'
    use_instrument_background : bool, optional
        Whether to use instrument background if present.
    quantify_plot : bool, optional
        Currently not forwarded: this runner is fit-only and always calls the
        underlying function with ``quantify_plot=False``. Kept in the signature
        for compatibility with existing callers.
    plot_signal : bool, optional
        Whether to plot the signal.
    zoom_plot : bool, optional
        Whether to zoom on a specific line.
    line_to_plot : str, optional
        Line to zoom in plot.
    els_substrate : list, optional
        List of substrate elements.
    fit_tol : float, optional
        Fit tolerance.
    is_particle : bool, optional
        If True, treat sample as particle (powder).
    max_undetectable_w_fr : float, optional
        Maximum allowed weight fraction for undetectable elements (default: 0).
    force_single_iteration : bool, optional
        If True, quantification will be run for a single iteration only (default: False).
    interrupt_fits_bad_spectra : bool, optional
        If True, interrupt fitting if bad spectra are detected (default: False).
    print_results : bool, optional
        If True, prints all fitted parameters and their values (default: True).
    quant_verbose : bool, optional
        If True, prints quantification operations
    fitting_verbose : bool, optional
        If True, prints fitting operations

    Returns
    -------
    extracted_par_vals : dict
        Maps each processed sample ID to:

        - ``None`` if `fit_params_vals_to_extract` is None or empty;
        - otherwise a list with one entry per processed spectrum, either a dict
          ``{'sp_id': <id>, <param>: <value or pd.NA>, ...}`` or ``None`` when
          the fit failed or was flagged as bad, followed by a ``'mean'`` and a
          ``'std'`` row when at least one spectrum yielded values.

        Samples that were skipped (missing directory, missing or unreadable
        data file, missing spectrum ID column) do not appear in the dict.
    """

    if samples_path is None:
        parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        samples_path = os.path.join(parent_dir, cnst.RESULTS_DIR)

    # A bare string would otherwise be iterated character by character.
    if isinstance(sample_IDs, str):
        sample_IDs = [sample_IDs]

    # Normalise the spectrum selection once, so that scalars, lists, tuples and
    # array-likes are all handled the same way (comparing an array to 'all'
    # directly would yield an element-wise result, not a single boolean).
    if isinstance(spectrum_IDs, (str, int)):
        requested_spectra = [spectrum_IDs]
    else:
        requested_spectra = list(spectrum_IDs)
    process_all = len(requested_spectra) == 1 and requested_spectra[0] == 'all'

    print_double_separator()
    logging.info("Starting batch fitting process...")
    logging.info(f"Results path: {samples_path}")

    extracted_par_vals = {}
    for sample_ID in sample_IDs:
        print_double_separator()
        logging.info(f"Processing sample '{sample_ID}'...")
        try:
            sample_dir = get_sample_dir(samples_path, sample_ID)
        except Exception as e:
            logging.warning("Failed to get sample directory for %s: %s", sample_ID, e)
            continue
        data_filename = cnst.STDS_MEAS_FILENAME if is_standard else cnst.DATA_FILENAME
        data_path = os.path.join(sample_dir, f"{data_filename}.csv")

        if not os.path.exists(data_path):
            logging.warning(f"Data file not found for sample '{sample_ID}'. Skipping.")
            continue

        try:
            df = pd.read_csv(data_path)
        except Exception as e:
            logging.warning(f"Could not read {data_path} for sample '{sample_ID}': {e}")
            continue

        if cnst.SP_ID_DF_KEY not in df.columns:
            logging.warning(f"Column '{cnst.SP_ID_DF_KEY}' not found in {data_path}. Skipping sample '{sample_ID}'.")
            continue

        # Determine spectra to process
        if process_all:
            spectra_to_process = df[cnst.SP_ID_DF_KEY].unique()
            logging.info(f"Found {len(spectra_to_process)} spectra for sample '{sample_ID}'.")
        else:
            spectra_to_process = requested_spectra
            logging.info(f"Processing specified spectra for sample '{sample_ID}': {spectra_to_process}")

        # One entry per spectrum: a dict of extracted values, or None as a
        # placeholder when the fit failed / was flagged / nothing was requested.
        sample_fit_results = []
        for sp_id in spectra_to_process:
            print_double_separator()
            logging.info(f"Fitting Sample '{sample_ID}', Spectrum {sp_id} (fit only, no quantification)")
            try:
                quantifier = fit_and_quantify_spectrum_fromDatacsv(
                    sample_ID=sample_ID,
                    spectrum_ID=sp_id,
                    is_standard=is_standard,
                    results_path=samples_path,
                    spectrum_lims=spectrum_lims,
                    use_instrument_background=use_instrument_background,
                    quantify_plot=False,  # fit only: the caller's quantify_plot is deliberately not forwarded
                    plot_signal=plot_signal,
                    zoom_plot=zoom_plot,
                    line_to_plot=line_to_plot,
                    els_substrate=els_substrate,
                    fit_tol=fit_tol,
                    is_particle=is_particle,
                    max_undetectable_w_fr=max_undetectable_w_fr,
                    force_single_iteration=force_single_iteration,
                    interrupt_fits_bad_spectra=interrupt_fits_bad_spectra,
                    print_results=print_results,
                    quant_verbose=quant_verbose,
                    fitting_verbose=fitting_verbose
                )
            except Exception as e:
                # Keep going: one bad spectrum must not abort the whole batch.
                logging.exception(f"Error fitting spectrum {sp_id} for sample '{sample_ID}': {e}")
                sample_fit_results.append(None)
            else:
                if fit_params_vals_to_extract and quantifier.bad_quant_flag is None:
                    sample_fit_results.append(
                        _extract_fit_param_values(quantifier, fit_params_vals_to_extract, sp_id)
                    )
                else:
                    sample_fit_results.append(None)

        if fit_params_vals_to_extract:
            _append_stats_and_save(sample_fit_results, sample_ID, output_path)
            extracted_par_vals[sample_ID] = sample_fit_results
        else:
            extracted_par_vals[sample_ID] = None

    logging.info("Batch fitting process completed.")

    return extracted_par_vals


def _extract_fit_param_values(quantifier, param_names, sp_id):
    """Build one result row holding the requested fit-parameter values of *quantifier*."""
    params = quantifier.fit_result.params
    row = {'sp_id': sp_id}
    for param_name in param_names:
        # Parameters absent from this fit are recorded as missing rather than dropped,
        # so that every row exposes the same set of keys.
        row[param_name] = params[param_name].value if param_name in params else pd.NA
    return row


def _append_stats_and_save(sample_fit_results, sample_ID, output_path):
    """Append 'mean'/'std' rows to *sample_fit_results* (in place) and write the table to CSV."""
    # None placeholders mark failed/flagged spectra; they carry no values and
    # cannot be turned into DataFrame rows.
    successful_rows = [row for row in sample_fit_results if row is not None]
    if not successful_rows:
        logging.warning(
            f"No fitting parameter values could be extracted for sample '{sample_ID}'. "
            "No output file written."
        )
        return

    # Calculate mean and std for numeric columns (excluding sp_id)
    values_df = pd.DataFrame(successful_rows).drop(columns=['sp_id'])
    mean_row = {'sp_id': 'mean', **values_df.mean(numeric_only=True).to_dict()}
    std_row = {'sp_id': 'std', **values_df.std(numeric_only=True).to_dict()}
    sample_fit_results.append(mean_row)
    sample_fit_results.append(std_row)

    # The saved table only contains real rows plus the two summary rows.
    results_df = pd.DataFrame(successful_rows + [mean_row, std_row])

    os.makedirs(output_path, exist_ok=True)
    now_formatted = datetime.now().strftime("%Y%m%d_%Hh%Mm")
    file_path = os.path.join(output_path, f"{now_formatted}_{sample_ID}_FitParamVals.csv")
    results_df.to_csv(file_path, index=False)  # Save without index
    logging.info(f"Saved fitting parameter values for sample '{sample_ID}' to {file_path}")