Source code for autoemxsp.runners.batch_fit_spectra

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Batch fitting of multiple X-ray spectra for fitting parameter extraction.

This module allows running the fitting step for multiple spectra across multiple samples.
It uses the `fit_and_quantify_spectrum` function internally with quantification disabled,
then extracts the values of desired fitting parameters.

Features
---------
    - Accepts a list of sample IDs and spectrum IDs.
    - Supports 'all' spectra mode for each sample.

Example
--------
>>> from autoemxsp.runners import batch_fit_spectra
>>> batch_fit_spectra(
...     sample_IDs=["Sample1", "Sample2"],
...     spectrum_IDs="all",
...     is_standard=False,
...     plot_signal=False
... )

Created on Fri Aug 20 09:34:34 2025

@author: Andrea
"""

import os
import logging
import pandas as pd
from datetime import datetime
from typing import List, Optional

import autoemxsp.utils.constants as cnst
from autoemxsp.utils import get_sample_dir, print_double_separator
from autoemxsp.runners.fit_and_quantify_spectrum import fit_and_quantify_spectrum

# Configure logging (same style as fit_and_quantify_spectrum).
# This runs at import time and targets the root logger. logging.basicConfig
# does nothing if the root logger already has handlers, so an application
# that set up logging before importing this module keeps its own setup.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)


[docs]
def batch_fit_spectra(sample_IDs,
                      spectrum_IDs,
                      is_standard: bool,
                      fit_params_vals_to_extract: Optional[List[str]] = None,
                      spectrum_lims: Optional[tuple] = None,
                      samples_path: Optional[str] = None,
                      output_path: str = 'Fitting output',
                      use_instrument_background: bool = False,
                      quantify_plot: bool = True,
                      plot_signal: bool = True,
                      zoom_plot: bool = False,
                      line_to_plot: str = '',
                      els_substrate: Optional[list] = None,
                      fit_tol: float = 1e-4,
                      is_particle: bool = True,
                      max_undetectable_w_fr: float = 0,
                      force_single_iteration: bool = False,
                      interrupt_fits_bad_spectra: bool = False,
                      print_results: bool = True,
                      quant_verbose: bool = True,
                      fitting_verbose: bool = True
):
    """
    Run fitting for multiple spectra across multiple samples to extract values of fitting parameters.

    Each spectrum is fitted through `fit_and_quantify_spectrum`, always called with
    ``quantify_plot=False``. When `fit_params_vals_to_extract` is given, the requested
    parameter values are collected per spectrum, summarised with a mean and a standard
    deviation row, and written to one CSV file per sample in `output_path`.

    Parameters
    ----------
    sample_IDs : list of str
        List of sample identifiers. A single string is treated as a one-element list.
    spectrum_IDs : list of int or str
        List of spectrum IDs to process (values reported in 'Spectrum #' column in Data.csv),
        or 'all' (also ``['all']``) to process all spectra in each sample.
    is_standard : bool
        Defines whether measurement is of a standard (i.e., well defined composition) or not.
        Selects which data file is read (``cnst.STDS_MEAS_FILENAME`` vs ``cnst.DATA_FILENAME``).
    fit_params_vals_to_extract : list of str, optional
        List of fitting parameter names whose value to extract and save.
        If None or empty, spectra are fitted but nothing is extracted or saved.
    spectrum_lims : tuple, optional
        Forwarded unchanged to `fit_and_quantify_spectrum`.
    samples_path : str, optional
        Base directory where results are stored. Default: the ``cnst.RESULTS_DIR``
        folder located in the parent of the directory containing this module.
    output_path : str, optional
        Directory where the extracted values of fitted parameters are saved; created
        if missing. Default: 'Fitting output' (relative paths resolve against the
        current working directory).
    use_instrument_background : bool, optional
        Whether to use instrument background if present.
    quantify_plot : bool, optional
        Accepted for interface compatibility but not used: the underlying call
        always receives ``quantify_plot=False`` (fit only).
    plot_signal : bool, optional
        Whether to plot the signal.
    zoom_plot : bool, optional
        Whether to zoom on a specific line.
    line_to_plot : str, optional
        Line to zoom in plot.
    els_substrate : list, optional
        List of substrate elements.
    fit_tol : float, optional
        Fit tolerance.
    is_particle : bool, optional
        If True, treat sample as particle (powder).
    max_undetectable_w_fr : float, optional
        Maximum allowed weight fraction for undetectable elements (default: 0).
    force_single_iteration : bool, optional
        If True, quantification will be run for a single iteration only (default: False).
    interrupt_fits_bad_spectra : bool, optional
        If True, interrupt fitting if bad spectra are detected (default: False).
    print_results : bool, optional
        If True, prints all fitted parameters and their values (default: True).
    quant_verbose : bool, optional
        If True, prints quantification operations
    fitting_verbose : bool, optional
        If True, prints fitting operations

    Returns
    -------
    extracted_par_vals : dict
        Maps each processed sample ID to:

        - ``None`` when `fit_params_vals_to_extract` is empty/None, or
        - a list with one entry per processed spectrum, each either a dict
          ``{'sp_id': ..., <param>: <value or pd.NA>, ...}`` or ``None`` (fit raised,
          or the quantifier reported a non-None ``bad_quant_flag``). When at least
          one spectrum yielded values, two dicts with ``'sp_id'`` equal to ``'mean'``
          and ``'std'`` are appended at the end.

        Samples skipped because their directory, data file, or spectrum ID column
        could not be found/read are absent from the dict.
    """

    if samples_path is None:
        parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        samples_path = os.path.join(parent_dir, cnst.RESULTS_DIR)

    # A bare string would otherwise be iterated character by character.
    if isinstance(sample_IDs, str):
        sample_IDs = [sample_IDs]

    print_double_separator()
    logging.info("Starting batch fitting process...")
    logging.info(f"Results path: {samples_path}")

    # Loop-invariant: decide once whether every spectrum of each sample is wanted.
    process_all_spectra = _all_spectra_requested(spectrum_IDs)

    extracted_par_vals = {}
    for sample_ID in sample_IDs:
        print_double_separator()
        logging.info(f"Processing sample '{sample_ID}'...")
        try:
            sample_dir = get_sample_dir(samples_path, sample_ID)
        except Exception as e:
            logging.warning("Failed to get sample directory for %s: %s", sample_ID, e)
            continue
        data_filename = cnst.STDS_MEAS_FILENAME if is_standard else cnst.DATA_FILENAME
        data_path = os.path.join(sample_dir, f"{data_filename}.csv")

        if not os.path.exists(data_path):
            logging.warning(f"Data file not found for sample '{sample_ID}'. Skipping.")
            continue

        try:
            df = pd.read_csv(data_path)
        except Exception as e:
            logging.warning(f"Could not read {data_path} for sample '{sample_ID}': {e}")
            continue

        if cnst.SP_ID_DF_KEY not in df.columns:
            logging.warning(f"Column '{cnst.SP_ID_DF_KEY}' not found in {data_path}. Skipping sample '{sample_ID}'.")
            continue

        # Determine spectra to process
        if process_all_spectra:
            spectra_to_process = df[cnst.SP_ID_DF_KEY].unique()
            logging.info(f"Found {len(spectra_to_process)} spectra for sample '{sample_ID}'.")
        else:
            spectra_to_process = spectrum_IDs
            logging.info(f"Processing specified spectra for sample '{sample_ID}': {spectra_to_process}")

        # One entry per spectrum: a dict of extracted values, or None on failure.
        sample_fit_results = []
        for sp_id in spectra_to_process:
            print_double_separator()
            logging.info(f"Fitting Sample '{sample_ID}', Spectrum {sp_id} (fit only, no quantification)")
            try:
                quantifier = fit_and_quantify_spectrum(
                    sample_ID=sample_ID,
                    spectrum_ID=sp_id,
                    is_standard=is_standard,
                    results_path=samples_path,
                    spectrum_lims=spectrum_lims,
                    use_instrument_background=use_instrument_background,
                    quantify_plot=False,  # fit only; the caller's quantify_plot is intentionally not forwarded
                    plot_signal=plot_signal,
                    zoom_plot=zoom_plot,
                    line_to_plot=line_to_plot,
                    els_substrate=els_substrate,
                    fit_tol=fit_tol,
                    is_particle=is_particle,
                    max_undetectable_w_fr=max_undetectable_w_fr,
                    force_single_iteration=force_single_iteration,
                    interrupt_fits_bad_spectra=interrupt_fits_bad_spectra,
                    print_results=print_results,
                    quant_verbose=quant_verbose,
                    fitting_verbose=fitting_verbose
                )
            except Exception as e:
                # Best effort: one failing spectrum must not abort the batch.
                logging.exception(f"Error fitting spectrum {sp_id} for sample '{sample_ID}': {e}")
                sample_fit_results.append(None)
                continue

            if fit_params_vals_to_extract and quantifier.bad_quant_flag is None:
                extracted_vals = _extract_param_values(
                    quantifier.fit_result.params, fit_params_vals_to_extract
                )
                sample_fit_results.append({'sp_id': sp_id, **extracted_vals})
            else:
                sample_fit_results.append(None)

        if not fit_params_vals_to_extract:
            extracted_par_vals[sample_ID] = None
            continue

        # Only successfully fitted spectra carry values; the None placeholders
        # must not reach the DataFrame constructor.
        valid_rows = [row for row in sample_fit_results if row is not None]
        if valid_rows:
            summary_rows = _summary_rows(valid_rows)
            _save_fit_param_values(valid_rows + summary_rows, output_path, sample_ID)
            # Returned list keeps the per-spectrum entries (including None
            # placeholders) followed by the 'mean' and 'std' rows.
            sample_fit_results.extend(summary_rows)
        else:
            logging.warning(
                "No fitting parameter values could be extracted for sample '%s'; "
                "no output file written.", sample_ID
            )

        extracted_par_vals[sample_ID] = sample_fit_results

    logging.info("Batch fitting process completed.")

    return extracted_par_vals


def _all_spectra_requested(spectrum_IDs) -> bool:
    """Return True if `spectrum_IDs` is 'all' or a one-element list/tuple holding 'all'."""
    if isinstance(spectrum_IDs, str):
        return spectrum_IDs == 'all'
    if isinstance(spectrum_IDs, (list, tuple)):
        return (
            len(spectrum_IDs) == 1
            and isinstance(spectrum_IDs[0], str)
            and spectrum_IDs[0] == 'all'
        )
    # Other containers (e.g. arrays) are explicit ID collections; comparing them
    # to a string with == would be element-wise and ambiguous.
    return False


def _extract_param_values(params, param_names) -> dict:
    """Map each requested parameter name to its fitted value, or pd.NA if absent from `params`."""
    return {
        name: (params[name].value if name in params else pd.NA)
        for name in param_names
    }


def _summary_rows(valid_rows) -> list:
    """Build the 'mean' and 'std' rows over the numeric columns of non-empty `valid_rows`."""
    values_df = pd.DataFrame(valid_rows).drop(columns=['sp_id'])
    mean_vals = values_df.mean(numeric_only=True).to_dict()
    std_vals = values_df.std(numeric_only=True).to_dict()
    return [{'sp_id': 'mean', **mean_vals}, {'sp_id': 'std', **std_vals}]


def _save_fit_param_values(rows, output_path, sample_ID) -> str:
    """Write `rows` to '<timestamp>_<sample_ID>_FitParamVals.csv' in `output_path`; return the file path."""
    os.makedirs(output_path, exist_ok=True)
    now_formatted = datetime.now().strftime("%Y%m%d_%Hh%Mm")
    file_path = os.path.join(output_path, f"{now_formatted}_{sample_ID}_FitParamVals.csv")
    # Save without index
    pd.DataFrame(rows).to_csv(file_path, index=False)
    return file_path