#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Batch fitting of multiple X-ray spectra for fitting parameter extraction.
This module allows running the fitting step for multiple spectra across multiple samples.
It uses the `fit_and_quantify_spectrum` function internally with quantification disabled,
then extracts the values of desired fitting parameters.
Features
---------
- Accepts a list of sample IDs and spectrum IDs.
- Supports 'all' spectra mode for each sample.
Example
--------
>>> from autoemxsp.runners import batch_fit_spectra
>>> batch_fit_spectra(
... sample_IDs=["Sample1", "Sample2"],
... spectrum_IDs="all",
... is_standard=False,
... plot_signal=False
... )
Created on Fri Aug 20 09:34:34 2025
@author: Andrea
"""
import os
import logging
import pandas as pd
from datetime import datetime
from typing import List, Optional
import autoemxsp.utils.constants as cnst
from autoemxsp.utils import get_sample_dir, print_double_separator
from autoemxsp.runners.fit_and_quantify_spectrum import fit_and_quantify_spectrum
# Root-logger setup at import time. The format is meant to match the one used
# by fit_and_quantify_spectrum (per the original note in this file).
logging.basicConfig(
    format="%(asctime)s %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
# Public API
def _all_spectra_requested(spectrum_IDs) -> bool:
    """Return True if `spectrum_IDs` is 'all' or a one-item list/tuple holding 'all'."""
    # Type-check first so array-like inputs are never compared elementwise to a string.
    if isinstance(spectrum_IDs, str):
        return spectrum_IDs == 'all'
    if isinstance(spectrum_IDs, (list, tuple)) and len(spectrum_IDs) == 1:
        only_item = spectrum_IDs[0]
        return isinstance(only_item, str) and only_item == 'all'
    return False


def _extract_fit_param_row(quantifier, sp_id, param_names) -> dict:
    """Build one result row: the spectrum ID plus the value of each requested fit parameter."""
    params = quantifier.fit_result.params
    row = {'sp_id': sp_id}
    for param_name in param_names:
        # Parameters absent from this fit are kept as explicit missing values.
        row[param_name] = params[param_name].value if param_name in params else pd.NA
    return row


def _write_fit_param_table(rows, sample_ID, output_path) -> list:
    """Write `rows` plus their mean/std rows to a timestamped CSV and return the two summary rows."""
    values_only = pd.DataFrame(rows).drop(columns=['sp_id'])
    mean_vals = values_only.mean(numeric_only=True).to_dict()
    std_vals = values_only.std(numeric_only=True).to_dict()
    summary_rows = [
        {'sp_id': 'mean', **mean_vals},
        {'sp_id': 'std', **std_vals},
    ]

    # Only real rows (dicts) go into the table: None placeholders for failed
    # spectra are not valid records for the DataFrame constructor.
    results_df = pd.DataFrame(rows + summary_rows)

    os.makedirs(output_path, exist_ok=True)
    now_formatted = datetime.now().strftime("%Y%m%d_%Hh%Mm")
    file_path = os.path.join(output_path, f"{now_formatted}_{sample_ID}_FitParamVals.csv")
    results_df.to_csv(file_path, index=False)
    return summary_rows


def batch_fit_spectra(sample_IDs,
                      spectrum_IDs,
                      is_standard: bool,
                      fit_params_vals_to_extract: Optional[List[str]] = None,
                      spectrum_lims: Optional[tuple] = None,
                      samples_path: Optional[str] = None,
                      output_path: str = 'Fitting output',
                      use_instrument_background: bool = False,
                      quantify_plot: bool = True,
                      plot_signal: bool = True,
                      zoom_plot: bool = False,
                      line_to_plot: str = '',
                      els_substrate: Optional[list] = None,
                      fit_tol: float = 1e-4,
                      is_particle: bool = True,
                      max_undetectable_w_fr: float = 0,
                      force_single_iteration: bool = False,
                      interrupt_fits_bad_spectra: bool = False,
                      print_results: bool = True,
                      quant_verbose: bool = True,
                      fitting_verbose: bool = True
                      ) -> dict:
    """
    Run fitting for multiple spectra across multiple samples to extract values of fitting parameters.

    For every sample, the sample's data file is read to find the available
    spectra, each selected spectrum is passed to `fit_and_quantify_spectrum`,
    and the requested fit parameter values are collected. When at least one
    spectrum of a sample yields values, a CSV with one row per spectrum plus
    'mean' and 'std' rows is written to `output_path`.

    Samples whose directory or data file cannot be found or read, or whose
    data file lacks the spectrum ID column, are skipped with a warning and do
    not appear in the returned dictionary.

    Parameters
    ----------
    sample_IDs : list of str
        List of sample identifiers.
    spectrum_IDs : list of int or str
        List of spectrum IDs to process (values reported in 'Spectrum #' column in Data.csv),
        or 'all' (also accepted as a one-item list/tuple) to process all spectra in each sample.
    is_standard : bool
        Defines whether measurement is of a standard (i.e., well defined composition) or not.
        Also selects which data file of the sample is read.
    fit_params_vals_to_extract : list of str, optional
        List of fitting parameter names whose value to extract and save.
        If None or empty, spectra are fitted but nothing is extracted or saved.
    spectrum_lims : tuple, optional
        Passed unchanged to `fit_and_quantify_spectrum`.
    samples_path : str, optional
        Base directory where results are stored. Default: autoemxsp/Results
    output_path : str, optional
        Directory where the extracted values of fitted parameters are saved.
        Created if missing. Default: 'Fitting output'
    use_instrument_background : bool, optional
        Whether to use instrument background if present.
    quantify_plot : bool, optional
        Accepted for interface compatibility but currently ignored: this runner
        always passes `quantify_plot=False` to `fit_and_quantify_spectrum`.
    plot_signal : bool, optional
        Whether to plot the signal.
    zoom_plot : bool, optional
        Whether to zoom on a specific line.
    line_to_plot : str, optional
        Line to zoom in plot.
    els_substrate : list, optional
        List of substrate elements.
    fit_tol : float, optional
        Fit tolerance.
    is_particle : bool, optional
        If True, treat sample as particle (powder).
    max_undetectable_w_fr : float, optional
        Maximum allowed weight fraction for undetectable elements (default: 0).
    force_single_iteration : bool, optional
        If True, quantification will be run for a single iteration only (default: False).
    interrupt_fits_bad_spectra : bool, optional
        If True, interrupt fitting if bad spectra are detected (default: False).
    print_results : bool, optional
        If True, prints all fitted parameters and their values (default: True).
    quant_verbose : bool, optional
        If True, prints quantification operations
    fitting_verbose : bool, optional
        If True, prints fitting operations

    Returns
    -------
    extracted_par_vals : dict
        Maps each processed sample ID to either None (when
        `fit_params_vals_to_extract` is None/empty) or a list with one entry
        per spectrum: a dict ``{'sp_id': ..., <param>: <value>, ...}`` on
        success, or None when the fit raised or the quantifier's
        `bad_quant_flag` was set. When at least one spectrum succeeded, two
        extra dicts with ``'sp_id'`` equal to ``'mean'`` and ``'std'`` are
        appended at the end of the list.
    """
    if samples_path is None:
        # Default location: the results directory two levels above this file.
        parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        samples_path = os.path.join(parent_dir, cnst.RESULTS_DIR)

    print_double_separator()
    logging.info("Starting batch fitting process...")
    logging.info(f"Results path: {samples_path}")

    process_all_spectra = _all_spectra_requested(spectrum_IDs)
    extracted_par_vals = {}

    for sample_ID in sample_IDs:
        print_double_separator()
        logging.info(f"Processing sample '{sample_ID}'...")

        try:
            sample_dir = get_sample_dir(samples_path, sample_ID)
        except Exception as e:
            logging.warning("Failed to get sample directory for %s: %s", sample_ID, e)
            continue

        data_filename = cnst.STDS_MEAS_FILENAME if is_standard else cnst.DATA_FILENAME
        data_path = os.path.join(sample_dir, f"{data_filename}.csv")
        if not os.path.exists(data_path):
            logging.warning(f"Data file not found for sample '{sample_ID}'. Skipping.")
            continue

        try:
            df = pd.read_csv(data_path)
        except Exception as e:
            logging.warning(f"Could not read {data_path} for sample '{sample_ID}': {e}")
            continue

        if cnst.SP_ID_DF_KEY not in df.columns:
            logging.warning(f"Column '{cnst.SP_ID_DF_KEY}' not found in {data_path}. Skipping sample '{sample_ID}'.")
            continue

        # Determine spectra to process
        if process_all_spectra:
            spectra_to_process = df[cnst.SP_ID_DF_KEY].unique()
            logging.info(f"Found {len(spectra_to_process)} spectra for sample '{sample_ID}'.")
        else:
            spectra_to_process = spectrum_IDs
            logging.info(f"Processing specified spectra for sample '{sample_ID}': {spectra_to_process}")

        sample_fit_results = []
        for sp_id in spectra_to_process:
            print_double_separator()
            logging.info(f"Fitting Sample '{sample_ID}', Spectrum {sp_id} (fit only, no quantification)")
            try:
                quantifier = fit_and_quantify_spectrum(
                    sample_ID=sample_ID,
                    spectrum_ID=sp_id,
                    is_standard=is_standard,
                    results_path=samples_path,
                    spectrum_lims=spectrum_lims,
                    use_instrument_background=use_instrument_background,
                    # Always False here, regardless of the `quantify_plot` argument.
                    quantify_plot=False,
                    plot_signal=plot_signal,
                    zoom_plot=zoom_plot,
                    line_to_plot=line_to_plot,
                    els_substrate=els_substrate,
                    fit_tol=fit_tol,
                    is_particle=is_particle,
                    max_undetectable_w_fr=max_undetectable_w_fr,
                    force_single_iteration=force_single_iteration,
                    interrupt_fits_bad_spectra=interrupt_fits_bad_spectra,
                    print_results=print_results,
                    quant_verbose=quant_verbose,
                    fitting_verbose=fitting_verbose
                )
            except Exception as e:
                # One failing spectrum must not stop the batch; keep a placeholder.
                logging.exception(f"Error fitting spectrum {sp_id} for sample '{sample_ID}': {e}")
                sample_fit_results.append(None)
            else:
                if fit_params_vals_to_extract and quantifier.bad_quant_flag is None:
                    sample_fit_results.append(
                        _extract_fit_param_row(quantifier, sp_id, fit_params_vals_to_extract)
                    )
                else:
                    sample_fit_results.append(None)

        if not fit_params_vals_to_extract:
            extracted_par_vals[sample_ID] = None
            continue

        successful_rows = [row for row in sample_fit_results if row is not None]
        if successful_rows:
            # Mean/std rows are appended after the per-spectrum entries.
            sample_fit_results.extend(
                _write_fit_param_table(successful_rows, sample_ID, output_path)
            )
        else:
            # Nothing to average or save; previously this case raised a KeyError.
            logging.warning(
                f"No fit parameter values could be extracted for sample '{sample_ID}'. No output file written."
            )
        extracted_par_vals[sample_ID] = sample_fit_results

    logging.info("Batch fitting process completed.")
    return extracted_par_vals