Source code for sluyspy.fit

# -*- coding: utf-8 -*-
# SPDX-License-Identifier: EUPL-1.2
#  
#  Copyright (c) 2022-2024  Marc van der Sluys - Nikhef/Utrecht University - marc.vandersluys.nl
#  
#  This file is part of the sluyspy Python package:
#  Marc van der Sluys' personal Python modules.
#  See: https://github.com/MarcvdSluys/sluyspy
#  
#  This is free software: you can redistribute it and/or modify it under the terms of the European Union
#  Public Licence 1.2 (EUPL 1.2).  This software is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#  PURPOSE.  See the EU Public Licence for more details.  You should have received a copy of the European
#  Union Public Licence along with this code.  If not, see <https://www.eupl.eu/1.2/en/>.


"""Fitting functions for the sluyspy package"""


import numpy as _np
import pandas.core as _pdc
import sluyspy.cli as _scli


[docs] def np_polyfit_chi2(xvals, yvals, order, ysigmas=None, verbosity=0): """Wrapper for NumPy polyfit, that returns the fitting coefficients and reduced chi squared. Print details if desired. Note: does not compute the uncertainties in the coefficients; use scipy_curvefit_chi2() for that. Parameters: xvals (float): X values. yvals (float): Y values. order (int): Polynomial order for fit. ysigmas (float): Uncertainties (errors, standard deviations) in the y values; defaults to None -> all sigmas=1. verbosity (int): Verbosity: 0: quiet, 1: print red.chi2, 2: print more, 3: print table, 4: print all; defaults to 0. Returns: (tuple): Tuple consisting of (coefs, red_chi2): - coefs (float): Array of fitting coefficients, with length (order+1). - red_chi2 (float): Reduced chi squared (chi^2 / (n-m)) for the fit. """ if ysigmas is None: if verbosity>0: print('ysigmas=None; assuming sigma=1 for all data points.') coefs, resids, rank, sing_vals, rcond = _np.polyfit(xvals, yvals, order, full=True) ysigmas = yvals*0 + 1 # This is implicitly assumed in the previous line; works for pd.Series and numpy arrays else: coefs, resids, rank, sing_vals, rcond = _np.polyfit(xvals, yvals, order, w=1/ysigmas, full=True) # Note, not 1/sigma**2! # Print polyfit-specific details: if verbosity>3: print('coefs: ', coefs) print('resids: ', resids) print('rank: ', rank) print('sing_vals: ', sing_vals) print('rcond: ', rcond) # Compute reduced chi^2 and print general fit details: red_chi2 = print_fit_details('np_polyfit', coefs, xvals,yvals,ysigmas, verbosity=verbosity) return coefs, red_chi2
[docs] def scipy_curvefit_chi2(fit_fun, xvals, yvals, coefs0, ysigmas=None, verbosity=0): """Wrapper for SciPy's curve_fit, that returns the fitting coefficients and reduced chi^2. Print details if desired. Parameters: fit_fun (function): Fitting function, should look like def fit_fun(x, a,b,c): and return y=f(x, a,b,c). xvals (float): Array containing x values to fit. yvals (float): Array containing y values to fit. coefs0 (float): Array with initial guess for fitting coefficients. ysigmas (float): Array containing y sigmas/uncertainties. verbosity (int): Verbosity to stdout (0-4). Returns: (tuple): Tuple containing (coefs, dcoefs, red_chi2, var_cov, ier): - coefs (float): Array containing the final fitting coefficients. - dcoefs (float): Array containing uncertainties on the fitting coefficients. - red_chi2 (float): Reduced chi squared (chi2/(n-m)). - var_cov (float): 2D array containing the variance-covariance matrix. - ier (int): Return value, >0 if the fit succeeded. """ from scipy.optimize import curve_fit as _curve_fit try: # Do the fit: coefs, var_cov, infodict, mesg, ier = _curve_fit(fit_fun, xvals, yvals, p0=coefs0, sigma=ysigmas, method='lm', full_output=True) # If call failed: except Exception as e: if verbosity>0: print(__name__+': fit failed: ', end='') print(e) ier = -1 # If call failed or fit did not converge: if ier<=0: if verbosity>0: print('Fit failed: %i' % ier) return None, None, None, None, ier # Print specific curve_fit output: if verbosity>3: print('Fit success: ', ier) print('Message: ', mesg) if verbosity>4: print('Info dict:\n', infodict, '\n') print('Initial coefficients: ', coefs0) print('Final coefficients: ', coefs) print('Variance/covariance matrix: \n', var_cov) # Compute some fit-quality parameters: if ysigmas is None: ysigmas = yvals*0 + 1 # This is implicitly assumed in the previous line; works for pd.Series and numpy arrays dcoefs = _np.sqrt(_np.diag(var_cov)) # Standard deviations on the coefficients # Compute reduced chi^2 and print general fit details: red_chi2 = print_fit_details('scipy_curvefit', coefs, xvals,yvals,ysigmas, dcoefs=dcoefs, var_cov=var_cov, fit_fun=fit_fun, verbosity=verbosity) return coefs, dcoefs, red_chi2, var_cov, ier
[docs] def correlation_matrix_from_variance_covariance_matrix(var_cov): """Compute the normalised correlation matrix from a variance-covariance matrix. Parameters: var_cov (float): 2D NumPy array containing the variance-covariance matrix. Returns: (float): 2D NumPy array containing the normalised-correlation matrix. """ corr = _np.zeros_like(var_cov) matsize = _np.shape(corr)[0] for i in range(matsize): for j in range(matsize): corr[i][j] = var_cov[i,j] / _np.sqrt(var_cov[i,i] * var_cov[j,j]) return corr
[docs] def polynomial(x, *coefs): """Return a polynomial y = Sum_i=0^N-1 coef_i x^i. Parameters: x (float): X value(s). coefs (float): Array of polynomial coefficients. The polynomial order is defined by the number of coefficients in the array. Returns: (float): Polynomial value. """ y = 0 i = 0 for coef in coefs: y += coef * _np.power(x, i) i += 1 return y
def _print_matrix(mat, mat_type, coef_names=None): """Print the contents of a 2D (correlation or covariance) matrix. Parameters: mat (float): 2D NumPy array containing the variance-covariance matrix. """ # Matrix size: matsize = _np.shape(mat)[0] # Derive fomatting for names and numbers from matrix type: if mat_type == 'corr': namfmt = ' %7s' maxnamlen = 7 numfmt = '%8.4f' # ' -0.1238' else: namfmt = ' %12s' maxnamlen = 12 numfmt = '%13.5e' # ' -1.12345e-13' # Length of coefficient names: if coef_names is None: namlen = 0 else: namlen = len(coef_names[0]) # Print row of coefficient numbers: print(' '*(3+namlen), end='') # Spaces: c_i + name length for i in range(matsize): print(namfmt % ('c'+str(i)), end='') print() # Print row of coefficient names: if coef_names is not None: print(' '*(3+namlen), end='') # Spaces: c_i + name length for i in range(matsize): print(namfmt % (coef_names[i].strip()[:maxnamlen]), end='') # Cut off name to preserve table formatting print() # Print columns with coefficient numbers, coefficient names and values: for i in range(matsize): if matsize > 10: print('c%2i' % (i), end='') else: print('c%1i ' % (i), end='') if coef_names is not None: print(coef_names[i], end='') for j in range(matsize): print(numfmt % (mat[i,j]), end='') print() return