From 2d11249bc56e7c94e053c092605228aa6ec6f538 Mon Sep 17 00:00:00 2001 From: Justus Helo Date: Sun, 8 Feb 2026 17:14:25 +0200 Subject: [PATCH 1/5] Ignore virtual environment venv (#43) --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6a9cd33..a91fe95 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ sdist/* docs/html docs/jupyter_execute app.html + +# Virtual environment +venv/ \ No newline at end of file From bf93d74becdc93877f1f0ead940615e29f0e08dd Mon Sep 17 00:00:00 2001 From: Justus Helo Date: Sun, 8 Feb 2026 17:19:46 +0200 Subject: [PATCH 2/5] Fix code style and end-of-file issues (#43) --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a91fe95..a1bfbb1 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,4 @@ docs/jupyter_execute app.html # Virtual environment -venv/ \ No newline at end of file +venv/ From acc0f66fc61f019fe7eb743e07682956856cd508 Mon Sep 17 00:00:00 2001 From: Justus Helo Date: Thu, 12 Feb 2026 14:37:20 +0200 Subject: [PATCH 3/5] Fix sensitivity indices to match Matlab SOE calculation. --- src/simdec/sensitivity_indices.py | 88 ++++++++++++++++++------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/src/simdec/sensitivity_indices.py b/src/simdec/sensitivity_indices.py index 531c496..232ffcf 100644 --- a/src/simdec/sensitivity_indices.py +++ b/src/simdec/sensitivity_indices.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -import warnings import numpy as np import pandas as pd @@ -61,7 +60,7 @@ def sensitivity_indices( Sensitivity indices, combined effect of each input. foe : ndarray of shape (n_factors, 1) First-order effects (also called 'main' or 'individual'). - soe : ndarray of shape (n_factors, 1) + soe_full : ndarray of shape (n_factors, 1) Second-order effects (also called 'interaction'). Examples @@ -96,15 +95,21 @@ def sensitivity_indices( array([0.43157591, 0.44241433, 0.11767249]) """ + # Handle inputs conversion if isinstance(inputs, pd.DataFrame): cat_columns = inputs.select_dtypes(["category", "O"]).columns inputs[cat_columns] = inputs[cat_columns].apply( lambda x: x.astype("category").cat.codes ) inputs = inputs.to_numpy() - if isinstance(output, pd.DataFrame): + + # Handle output conversion first, then flatten + if isinstance(output, (pd.DataFrame, pd.Series)): output = output.to_numpy() + # Flatten output if it's (N, 1) + output = output.flatten() + n_runs, n_factors = inputs.shape n_bins_foe, n_bins_soe = number_of_bins(n_runs, n_factors) @@ -116,55 +121,64 @@ def sensitivity_indices( soe = np.zeros((n_factors, n_factors)) for i in range(n_factors): - # first order + # 1. First-order effects (FOE) xi = inputs[:, i] bin_avg, _, binnumber = stats.binned_statistic( - x=xi, values=output, bins=n_bins_foe + x=xi, values=output, bins=n_bins_foe, statistic="mean" ) - # can have NaN in the average but no corresponding binnumber - bin_avg = bin_avg[~np.isnan(bin_avg)] - bin_counts = np.unique(binnumber, return_counts=True)[1] - # weighted variance and divide by the overall variance of the output - foe[i] = _weighted_var(bin_avg, weights=bin_counts) / var_y + # Filter empty bins and get weights (counts) + mask_foe = ~np.isnan(bin_avg) + mean_i_foe = bin_avg[mask_foe] + # binnumber starts at 1; 0 is for values outside range + bin_counts_foe = np.unique(binnumber[binnumber > 0], return_counts=True)[1] - # second order + foe[i] = _weighted_var(mean_i_foe, weights=bin_counts_foe) / var_y + + # 2. Second-order effects (SOE) for j in range(n_factors): - if i == j or j < i: + if j <= i: continue xj = inputs[:, j] - bin_avg, *edges, binnumber = stats.binned_statistic_2d( + # 2D Binned Statistic for Var(E[Y|Xi, Xj]) + bin_avg_ij, x_edges, y_edges, binnumber_ij = stats.binned_statistic_2d( x=xi, y=xj, values=output, bins=n_bins_soe, expand_binnumbers=False ) - mean_ij = bin_avg[~np.isnan(bin_avg)] - bin_counts = np.unique(binnumber, return_counts=True)[1] - var_ij = _weighted_var(mean_ij, weights=bin_counts) - - # expand_binnumbers here - nbin = np.array([len(edges_) + 1 for edges_ in edges]) - binnumbers = np.asarray(np.unravel_index(binnumber, nbin)) + mask_ij = ~np.isnan(bin_avg_ij) + mean_ij = bin_avg_ij[mask_ij] + counts_ij = np.unique(binnumber_ij[binnumber_ij > 0], return_counts=True)[1] + var_ij = _weighted_var(mean_ij, weights=counts_ij) - bin_counts_i = np.unique(binnumbers[0], return_counts=True)[1] - bin_counts_j = np.unique(binnumbers[1], return_counts=True)[1] - - # handle NaNs - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - mean_i = np.nanmean(bin_avg, axis=1) - mean_i = mean_i[~np.isnan(mean_i)] - mean_j = np.nanmean(bin_avg, axis=0) - mean_j = mean_j[~np.isnan(mean_j)] - - var_i = _weighted_var(mean_i, weights=bin_counts_i) - var_j = _weighted_var(mean_j, weights=bin_counts_j) + # Marginal Var(E[Y|Xi]) using n_bins_soe to match MATLAB logic + bin_avg_i_soe, _, binnumber_i_soe = stats.binned_statistic( + x=xi, values=output, bins=n_bins_soe, statistic="mean" + ) + mask_i = ~np.isnan(bin_avg_i_soe) + counts_i = np.unique( + binnumber_i_soe[binnumber_i_soe > 0], return_counts=True + )[1] + var_i_soe = _weighted_var(bin_avg_i_soe[mask_i], weights=counts_i) + + # Marginal Var(E[Y|Xj]) using n_bins_soe to match MATLAB logic + bin_avg_j_soe, _, binnumber_j_soe = stats.binned_statistic( + x=xj, values=output, bins=n_bins_soe, statistic="mean" + ) + mask_j = ~np.isnan(bin_avg_j_soe) + counts_j = np.unique( + binnumber_j_soe[binnumber_j_soe > 0], return_counts=True + )[1] + var_j_soe = _weighted_var(bin_avg_j_soe[mask_j], weights=counts_j) - soe[i, j] = (var_ij - var_i - var_j) / var_y + soe[i, j] = (var_ij - var_i_soe - var_j_soe) / var_y - soe = np.where(soe == 0, soe.T, soe) - si[i] = foe[i] + soe[:, i].sum() / 2 + # Mirror SOE and calculate Combined Effect (SI) + # SI is FOE + half of all interactions associated with that variable + soe_full = soe + soe.T + for k in range(n_factors): + si[k] = foe[k] + (soe_full[:, k].sum() / 2) - return SensitivityAnalysisResult(si, foe, soe) + return SensitivityAnalysisResult(si, foe, soe_full) From 6495b6124822ed5708856cc5664d8b0566c06033 Mon Sep 17 00:00:00 2001 From: Justus Helo Date: Fri, 13 Feb 2026 12:00:13 +0200 Subject: [PATCH 4/5] Fix variable selection logic and match MATLAB state formation --- src/simdec/decomposition.py | 41 +++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/simdec/decomposition.py b/src/simdec/decomposition.py index 3c1220a..958d2aa 100644 --- a/src/simdec/decomposition.py +++ b/src/simdec/decomposition.py @@ -68,7 +68,7 @@ def decomposition( output: pd.DataFrame, *, sensitivity_indices: np.ndarray, - dec_limit: float = 1, + dec_limit: float | None = None, auto_ordering: bool = True, states: list[int] | None = None, statistic: Literal["mean", "median"] | None = "mean", @@ -79,7 +79,7 @@ def decomposition( ---------- inputs : DataFrame of shape (n_runs, n_factors) Input variables. - output : DataFrame of shape (n_runs, 1) + output : DataFrame of shape (n_runs, 1) or (n_runs,) Target variable. sensitivity_indices : ndarray of shape (n_factors, 1) Sensitivity indices, combined effect of each input. @@ -116,7 +116,7 @@ def decomposition( inputs[cat_col] = codes inputs = inputs.to_numpy() - output = output.to_numpy() + output = output.to_numpy().flatten() # 1. variables for decomposition var_order = np.argsort(sensitivity_indices)[::-1] @@ -125,26 +125,41 @@ def decomposition( sensitivity_indices = sensitivity_indices[var_order] if auto_ordering: - n_var_dec = np.where(np.cumsum(sensitivity_indices) < dec_limit)[0].size + # handle edge case where sensitivity indices don't sum exactly to 1.0 + if dec_limit is None: + dec_limit = 0.8 * np.sum(sensitivity_indices) + + cumulative_sum = np.cumsum(sensitivity_indices) + indices_over_limit = np.where(cumulative_sum >= dec_limit)[0] + + if indices_over_limit.size > 0: + n_var_dec = indices_over_limit[0] + 1 + else: + n_var_dec = sensitivity_indices.size + n_var_dec = max(1, n_var_dec) # keep at least one variable n_var_dec = min(5, n_var_dec) # use at most 5 variables else: n_var_dec = inputs.shape[1] - # 2. states formation + # 2. variable selection and reordering + if auto_ordering: + var_names = var_names[var_order[:n_var_dec]].tolist() + inputs = inputs[:, var_order[:n_var_dec]] + else: + var_names = var_names[:n_var_dec].tolist() + inputs = inputs[:, :n_var_dec] + + # 3. states formation (after reordering/selection) if states is None: - states = 3 if n_var_dec < 3 else 2 + states = 3 if n_var_dec <= 2 else 2 states = [states] * n_var_dec for i in range(n_var_dec): n_unique = np.unique(inputs[:, i]).size states[i] = n_unique if n_unique <= 5 else states[i] - if auto_ordering: - var_names = var_names[var_order[:n_var_dec]].tolist() - inputs = inputs[:, var_order[:n_var_dec]] - - # 3. decomposition + # 4. decomposition bins = [] statistic_methods = { @@ -153,8 +168,8 @@ def decomposition( } try: statistic_method = statistic_methods[statistic] - except IndexError: - msg = f"'statistic' must be one of {statistic_methods.values()}" + except KeyError: + msg = f"'statistic' must be one of {statistic_methods.keys()}" raise ValueError(msg) def statistic_(inputs): From dcc7fd7d03d770a10c94baddd2af4ed90352f59a Mon Sep 17 00:00:00 2001 From: Justus Helo Date: Thu, 26 Feb 2026 16:01:29 +0200 Subject: [PATCH 5/5] Update fix of sensitivity_indices.py, add tests for Issue #43 and edit test_decomposition.py to follow new logic --- src/simdec/sensitivity_indices.py | 8 +++--- tests/test_decomposition.py | 4 ++- tests/test_decomposition_43.py | 29 ++++++++++++++++++++++ tests/test_sensitivity_indices_43.py | 37 ++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 tests/test_decomposition_43.py create mode 100644 tests/test_sensitivity_indices_43.py diff --git a/src/simdec/sensitivity_indices.py b/src/simdec/sensitivity_indices.py index 232ffcf..ed9a07b 100644 --- a/src/simdec/sensitivity_indices.py +++ b/src/simdec/sensitivity_indices.py @@ -60,7 +60,7 @@ def sensitivity_indices( Sensitivity indices, combined effect of each input. foe : ndarray of shape (n_factors, 1) First-order effects (also called 'main' or 'individual'). - soe_full : ndarray of shape (n_factors, 1) + soe : ndarray of shape (n_factors, n_factors) Second-order effects (also called 'interaction'). Examples @@ -177,8 +177,8 @@ def sensitivity_indices( # Mirror SOE and calculate Combined Effect (SI) # SI is FOE + half of all interactions associated with that variable - soe_full = soe + soe.T + soe = soe + soe.T for k in range(n_factors): - si[k] = foe[k] + (soe_full[:, k].sum() / 2) + si[k] = foe[k] + (soe[:, k].sum() / 2) - return SensitivityAnalysisResult(si, foe, soe_full) + return SensitivityAnalysisResult(si, foe, soe) diff --git a/tests/test_decomposition.py b/tests/test_decomposition.py index f6f1bfb..fa0989c 100644 --- a/tests/test_decomposition.py +++ b/tests/test_decomposition.py @@ -16,7 +16,9 @@ def test_decomposition(): output_name, *v_names = list(data.columns) inputs, output = data[v_names], data[output_name] si = np.array([0.04, 0.50, 0.11, 0.28]) - res = sd.decomposition(inputs=inputs, output=output, sensitivity_indices=si) + res = sd.decomposition( + inputs=inputs, output=output, sensitivity_indices=si, dec_limit=1 + ) assert res.var_names == ["sigma_res", "R", "Rp0.2", "Kf"] assert res.states == [2, 2, 2, 2] diff --git a/tests/test_decomposition_43.py b/tests/test_decomposition_43.py new file mode 100644 index 0000000..21add5a --- /dev/null +++ b/tests/test_decomposition_43.py @@ -0,0 +1,29 @@ +import numpy as np +import pandas as pd +from scipy.stats import qmc, uniform, lognorm +import simdec as sd + + +def test_decomposition_default(): + m = 13 + sampler = qmc.Sobol(d=2, scramble=True, seed=42) + sample = sampler.random_base2(m=m) + + # deposit_0: uniform(500, 1500) + deposit_0 = uniform.ppf(sample[:, 0], loc=500, scale=1000) + + # interest_rate: lognormal + sigma = 0.5 + mu = np.log(0.01) + sigma**2 + interest_rate = lognorm.ppf(sample[:, 1], s=sigma, scale=np.exp(mu)) + + deposit_20 = deposit_0 * (1 + interest_rate) ** 20 + + inputs = pd.DataFrame({"deposit_0": deposit_0, "interest_rate": interest_rate}) + output = pd.Series(deposit_20, name="deposit_20") + indices = sd.sensitivity_indices(inputs=inputs, output=output) + si = indices.si + res = sd.decomposition(inputs=inputs, output=output, sensitivity_indices=si) + + assert len(res.var_names) == 2 + assert res.var_names == ["deposit_0", "interest_rate"] diff --git a/tests/test_sensitivity_indices_43.py b/tests/test_sensitivity_indices_43.py new file mode 100644 index 0000000..61a8a23 --- /dev/null +++ b/tests/test_sensitivity_indices_43.py @@ -0,0 +1,37 @@ +import numpy as np +import numpy.testing as npt +import pandas as pd +from scipy.stats import qmc, uniform, lognorm +import simdec as sd + +# Testing fix for issue #43 + + +def test_sensitivity_indices_43(): + m = 13 + sampler = qmc.Sobol(d=2, scramble=True, seed=42) + sample = sampler.random_base2(m=m) + + # deposit_0: uniform(500, 1500) + deposit_0 = uniform.ppf(sample[:, 0], loc=500, scale=1000) + + # interest_rate: lognormal + sigma = 0.5 + mu = np.log(0.01) + sigma**2 + interest_rate = lognorm.ppf(sample[:, 1], s=sigma, scale=np.exp(mu)) + + deposit_20 = deposit_0 * (1 + interest_rate) ** 20 + + inputs = pd.DataFrame({"deposit_0": deposit_0, "interest_rate": interest_rate}) + output = pd.Series(deposit_20, name="deposit_20") + + res = sd.sensitivity_indices(inputs, output) + + # MATLAB Results + expected_si = np.array([0.7101, 0.2739]) + expected_foe = np.array([0.7028, 0.2666]) + expected_soe_12 = 0.0146 + + npt.assert_allclose(res.si, expected_si, atol=3e-2) + npt.assert_allclose(res.first_order, expected_foe, atol=3e-2) + npt.assert_allclose(res.second_order[0, 1], expected_soe_12, atol=1e-2)