Source code for snf_simulations.utils
"""Utility functions for spectrum interpolation and sampling."""
import numpy as np
[docs]
def linear_interpolate_with_errors(
original_bins: np.ndarray,
original_content: np.ndarray,
original_errors: np.ndarray,
new_bins: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
"""Linearly interpolate histogram content and propagate errors onto new bins."""
if len(original_bins) != len(original_content) + 1:
msg = "original_bins must have length len(original_content) + 1"
raise ValueError(msg)
if len(original_errors) != len(original_content):
msg = "original_errors must have the same length as original_content"
raise ValueError(msg)
if len(original_bins) < 2: # noqa: PLR2004
msg = "original_bins must have at least two values"
raise ValueError(msg)
if len(new_bins) < 2: # noqa: PLR2004
msg = "new_bins must have at least two values"
raise ValueError(msg)
if np.any(np.diff(original_bins) <= 0):
msg = "original_bins must be strictly increasing"
raise ValueError(msg)
if np.any(np.diff(new_bins) <= 0):
msg = "new_bins must be strictly increasing"
raise ValueError(msg)
# Interpolate new content values
original_centres = (original_bins[:-1] + original_bins[1:]) / 2
new_centres = (new_bins[:-1] + new_bins[1:]) / 2
new_content = np.interp(new_centres, original_centres, original_content)
# Any extrapolated bins outside the original range should be set to zero
lower_edges = new_bins[:-1]
upper_edges = new_bins[1:]
lower_mask = upper_edges <= original_bins[0]
upper_mask = lower_edges >= original_bins[-1]
extrapolation_mask = lower_mask | upper_mask
new_content[extrapolation_mask] = 0
# Propagate errors
new_errors = np.zeros_like(new_centres)
for i, centre in enumerate(new_centres):
# Check for extrapolated bins - these should keep zero error
if extrapolation_mask[i]:
continue
# Handle boundary centres explicitly to avoid out-of-range indexing.
# For bins that overlap the original edge range but whose centres fall outside
# the original centre range, keep the nearest edge-bin error constant.
# This avoids pseudo-bin (under/overflow) interpolation at the boundaries.
if centre <= original_centres[0] or np.isclose(centre, original_centres[0]):
new_errors[i] = original_errors[0]
continue
if centre >= original_centres[-1] or np.isclose(centre, original_centres[-1]):
new_errors[i] = original_errors[-1]
continue
# Find the two closest original bin centers.
# Using np.searchsorted finds the "insertion point" for the new centre,
# i.e. the index of where it would go to keep the array sorted.
# So if the original_centres are [1, 2, 3] and centre is 2.5, idx will be 2
# as it would fit between 2 (index 1) and 3 (index 2).
# Therefore the surrounding bins are at idx-1 and idx.
idx = int(np.searchsorted(original_centres, centre))
lower_idx = idx - 1
upper_idx = idx
# Calculate new error by propagating errors from the two surrounding bins,
# weighted by distance to the new centre.
c_lower = original_centres[lower_idx]
c_upper = original_centres[upper_idx]
err_lower = original_errors[lower_idx]
err_upper = original_errors[upper_idx]
weight_lower = (c_upper - centre) / (c_upper - c_lower)
weight_upper = (centre - c_lower) / (c_upper - c_lower)
new_errors[i] = np.sqrt(
weight_lower**2 * err_lower**2 + weight_upper**2 * err_upper**2,
)
return new_content, new_errors
[docs]
def sample_histogram(
bin_edges: np.ndarray,
bin_contents: np.ndarray,
samples: int = 100,
seed: int | None = None,
) -> np.ndarray:
"""Sample x values from histogram bins, similar to ROOT TH1::GetRandom.
Args:
bin_edges: 1D array of bin edges with length N+1.
bin_contents: 1D array of bin contents with length N.
samples: Number of samples to draw.
seed: Seed for reproducible random sampling.
Returns:
Array of sampled x values.
"""
if bin_edges.ndim != 1 or bin_contents.ndim != 1:
msg = "bin_edges and bin_contents must be 1D arrays"
raise ValueError(msg)
if len(bin_edges) != len(bin_contents) + 1:
msg = "bin_edges must have length len(bin_contents) + 1"
raise ValueError(msg)
widths = np.diff(bin_edges)
if np.any(widths <= 0):
msg = "bin_edges must be strictly increasing"
raise ValueError(msg)
if np.any(bin_contents < 0):
msg = "bin_contents must be non-negative"
raise ValueError(msg)
# Match ROOT TH1::GetRandom behaviour: bin selection probability is
# proportional to bin content, then sample uniformly within the selected bin.
weights = bin_contents
total_weight = np.sum(weights)
if total_weight <= 0: # Avoid division by zero errors
msg = "Histogram has zero total area; cannot sample"
raise ValueError(msg)
probabilities = weights / total_weight
# Use numpy's random choice to select X bins according to their probabilities,
# for the requested number of samples.
rng = np.random.default_rng(seed)
sampled_indices = rng.choice(len(bin_contents), size=samples, p=probabilities)
# Finally, for each bin take a uniform sample between the upper and lower edges.
# This gives a continuous distribution of sampled x values from within the bins.
lower = bin_edges[sampled_indices]
upper = bin_edges[sampled_indices + 1]
return rng.uniform(lower, upper)