Module toleranceinterval.oneside.oneside
Expand source code
# -- coding: utf-8 --
# MIT License
#
# Copyright (c) 2019 Charles Jekel
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
from scipy.stats import binom, norm, nct
from ..hk import HansonKoopmans
from ..checks import numpy_array, assert_2d_sort
def normal(x, p, g):
    r"""
    Compute one-side tolerance bound using the normal distribution.

    Computes the one-sided tolerance interval using the normal distribution.
    This follows the derivation in [1] to calculate the interval as a factor
    of sample standard deviations away from the sample mean. See also [2].

    Parameters
    ----------
    x : ndarray (1-D, or 2-D)
        Numpy array of samples to compute the tolerance bound. Assumed data
        type is np.float. Shape of (m, n) is assumed for 2-D arrays with m
        number of sets of sample size n.
    p : float
        Percentile for the TI to estimate.
    g : float
        Confidence level where g > 0. and g < 1.

    Returns
    -------
    ndarray (1-D)
        The normal distribution tolerance bound.

    References
    ----------
    [1] Young, D. S. (2010). tolerance: An R Package for Estimating
        Tolerance Intervals. Journal of Statistical Software; Vol 1, Issue 5
        (2010). Retrieved from http://dx.doi.org/10.18637/jss.v036.i05
    [2] Montgomery, D. C., & Runger, G. C. (2018). Chapter 8. Statistical
        Intervals for a Single Sample. In Applied Statistics and Probability
        for Engineers, 7th Edition.

    Examples
    --------
    Estimate the 10th percentile lower bound with 95% confidence of the
    following 100 random samples from a normal distribution.

    >>> import numpy as np
    >>> import toleranceinterval as ti
    >>> x = np.random.normal(size=100)
    >>> lb = ti.oneside.normal(x, 0.1, 0.95)

    Estimate the 90th percentile upper bound with 95% confidence of the
    following 100 random samples from a normal distribution.

    >>> ub = ti.oneside.normal(x, 0.9, 0.95)
    """
    x = assert_2d_sort(numpy_array(x))  # coerce to a sorted 2-D ndarray
    _, n = x.shape
    # A lower bound is requested with p < 0.5; mirror to the upper tail and
    # subtract the k-factor from the mean instead of adding it.
    lower_bound = p < 0.5
    if lower_bound:
        p = 1.0 - p
    # k-factor from the noncentral t distribution (see [1]).
    root_n = np.sqrt(n)
    z = norm.ppf(p)
    k = nct.ppf(g, df=n-1., nc=root_n*z) / root_n
    center = x.mean(axis=1)
    spread = k * x.std(axis=1, ddof=1)
    return center - spread if lower_bound else center + spread
def lognormal(x, p, g):
    r"""
    Compute one-side tolerance bound using the lognormal distribution.

    Computes the one-sided tolerance interval using the lognormal
    distribution. This just performs a ln and exp transformations of the
    normal distribution.

    Parameters
    ----------
    x : ndarray (1-D, or 2-D)
        Numpy array of samples to compute the tolerance bound. Assumed data
        type is np.float. Shape of (m, n) is assumed for 2-D arrays with m
        number of sets of sample size n.
    p : float
        Percentile for the TI to estimate.
    g : float
        Confidence level where g > 0. and g < 1.

    Returns
    -------
    ndarray (1-D)
        The normal distribution tolerance bound.

    Examples
    --------
    Estimate the 10th percentile lower bound with 95% confidence of the
    following 100 random samples from a lognormal distribution.

    >>> import numpy as np
    >>> import toleranceinterval as ti
    >>> x = np.random.random(100)
    >>> lb = ti.oneside.lognormal(x, 0.1, 0.95)

    Estimate the 90th percentile upper bound with 95% confidence of the
    following 100 random samples from a lognormal distribution.

    >>> ub = ti.oneside.lognormal(x, 0.9, 0.95)
    """
    samples = assert_2d_sort(numpy_array(x))  # sanitize input first
    # Bound in log space via the normal method, then map back.
    log_bound = normal(np.log(samples), p, g)
    return np.exp(log_bound)
def non_parametric(x, p, g):
    r"""
    Compute one-side tolerance bound using traditional non-parametric method.

    Computes a tolerance interval for any percentile, confidence level, and
    number of samples using the traditional non-parametric method [1] [2].
    This assumes that the true distribution is continuous.

    Parameters
    ----------
    x : ndarray (1-D, or 2-D)
        Numpy array of samples to compute the tolerance bound. Assumed data
        type is np.float. Shape of (m, n) is assumed for 2-D arrays with m
        number of sets of sample size n.
    p : float
        Percentile for the TI to estimate.
    g : float
        Confidence level where g > 0. and g < 1.

    Returns
    -------
    ndarray (1-D)
        The non-parametric tolerance interval bound. Returns np.nan if a
        non-parametric tolerance interval does not exist for the combination
        of percentile, confidence level, and number of samples.

    Notes
    -----
    The non-parametric tolerance interval only exists for certain
    combinations of percentile, confidence level, and number of samples.

    References
    ----------
    [1] Hong, L. J., Huang, Z., & Lam, H. (2017). Learning-based robust
        optimization: Procedures and statistical guarantees. ArXiv Preprint
        ArXiv:1704.04342.
    [2] 9.5.5.3 Nonparametric Procedure. (2017). In MMPDS-12 : Metallic
        materials properties development and standardization. Battelle
        Memorial Institute.

    Examples
    --------
    Estimate the 10th percentile bound with 95% confidence of the
    following 300 random samples from a normal distribution.

    >>> import numpy as np
    >>> import toleranceinterval as ti
    >>> x = np.random.random(300)
    >>> bound = ti.oneside.non_parametric(x, 0.1, 0.95)

    Estimate the 90th percentile bound with 95% confidence of the
    following 300 random samples from a normal distribution.

    >>> bound = ti.oneside.non_parametric(x, 0.9, 0.95)
    """
    x = numpy_array(x)  # check if numpy array, if not make numpy array
    x = assert_2d_sort(x)
    m, n = x.shape
    order = np.arange(0, n)
    # For a lower (left-tail) bound the survival function gives each order
    # statistic's confidence; for an upper bound the CDF does.
    if p < 0.5:
        confidence = binom.sf(order, n, p)
        pick = np.max  # largest index that still meets the confidence
    else:
        confidence = binom.cdf(order, n, p)
        pick = np.min  # smallest index that meets the confidence
    satisfies = confidence >= g
    if satisfies.sum() > 0:
        return x[:, pick(np.where(satisfies))]
    # No order statistic achieves the requested confidence level.
    return np.nan * np.ones(m)
def hanson_koopmans(x, p, g, j=-1, method='secant', max_iter=200, tol=1e-5,
                    step_size=1e-4):
    r"""
    Compute left tail probabilities using the HansonKoopmans method [1].

    Runs the HansonKoopmans solver object to find the left tail bound for any
    percentile, confidence level, and number of samples. This assumes the
    lowest value is the first order statistic, but you can specify the index
    of the second order statistic as j.

    Parameters
    ----------
    x : ndarray (1-D, or 2-D)
        Numpy array of samples to compute the tolerance bound. Assumed data
        type is np.float. Shape of (m, n) is assumed for 2-D arrays with m
        number of sets of sample size n.
    p : float
        Percentile for lower limits when p < 0.5 and upper limits when
        p >= 0.5.
    g : float
        Confidence level where g > 0. and g < 1.
    j : int, optional
        Index of the second value to use for the second order statistic.
        Default is the last value j = -1 = n-1 if p < 0.5. If p >= 0.5,
        the second index is defined as index=n-j-1, with default j = n-1.
    method : string, optional
        Which rootfinding method to use to solve for the Hanson-Koopmans
        bound. Default is method='secant' which appears to converge
        quickly. Other choices include 'newton-raphson' and 'halley'.
    max_iter : int, optional
        Maximum number of iterations for the root finding method.
    tol : float, optional
        Tolerance for the root finding method to converge.
    step_size : float, optional
        Step size for the secant solver. Default step_size = 1e-4.

    Returns
    -------
    ndarray (1-D)
        The Hanson-Koopmans tolerance interval bound as np.float with shape
        m. Returns an ndarray of np.nan (shape m) if the rootfinding method
        did not converge.

    Notes
    -----
    The Hanson-Koopmans bound assumes the true distribution belongs to the
    log-concave CDF class of distributions [1].

    This implementation will always extrapolate beyond the lowest sample. If
    interpolation is needed within the sample set, this method falls back to
    the traditional non-parametric method using non_parametric(x, p, g).

    j uses Python style index notation.

    References
    ----------
    [1] Hanson, D. L., & Koopmans, L. H. (1964). Tolerance Limits for
        the Class of Distributions with Increasing Hazard Rates. Ann. Math.
        Statist., 35(4), 1561-1570. https://doi.org/10.1214/aoms/1177700380

    Examples
    --------
    Estimate the 10th percentile with 95% confidence of the following 10
    random samples.

    >>> import numpy as np
    >>> import toleranceinterval as ti
    >>> x = np.random.random(10)
    >>> bound = ti.oneside.hanson_koopmans(x, 0.1, 0.95)

    Estimate the 90th percentile with 95% confidence.

    >>> bound = ti.oneside.hanson_koopmans(x, 0.9, 0.95)
    """
    x = numpy_array(x)  # check if numpy array, if not make numpy array
    x = assert_2d_sort(x)
    m, n = x.shape
    if j == -1:
        # Need to use n for the HansonKoopmans solver
        j = n - 1
    assert j < n
    if p < 0.5:
        lower = True
        myhk = HansonKoopmans(p, g, n, j, method=method, max_iter=max_iter,
                              tol=tol, step_size=step_size)
    else:
        # Upper tail: solve the mirrored lower-tail problem with 1-p.
        lower = False
        myhk = HansonKoopmans(1.0-p, g, n, j, method=method, max_iter=max_iter,
                              tol=tol, step_size=step_size)
    if myhk.fall_back:
        # The bound lies within the sample set; extrapolation is not needed,
        # so use the traditional non-parametric method instead.
        return non_parametric(x, p, g)
    if myhk.un_conv:
        # Root finding did not converge. Return an ndarray of shape (m,)
        # filled with np.nan for consistency with the documented return type
        # and with non_parametric (previously a scalar np.nan was returned).
        return np.nan * np.ones(m)
    b = float(myhk.b)
    if lower:
        bound = x[:, j] - b*(x[:, j]-x[:, 0])
    else:
        bound = b*(x[:, n-1] - x[:, n-j-1]) + x[:, n-j-1]
    return bound
def hanson_koopmans_cmh(x, p, g, j=-1, method='secant', max_iter=200, tol=1e-5,
                        step_size=1e-4):
    r"""
    Compute CMH style tail probabilities using the HansonKoopmans method [1].

    Runs the HansonKoopmans solver object to find the left tail bound for any
    percentile, confidence level, and number of samples. This assumes the
    lowest value is the first order statistic, but you can specify the index
    of the second order statistic as j. CMH variant is the Composite
    Materials Handbook which calculates the same b, but uses a different
    order statistic calculation [2].

    Parameters
    ----------
    x : ndarray (1-D, or 2-D)
        Numpy array of samples to compute the tolerance bound. Assumed data
        type is np.float. Shape of (m, n) is assumed for 2-D arrays with m
        number of sets of sample size n.
    p : float
        Percentile for lower limits when p < 0.5.
    g : float
        Confidence level where g > 0. and g < 1.
    j : int, optional
        Index of the second value to use for the second order statistic.
        Default is the last value j = -1 = n-1 if p < 0.5. If p >= 0.5,
        the second index is defined as index=n-j-1, with default j = n-1.
    method : string, optional
        Which rootfinding method to use to solve for the Hanson-Koopmans
        bound. Default is method='secant' which appears to converge
        quickly. Other choices include 'newton-raphson' and 'halley'.
    max_iter : int, optional
        Maximum number of iterations for the root finding method.
    tol : float, optional
        Tolerance for the root finding method to converge.
    step_size : float, optional
        Step size for the secant solver. Default step_size = 1e-4.

    Returns
    -------
    ndarray (1-D)
        The Hanson-Koopmans tolerance interval bound as np.float with shape
        m. Returns an ndarray of np.nan (shape m) if the rootfinding method
        did not converge.

    Raises
    ------
    ValueError
        If p >= 0.5; the CMH variant estimates lower tails only.

    Notes
    -----
    The Hanson-Koopmans bound assumes the true distribution belongs to the
    log-concave CDF class of distributions [1].

    This implementation will always extrapolate beyond the lowest sample. If
    interpolation is needed within the sample set, this method falls back to
    the traditional non-parametric method using non_parametric(x, p, g).

    j uses Python style index notation.

    CMH variant estimates lower tails only!

    References
    ----------
    [1] Hanson, D. L., & Koopmans, L. H. (1964). Tolerance Limits for
        the Class of Distributions with Increasing Hazard Rates. Ann. Math.
        Statist., 35(4), 1561-1570. https://doi.org/10.1214/aoms/1177700380
    [2] Volume 1: Guidelines for Characterization of Structural Materials.
        (2017). In Composite Materials Handbook. SAE International.

    Examples
    --------
    Estimate the 10th percentile with 95% confidence of the following 10
    random samples.

    >>> import numpy as np
    >>> import toleranceinterval as ti
    >>> x = np.random.random(10)
    >>> bound = ti.oneside.hanson_koopmans_cmh(x, 0.1, 0.95)

    Estimate the 1st percentile with 95% confidence.

    >>> bound = ti.oneside.hanson_koopmans_cmh(x, 0.01, 0.95)
    """
    x = numpy_array(x)  # check if numpy array, if not make numpy array
    x = assert_2d_sort(x)
    m, n = x.shape
    if j == -1:
        # Need to use n for the HansonKoopmans solver
        j = n - 1
    assert j < n
    if p >= 0.5:
        raise ValueError('p must be < 0.5!')
    myhk = HansonKoopmans(p, g, n, j, method=method, max_iter=max_iter,
                          tol=tol, step_size=step_size)
    if myhk.fall_back:
        # The bound lies within the sample set; use the traditional
        # non-parametric method instead of extrapolating.
        return non_parametric(x, p, g)
    if myhk.un_conv:
        # Root finding did not converge. Return an ndarray of shape (m,)
        # filled with np.nan for consistency with the documented return type
        # and with non_parametric (previously a scalar np.nan was returned).
        return np.nan * np.ones(m)
    b = float(myhk.b)
    # CMH order statistic form: geometric interpolation between x[j] and x[0].
    bound = x[:, j] * (x[:, 0]/x[:, j])**b
    return bound
Functions
def hanson_koopmans(x, p, g, j=-1, method='secant', max_iter=200, tol=1e-05, step_size=0.0001)
-
Compute left tail probabilities using the HansonKoopmans method [1].
Runs the HansonKoopmans solver object to find the left tail bound for any percentile, confidence level, and number of samples. This assumes the lowest value is the first order statistic, but you can specify the index of the second order statistic as j.
Parameters
x
:ndarray (1-D,
or2-D)
- Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n.
p
:float
- Percentile for lower limits when p < 0.5 and upper limits when p >= 0.5.
g
:float
- Confidence level where g > 0. and g < 1.
j
:int
, optional- Index of the second value to use for the second order statistic. Default is the last value j = -1 = n-1 if p < 0.5. If p >= 0.5, the second index is defined as index=n-j-1, with default j = n-1.
method
:string
, optional- Which rootfinding method to use to solve for the Hanson-Koopmans bound. Default is method='secant' which appears to converge quickly. Other choices include 'newton-raphson' and 'halley'.
max_iter
:int
, optional- Maximum number of iterations for the root finding method.
tol
:float
, optional- Tolerance for the root finding method to converge.
step_size
:float
, optional- Step size for the secant solver. Default step_size = 1e-4.
Returns
ndarray (1-D)
- The Hanson-Koopmans toleranace interval bound as np.float with shape m. Returns np.nan if the rootfinding method did not converge.
Notes
The Hanson-Koopmans bound assumes the true distribution belongs to the log-concave CDF class of distributions [1].
This implementation will always extrapolate beyond the lowest sample. If interpolation is needed within the sample set, this method falls back to the traditional non-parametric method using non_parametric(x, p, g).
j uses Python style index notation.
References
[1] Hanson, D. L., & Koopmans, L. H. (1964). Tolerance Limits for the Class of Distributions with Increasing Hazard Rates. Ann. Math. Statist., 35(4), 1561-1570. https://doi.org/10.1214/aoms/1177700380
Examples
Estimate the 10th percentile with 95% confidence of the following 10 random samples.
>>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(10) >>> bound = ti.oneside.hanson_koopmans(x, 0.1, 0.95)
Estimate the 90th percentile with 95% confidence.
>>> bound = ti.oneside.hanson_koopmans(x, 0.9, 0.95)
Expand source code
def hanson_koopmans(x, p, g, j=-1, method='secant', max_iter=200, tol=1e-5, step_size=1e-4): r""" Compute left tail probabilities using the HansonKoopmans method [1]. Runs the HansonKoopmans solver object to find the left tail bound for any percentile, confidence level, and number of samples. This assumes the lowest value is the first order statistic, but you can specify the index of the second order statistic as j. Parameters ---------- x : ndarray (1-D, or 2-D) Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n. p : float Percentile for lower limits when p < 0.5 and upper limits when p >= 0.5. g : float Confidence level where g > 0. and g < 1. j : int, optional Index of the second value to use for the second order statistic. Default is the last value j = -1 = n-1 if p < 0.5. If p >= 0.5, the second index is defined as index=n-j-1, with default j = n-1. method : string, optional Which rootfinding method to use to solve for the Hanson-Koopmans bound. Default is method='secant' which appears to converge quickly. Other choices include 'newton-raphson' and 'halley'. max_iter : int, optional Maximum number of iterations for the root finding method. tol : float, optional Tolerance for the root finding method to converge. step_size : float, optional Step size for the secant solver. Default step_size = 1e-4. Returns ------- ndarray (1-D) The Hanson-Koopmans toleranace interval bound as np.float with shape m. Returns np.nan if the rootfinding method did not converge. Notes ----- The Hanson-Koopmans bound assumes the true distribution belongs to the log-concave CDF class of distributions [1]. This implemnation will always extrapolate beyond the lowest sample. If interpolation is needed within the sample set, this method falls back to the traditional non-parametric method using non_parametric(x, p, g). j uses Python style index notation. 
References ---------- [1] Hanson, D. L., & Koopmans, L. H. (1964). Tolerance Limits for the Class of Distributions with Increasing Hazard Rates. Ann. Math. Statist., 35(4), 1561-1570. https://doi.org/10.1214/aoms/1177700380 Examples -------- Estimate the 10th percentile with 95% confidence of the following 10 random samples. >>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(10) >>> bound = ti.oneside.hanson_koopmans(x, 0.1, 0.95) Estimate the 90th percentile with 95% confidence. >>> bound = ti.oneside.hanson_koopmans(x, 0.9, 0.95) """ x = numpy_array(x) # check if numpy array, if not make numpy array x = assert_2d_sort(x) m, n = x.shape if j == -1: # Need to use n for the HansonKoopmans solver j = n - 1 assert j < n if p < 0.5: lower = True myhk = HansonKoopmans(p, g, n, j, method=method, max_iter=max_iter, tol=tol, step_size=step_size) else: lower = False myhk = HansonKoopmans(1.0-p, g, n, j, method=method, max_iter=max_iter, tol=tol, step_size=step_size) if myhk.fall_back: return non_parametric(x, p, g) if myhk.un_conv: return np.nan else: b = float(myhk.b) if lower: bound = x[:, j] - b*(x[:, j]-x[:, 0]) else: bound = b*(x[:, n-1] - x[:, n-j-1]) + x[:, n-j-1] return bound
def hanson_koopmans_cmh(x, p, g, j=-1, method='secant', max_iter=200, tol=1e-05, step_size=0.0001)
-
Compute CMH style tail probabilities using the HansonKoopmans method [1].
Runs the HansonKoopmans solver object to find the left tail bound for any percentile, confidence level, and number of samples. This assumes the lowest value is the first order statistic, but you can specify the index of the second order statistic as j. CMH variant is the Composite Materials Handbook which calculates the same b, but uses a different order statistic calculation [2].
Parameters
x
:ndarray (1-D,
or2-D)
- Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n.
p
:float
- Percentile for lower limits when p < 0.5.
g
:float
- Confidence level where g > 0. and g < 1.
j
:int
, optional- Index of the second value to use for the second order statistic. Default is the last value j = -1 = n-1 if p < 0.5. If p >= 0.5, the second index is defined as index=n-j-1, with default j = n-1.
method
:string
, optional- Which rootfinding method to use to solve for the Hanson-Koopmans bound. Default is method='secant' which appears to converge quickly. Other choices include 'newton-raphson' and 'halley'.
max_iter
:int
, optional- Maximum number of iterations for the root finding method.
tol
:float
, optional- Tolerance for the root finding method to converge.
step_size
:float
, optional- Step size for the secant solver. Default step_size = 1e-4.
Returns
ndarray (1-D)
- The Hanson-Koopmans toleranace interval bound as np.float with shape m. Returns np.nan if the rootfinding method did not converge.
Notes
The Hanson-Koopmans bound assumes the true distribution belongs to the log-concave CDF class of distributions [1].
This implementation will always extrapolate beyond the lowest sample. If interpolation is needed within the sample set, this method falls back to the traditional non-parametric method using non_parametric(x, p, g).
j uses Python style index notation.
CMH variant estimates lower tails only!
References
[1] Hanson, D. L., & Koopmans, L. H. (1964). Tolerance Limits for the Class of Distributions with Increasing Hazard Rates. Ann. Math. Statist., 35(4), 1561-1570. https://doi.org/10.1214/aoms/1177700380
[2] Volume 1: Guidelines for Characterization of Structural Materials. (2017). In Composite Materials Handbook. SAE International.
Examples
Estimate the 10th percentile with 95% confidence of the following 10 random samples.
>>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(10) >>> bound = ti.oneside.hanson_koopmans_cmh(x, 0.1, 0.95)
Estimate the 1st percentile with 95% confidence.
>>> bound = ti.oneside.hanson_koopmans_cmh(x, 0.01, 0.95)
Expand source code
def hanson_koopmans_cmh(x, p, g, j=-1, method='secant', max_iter=200, tol=1e-5, step_size=1e-4): r""" Compute CMH style tail probabilities using the HansonKoopmans method [1]. Runs the HansonKoopmans solver object to find the left tail bound for any percentile, confidence level, and number of samples. This assumes the lowest value is the first order statistic, but you can specify the index of the second order statistic as j. CMH variant is the Composite Materials Handbook which calculates the same b, but uses a different order statistic calculation [2]. Parameters ---------- x : ndarray (1-D, or 2-D) Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n. p : float Percentile for lower limits when p < 0.5. g : float Confidence level where g > 0. and g < 1. j : int, optional Index of the second value to use for the second order statistic. Default is the last value j = -1 = n-1 if p < 0.5. If p >= 0.5, the second index is defined as index=n-j-1, with default j = n-1. method : string, optional Which rootfinding method to use to solve for the Hanson-Koopmans bound. Default is method='secant' which appears to converge quickly. Other choices include 'newton-raphson' and 'halley'. max_iter : int, optional Maximum number of iterations for the root finding method. tol : float, optional Tolerance for the root finding method to converge. step_size : float, optional Step size for the secant solver. Default step_size = 1e-4. Returns ------- ndarray (1-D) The Hanson-Koopmans toleranace interval bound as np.float with shape m. Returns np.nan if the rootfinding method did not converge. Notes ----- The Hanson-Koopmans bound assumes the true distribution belongs to the log-concave CDF class of distributions [1]. This implemnation will always extrapolate beyond the lowest sample. 
If interpolation is needed within the sample set, this method falls back to the traditional non-parametric method using non_parametric(x, p, g). j uses Python style index notation. CMH variant estimates lower tails only! References ---------- [1] Hanson, D. L., & Koopmans, L. H. (1964). Tolerance Limits for the Class of Distributions with Increasing Hazard Rates. Ann. Math. Statist., 35(4), 1561-1570. https://doi.org/10.1214/aoms/1177700380 [2] Volume 1: Guidelines for Characterization of Structural Materials. (2017). In Composite Materials Handbook. SAE International. Examples -------- Estimate the 10th percentile with 95% confidence of the following 10 random samples. >>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(10) >>> bound = ti.oneside.hanson_koopmans_cmh(x, 0.1, 0.95) Estimate the 1st percentile with 95% confidence. >>> bound = ti.oneside.hanson_koopmans_cmh(x, 0.01, 0.95) """ x = numpy_array(x) # check if numpy array, if not make numpy array x = assert_2d_sort(x) m, n = x.shape if j == -1: # Need to use n for the HansonKoopmans solver j = n - 1 assert j < n if p >= 0.5: raise ValueError('p must be < 0.5!') myhk = HansonKoopmans(p, g, n, j, method=method, max_iter=max_iter, tol=tol, step_size=step_size) if myhk.fall_back: return non_parametric(x, p, g) if myhk.un_conv: return np.nan else: b = float(myhk.b) bound = x[:, j] * (x[:, 0]/x[:, j])**b return bound
def lognormal(x, p, g)
-
Compute one-side tolerance bound using the lognormal distribution.
Computes the one-sided tolerance interval using the lognormal distribution. This just performs a ln and exp transformations of the normal distribution.
Parameters
x
:ndarray (1-D,
or2-D)
- Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n.
p
:float
- Percentile for the TI to estimate.
g
:float
- Confidence level where g > 0. and g < 1.
Returns
ndarray (1-D)
- The normal distribution toleranace bound.
Examples
Estimate the 10th percentile lower bound with 95% confidence of the following 100 random samples from a lognormal distribution.
>>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(100) >>> lb = ti.oneside.lognormal(x, 0.1, 0.95)
Estimate the 90th percentile upper bound with 95% confidence of the following 100 random samples from a lognormal distribution.
>>> ub = ti.oneside.lognormal(x, 0.9, 0.95)
Expand source code
def lognormal(x, p, g): r""" Compute one-side tolerance bound using the lognormal distribution. Computes the one-sided tolerance interval using the lognormal distribution. This just performs a ln and exp transformations of the normal distribution. Parameters ---------- x : ndarray (1-D, or 2-D) Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n. p : float Percentile for the TI to estimate. g : float Confidence level where g > 0. and g < 1. Returns ------- ndarray (1-D) The normal distribution toleranace bound. Examples -------- Estimate the 10th percentile lower bound with 95% confidence of the following 100 random samples from a lognormal distribution. >>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(100) >>> lb = ti.oneside.lognormal(x, 0.1, 0.95) Estimate the 90th percentile upper bound with 95% confidence of the following 100 random samples from a lognormal distribution. >>> ub = ti.oneside.lognormal(x, 0.9, 0.95) """ x = numpy_array(x) # check if numpy array, if not make numpy array x = assert_2d_sort(x) return np.exp(normal(np.log(x), p, g))
def non_parametric(x, p, g)
-
Compute one-side tolerance bound using traditional non-parametric method.
Computes a tolerance interval for any percentile, confidence level, and number of samples using the traditional non-parametric method [1] [2]. This assumes that the true distribution is continuous.
Parameters
x
:ndarray (1-D,
or2-D)
- Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n.
p
:float
- Percentile for the TI to estimate.
g
:float
- Confidence level where g > 0. and g < 1.
Returns
ndarray (1-D)
- The non-parametric tolerance interval bound. Returns np.nan if a non-parametric tolerance interval does not exist for the combination of percentile, confidence level, and number of samples.
Notes
The non-parametric tolerance interval only exists for certain combinations of percentile, confidence level, and number of samples.
References
[1] Hong, L. J., Huang, Z., & Lam, H. (2017). Learning-based robust optimization: Procedures and statistical guarantees. ArXiv Preprint ArXiv:1704.04342.
[2] 9.5.5.3 Nonparametric Procedure. (2017). In MMPDS-12 : Metallic materials properties development and standardization. Battelle Memorial Institute.
Examples
Estimate the 10th percentile bound with 95% confidence of the following 300 random samples from a normal distribution.
>>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(300) >>> bound = ti.oneside.non_parametric(x, 0.1, 0.95)
Estimate the 90th percentile bound with 95% confidence of the following 300 random samples from a normal distribution.
>>> bound = ti.oneside.non_parametric(x, 0.9, 0.95)
Expand source code
def non_parametric(x, p, g): r""" Compute one-side tolerance bound using traditional non-parametric method. Computes a tolerance interval for any percentile, confidence level, and number of samples using the traditional non-parametric method [1] [2]. This assumes that the true distribution is continuous. Parameters ---------- x : ndarray (1-D, or 2-D) Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n. p : float Percentile for the TI to estimate. g : float Confidence level where g > 0. and g < 1. Returns ------- ndarray (1-D) The non-parametric toleranace interval bound. Returns np.nan if a non-parametric tolerance interval does not exist for the combination of percentile, confidence level, and number of samples. Notes ----- The non-parametric tolerance inteval only exists for certain combinations of percentile, confidence level, and number of samples. References ---------- [1] Hong, L. J., Huang, Z., & Lam, H. (2017). Learning-based robust optimization: Procedures and statistical guarantees. ArXiv Preprint ArXiv:1704.04342. [2] 9.5.5.3 Nonparametric Procedure. (2017). In MMPDS-12 : Metallic materials properties development and standardization. Battelle Memorial Institute. Examples -------- Estimate the 10th percentile bound with 95% confidence of the following 300 random samples from a normal distribution. >>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.random(300) >>> bound = ti.oneside.non_parametric(x, 0.1, 0.95) Estimate the 90th percentile bound with 95% confidence of the following 300 random samples from a normal distribution. 
>>> bound = ti.oneside.non_parametric(x, 0.9, 0.95) """ x = numpy_array(x) # check if numpy array, if not make numpy array x = assert_2d_sort(x) m, n = x.shape r = np.arange(0, n) if p < 0.5: left_tail = True confidence_index = binom.sf(r, n, p) else: left_tail = False confidence_index = binom.cdf(r, n, p) boolean_index = confidence_index >= g if boolean_index.sum() > 0: if left_tail: return x[:, np.max(np.where(boolean_index))] else: return x[:, np.min(np.where(boolean_index))] else: return np.nan*np.ones(m)
def normal(x, p, g)
-
Compute one-side tolerance bound using the normal distribution.
Computes the one-sided tolerance interval using the normal distribution. This follows the derivation in [1] to calculate the interval as a factor of sample standard deviations away from the sample mean. See also [2].
Parameters
x
:ndarray (1-D,
or2-D)
- Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n.
p
:float
- Percentile for the TI to estimate.
g
:float
- Confidence level where g > 0. and g < 1.
Returns
ndarray (1-D)
- The normal distribution tolerance bound.
References
[1] Young, D. S. (2010). tolerance: An R Package for Estimating Tolerance Intervals. Journal of Statistical Software; Vol 1, Issue 5 (2010). Retrieved from http://dx.doi.org/10.18637/jss.v036.i05
[2] Montgomery, D. C., & Runger, G. C. (2018). Chapter 8. Statistical Intervals for a Single Sample. In Applied Statistics and Probability for Engineers, 7th Edition.
Examples
Estimate the 10th percentile lower bound with 95% confidence of the following 100 random samples from a normal distribution.
>>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.normal(size=100) >>> lb = ti.oneside.normal(x, 0.1, 0.95)
Estimate the 90th percentile upper bound with 95% confidence of the following 100 random samples from a normal distribution.
>>> ub = ti.oneside.normal(x, 0.9, 0.95)
Expand source code
def normal(x, p, g): r""" Compute one-side tolerance bound using the normal distribution. Computes the one-sided tolerance interval using the normal distribution. This follows the derivation in [1] to calculate the interval as a factor of sample standard deviations away from the sample mean. See also [2]. Parameters ---------- x : ndarray (1-D, or 2-D) Numpy array of samples to compute the tolerance bound. Assumed data type is np.float. Shape of (m, n) is assumed for 2-D arrays with m number of sets of sample size n. p : float Percentile for the TI to estimate. g : float Confidence level where g > 0. and g < 1. Returns ------- ndarray (1-D) The normal distribution toleranace bound. References ---------- [1] Young, D. S. (2010). tolerance: An R Package for Estimating Tolerance Intervals. Journal of Statistical Software; Vol 1, Issue 5 (2010). Retrieved from http://dx.doi.org/10.18637/jss.v036.i05 [2] Montgomery, D. C., & Runger, G. C. (2018). Chapter 8. Statistical Intervals for a Single Sample. In Applied Statistics and Probability for Engineers, 7th Edition. Examples -------- Estimate the 10th percentile lower bound with 95% confidence of the following 100 random samples from a normal distribution. >>> import numpy as np >>> import toleranceinterval as ti >>> x = np.random.nomral(100) >>> lb = ti.oneside.normal(x, 0.1, 0.95) Estimate the 90th percentile upper bound with 95% confidence of the following 100 random samples from a normal distribution. >>> ub = ti.oneside.normal(x, 0.9, 0.95) """ x = numpy_array(x) # check if numpy array, if not make numpy array x = assert_2d_sort(x) m, n = x.shape if p < 0.5: p = 1.0 - p minus = True else: minus = False zp = norm.ppf(p) t = nct.ppf(g, df=n-1., nc=np.sqrt(n)*zp) k = t / np.sqrt(n) if minus: return x.mean(axis=1) - (k*x.std(axis=1, ddof=1)) else: return x.mean(axis=1) + (k*x.std(axis=1, ddof=1))