'''
Some helper functions for diPLSlib
'''
import numpy as np
from scipy.stats import norm
from scipy.stats import f
from math import exp, sqrt
from scipy.special import erf
[docs]
def gengaus(length, mu, sigma, mag, noise=0):
"""
Generate a Gaussian spectrum-like signal with optional random noise.
Parameters
----------
length : int
Length of the generated signal.
mu : float
Mean of the Gaussian function.
sigma : float
Standard deviation of the Gaussian function.
mag : float
Magnitude of the Gaussian signal.
noise : float, optional (default=0)
Standard deviation of the Gaussian noise to be added to the signal.
Returns
-------
signal : ndarray of shape (length,)
The generated Gaussian signal with noise.
Examples
--------
>>> from diPLSlib.utils.misc import gengaus
>>> import numpy as np
>>> import scipy.stats
>>> signal = gengaus(100, 50, 10, 5, noise=0.1)
"""
s = mag*norm.pdf(np.arange(length),mu,sigma)
n = noise*np.random.rand(length)
signal = s + n
return signal
[docs]
def hellipse(X, alpha=0.05):
"""
Compute the 95% confidence interval ellipse for a 2D scatter plot.
Parameters
----------
X : ndarray of shape (n_samples, 2)
Matrix of data points.
alpha : float, optional (default=0.05)
Significance level for the confidence interval.
Returns
-------
el : ndarray of shape (2, 100)
Coordinates of the ellipse's points. To plot, use `plt.plot(el[0, :], el[1, :])`.
Examples
--------
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> from diPLSlib.utils.misc import hellipse
>>> X = np.random.random((100, 2))
>>> el = hellipse(X)
>>> plt.scatter(X[:,0], X[:,1], label='Data points') # doctest: +ELLIPSIS
<matplotlib.collections.PathCollection object at ...>
>>> plt.plot(el[0,:], el[1,:], label='95% Confidence Ellipse') # doctest: +ELLIPSIS
[<matplotlib.lines.Line2D object at ...>]
>>> plt.legend() # doctest: +ELLIPSIS
<matplotlib.legend.Legend object at ...>
"""
# Means
mean_all = np.zeros((2,1))
mean_all[0] = np.mean(X[:,0])
mean_all[1] = np.mean(X[:,1])
# Covariance matrix
X = X[:,:2]
comat_all = np.cov(np.transpose(X))
# SVD
U,S,V = np.linalg.svd(comat_all)
# Confidence limit computed as the 95% quantile of the F-Distribution
N = np.shape(X)[0]
quant = 1 - alpha
Conf = (2*(N-2))/(N-2)*f.ppf(quant,2,(N-2))
# Evalute CI on (0,2pi)
el = np.zeros((2,100))
t = np.linspace(0,2*np.pi,100)
for j in np.arange(100):
sT = np.matmul(U,np.diag(np.sqrt(S*Conf)))
el[:,j] = np.transpose(mean_all)+np.matmul(sT,np.array([np.cos(t[j]),np.sin(t[j])]))
return el
[docs]
def rmse(y, yhat):
"""
Compute the Root Mean Squared Error (RMSE) between two arrays.
Parameters
----------
y : ndarray of shape (n_samples,)
True values.
yhat : ndarray of shape (n_samples,)
Predicted values.
Returns
-------
error : ndarray of shape (n_samples,)
The RMSE between `y` and `yhat`.
Examples
--------
>>> import numpy as np
>>> from diPLSlib.utils.misc import rmse
>>> x = np.array([1, 2, 3])
>>> y = np.array([2, 3, 4])
>>> error = rmse(x, y)
>>> print(error)
1.0
"""
return np.sqrt(((y.ravel()-yhat.ravel())**2).mean())
[docs]
def calibrateAnalyticGaussianMechanism(epsilon, delta, GS, tol = 1.e-12):
"""
Calibrate a Gaussian perturbation for differential privacy using the analytic Gaussian mechanism of [Balle and Wang, ICML'18]
Parameters
----------
epsilon : float
Privacy parameter epsilon
delta : float
Desired privacy failure probability
GS : float
Upper bound on the L2-sensitivity of the function to which the mechanism is applied
tol : float
Error tolerance for binary search
Returns
-------
sigma : float
Standard deviation of Gaussian noise needed to achieve (epsilon,delta)-DP under global sensitivity GS
References
----------
- Balle, B., & Wang, Y. X. (2018, July). Improving the gaussian mechanism for differential privacy: Analytical calibration and optimal denoising. In International Conference on Machine Learning (pp. 394-403). PMLR.
Examples
--------
>>> from diPLSlib.utils.misc import calibrateAnalyticGaussianMechanism
>>> calibrateAnalyticGaussianMechanism(1.0, 1e-5, 1.0)
3.730631634944469
"""
def Phi(t):
return 0.5*(1.0 + erf(float(t)/sqrt(2.0)))
def caseA(epsilon,s):
return Phi(sqrt(epsilon*s)) - exp(epsilon)*Phi(-sqrt(epsilon*(s+2.0)))
def caseB(epsilon,s):
return Phi(-sqrt(epsilon*s)) - exp(epsilon)*Phi(-sqrt(epsilon*(s+2.0)))
def doubling_trick(predicate_stop, s_inf, s_sup):
while(not predicate_stop(s_sup)):
s_inf = s_sup
s_sup = 2.0*s_inf
return s_inf, s_sup
def binary_search(predicate_stop, predicate_left, s_inf, s_sup):
s_mid = s_inf + (s_sup-s_inf)/2.0
while(not predicate_stop(s_mid)):
if (predicate_left(s_mid)):
s_sup = s_mid
else:
s_inf = s_mid
s_mid = s_inf + (s_sup-s_inf)/2.0
return s_mid
delta_thr = caseA(epsilon, 0.0)
if (delta == delta_thr):
alpha = 1.0
else:
if (delta > delta_thr):
predicate_stop_DT = lambda s : caseA(epsilon, s) >= delta
function_s_to_delta = lambda s : caseA(epsilon, s)
predicate_left_BS = lambda s : function_s_to_delta(s) > delta
function_s_to_alpha = lambda s : sqrt(1.0 + s/2.0) - sqrt(s/2.0)
else:
predicate_stop_DT = lambda s : caseB(epsilon, s) <= delta
function_s_to_delta = lambda s : caseB(epsilon, s)
predicate_left_BS = lambda s : function_s_to_delta(s) < delta
function_s_to_alpha = lambda s : sqrt(1.0 + s/2.0) + sqrt(s/2.0)
predicate_stop_BS = lambda s : abs(function_s_to_delta(s) - delta) <= tol
s_inf, s_sup = doubling_trick(predicate_stop_DT, 0.0, 1.0)
s_final = binary_search(predicate_stop_BS, predicate_left_BS, s_inf, s_sup)
alpha = function_s_to_alpha(s_final)
sigma = alpha*GS/sqrt(2.0*epsilon)
return sigma