Source code for diPLSlib.utils.misc

'''
Some helper functions for diPLSlib
'''

import numpy as np
from scipy.stats import norm 
from scipy.stats import f
from math import exp, sqrt
from scipy.special import erf

[docs] def gengaus(length, mu, sigma, mag, noise=0): """ Generate a Gaussian spectrum-like signal with optional random noise. Parameters ---------- length : int Length of the generated signal. mu : float Mean of the Gaussian function. sigma : float Standard deviation of the Gaussian function. mag : float Magnitude of the Gaussian signal. noise : float, optional (default=0) Standard deviation of the Gaussian noise to be added to the signal. Returns ------- signal : ndarray of shape (length,) The generated Gaussian signal with noise. Examples -------- >>> from diPLSlib.utils.misc import gengaus >>> import numpy as np >>> import scipy.stats >>> signal = gengaus(100, 50, 10, 5, noise=0.1) """ s = mag*norm.pdf(np.arange(length),mu,sigma) n = noise*np.random.rand(length) signal = s + n return signal
[docs] def hellipse(X, alpha=0.05): """ Compute the 95% confidence interval ellipse for a 2D scatter plot. Parameters ---------- X : ndarray of shape (n_samples, 2) Matrix of data points. alpha : float, optional (default=0.05) Significance level for the confidence interval. Returns ------- el : ndarray of shape (2, 100) Coordinates of the ellipse's points. To plot, use `plt.plot(el[0, :], el[1, :])`. Examples -------- >>> import matplotlib.pyplot as plt >>> import numpy as np >>> from diPLSlib.utils.misc import hellipse >>> X = np.random.random((100, 2)) >>> el = hellipse(X) >>> plt.scatter(X[:,0], X[:,1], label='Data points') # doctest: +ELLIPSIS <matplotlib.collections.PathCollection object at ...> >>> plt.plot(el[0,:], el[1,:], label='95% Confidence Ellipse') # doctest: +ELLIPSIS [<matplotlib.lines.Line2D object at ...>] >>> plt.legend() # doctest: +ELLIPSIS <matplotlib.legend.Legend object at ...> """ # Means mean_all = np.zeros((2,1)) mean_all[0] = np.mean(X[:,0]) mean_all[1] = np.mean(X[:,1]) # Covariance matrix X = X[:,:2] comat_all = np.cov(np.transpose(X)) # SVD U,S,V = np.linalg.svd(comat_all) # Confidence limit computed as the 95% quantile of the F-Distribution N = np.shape(X)[0] quant = 1 - alpha Conf = (2*(N-2))/(N-2)*f.ppf(quant,2,(N-2)) # Evalute CI on (0,2pi) el = np.zeros((2,100)) t = np.linspace(0,2*np.pi,100) for j in np.arange(100): sT = np.matmul(U,np.diag(np.sqrt(S*Conf))) el[:,j] = np.transpose(mean_all)+np.matmul(sT,np.array([np.cos(t[j]),np.sin(t[j])])) return el
[docs] def rmse(y, yhat): """ Compute the Root Mean Squared Error (RMSE) between two arrays. Parameters ---------- y : ndarray of shape (n_samples,) True values. yhat : ndarray of shape (n_samples,) Predicted values. Returns ------- error : ndarray of shape (n_samples,) The RMSE between `y` and `yhat`. Examples -------- >>> import numpy as np >>> from diPLSlib.utils.misc import rmse >>> x = np.array([1, 2, 3]) >>> y = np.array([2, 3, 4]) >>> error = rmse(x, y) >>> print(error) 1.0 """ return np.sqrt(((y.ravel()-yhat.ravel())**2).mean())
[docs] def calibrateAnalyticGaussianMechanism(epsilon, delta, GS, tol = 1.e-12): """ Calibrate a Gaussian perturbation for differential privacy using the analytic Gaussian mechanism of [Balle and Wang, ICML'18] Parameters ---------- epsilon : float Privacy parameter epsilon delta : float Desired privacy failure probability GS : float Upper bound on the L2-sensitivity of the function to which the mechanism is applied tol : float Error tolerance for binary search Returns ------- sigma : float Standard deviation of Gaussian noise needed to achieve (epsilon,delta)-DP under global sensitivity GS References ---------- - Balle, B., & Wang, Y. X. (2018, July). Improving the gaussian mechanism for differential privacy: Analytical calibration and optimal denoising. In International Conference on Machine Learning (pp. 394-403). PMLR. Examples -------- >>> from diPLSlib.utils.misc import calibrateAnalyticGaussianMechanism >>> calibrateAnalyticGaussianMechanism(1.0, 1e-5, 1.0) 3.730631634944469 """ def Phi(t): return 0.5*(1.0 + erf(float(t)/sqrt(2.0))) def caseA(epsilon,s): return Phi(sqrt(epsilon*s)) - exp(epsilon)*Phi(-sqrt(epsilon*(s+2.0))) def caseB(epsilon,s): return Phi(-sqrt(epsilon*s)) - exp(epsilon)*Phi(-sqrt(epsilon*(s+2.0))) def doubling_trick(predicate_stop, s_inf, s_sup): while(not predicate_stop(s_sup)): s_inf = s_sup s_sup = 2.0*s_inf return s_inf, s_sup def binary_search(predicate_stop, predicate_left, s_inf, s_sup): s_mid = s_inf + (s_sup-s_inf)/2.0 while(not predicate_stop(s_mid)): if (predicate_left(s_mid)): s_sup = s_mid else: s_inf = s_mid s_mid = s_inf + (s_sup-s_inf)/2.0 return s_mid delta_thr = caseA(epsilon, 0.0) if (delta == delta_thr): alpha = 1.0 else: if (delta > delta_thr): predicate_stop_DT = lambda s : caseA(epsilon, s) >= delta function_s_to_delta = lambda s : caseA(epsilon, s) predicate_left_BS = lambda s : function_s_to_delta(s) > delta function_s_to_alpha = lambda s : sqrt(1.0 + s/2.0) - sqrt(s/2.0) else: predicate_stop_DT = lambda s : caseB(epsilon, s) <= delta function_s_to_delta = lambda s : caseB(epsilon, s) predicate_left_BS = lambda s : function_s_to_delta(s) < delta function_s_to_alpha = lambda s : sqrt(1.0 + s/2.0) + sqrt(s/2.0) predicate_stop_BS = lambda s : abs(function_s_to_delta(s) - delta) <= tol s_inf, s_sup = doubling_trick(predicate_stop_DT, 0.0, 1.0) s_final = binary_search(predicate_stop_BS, predicate_left_BS, s_inf, s_sup) alpha = function_s_to_alpha(s_final) sigma = alpha*GS/sqrt(2.0*epsilon) return sigma