# -*- coding: utf-8 -*-
# Modules
import numpy as np
import scipy.linalg
import scipy.stats
from scipy.linalg import eigh
import scipy.spatial.distance as scd
from scipy.spatial import distance_matrix
from sklearn.metrics import pairwise as kernels
import warnings
warnings.filterwarnings("ignore")
from sklearn.utils.validation import check_array
from diPLSlib.utils.misc import calibrateAnalyticGaussianMechanism
[docs]
def kdapls(x: np.ndarray, y: np.ndarray, xs: np.ndarray, xt,
A: int,
l,
kernel_params: dict = {"type": "rbf", "gamma": 10}):
r'''
Perform Kernel Domain Adaptive Partial Least Squares (kda-PLS) regression.
This method fits a Kernel PLS regression model using labeled source domain data and potentially
unlabeled target domain data. In contrast to di-PLS, kda-PLS aligns the source and target distributions in a RKHS in a non-parametric way, thus making no assumptions about the underlying data distributions.
Mathematically, for each latent variable (LV), kda‐PLS finds a weight vector :math:`\mathbf{w}` (with :math:`\mathbf{w}^T\mathbf{w} = 1`) that maximizes
.. math::
\max_{\mathbf{w} : \mathbf{w}^T\mathbf{w} = 1} \Biggl(
\mathbf{w}^T K(X_s, X_s)^T Y Y^T K(X_s, X_s) \mathbf{w}
- \gamma \mathbf{w}^T K(X_{st}, X_s)^T H L H K(X_{st}, X_s) \mathbf{w}
\Biggr),
where
- :math:`K(X_s, X_s)` is the kernel matrix computed from the source-domain data,
- :math:`K(X_{st}, X_s)` is the kernel matrix computed between the combined source/target data :math:`X_{st} = [X_s; X_t]` and the source-domain data,
- :math:`Y` is the response variable,
- :math:`H` denotes the centering matrix,
- :math:`L` is the Laplacian matrix defined such that :math:`L_{ij}=1` if the i-th and j-th sample in :math:`X_{st}` belong to the same domain and 0 otherwise,
- :math:`\gamma` is the regularization parameter that balances maximizing the covariance between :math:`K(X_s, X_s)` and :math:`Y` with minimizing the domain discrepancy.
Parameters
----------
x : ndarray of shape (n_samples, n_features)
Labeled source domain data.
y : ndarray of shape (n_samples, 1)
Response variable associated with the source domain.
xs : ndarray of shape (n_source_samples, n_features)
Source domain feature data.
xt : ndarray of shape (n_target_samples, n_features) or list of ndarray
Target domain feature data. Multiple domains can be provided as a list.
A : int
Number of latent variables to use in the model.
l : float or tuple of length A
Regularization parameter. If a single value is provided, the same regularization is applied to all latent variables.
kernel_params : dict, default={"type": "rbf", "gamma": 10}
Kernel parameters. The dictionary must contain the following keys:
- "type": str, default="rbf"
Type of kernel to use. Supported types are "rbf", "linear", and "primal".
- "gamma": float, default=10
Kernel coefficient for the RBF kernel.
Returns
-------
b : ndarray of shape (n_features, 1)
Regression coefficient vector.
bst : ndarray of shape (n_features, 1)
Regression coefficient vector for the target domain.
T : ndarray of shape (n_samples, A)
Training data projections (scores).
Tst : ndarray of shape (n_source_samples + n_target_samples, A)
Source and target domain projections (scores).
W : ndarray of shape (m, A)
Weight matrix.
P : ndarray of shape (m, A)
Loadings matrix for source domain.
Pst : ndarray of shape (m, A)
Loadings matrix for source and target domains.
E : ndarray
Residuals for source domain.
Est : ndarray
Residuals for source and target domains.
Ey : ndarray
Residuals of response variable.
C : ndarray of shape (A, q)
Regression vector relating projections to the response variable.
centering : dict
Dictionary containing centering information.
References
----------
1. Huang, G., Chen, X., Li, L., Chen, X., Yuan, L., & Shi, W. (2020). Domain adaptive partial least squares regression. Chemometrics and Intelligent Laboratory Systems, 201, 103986.
Examples
--------
>>> import numpy as np
>>> from diPLSlib.functions import kdapls
>>> x = np.random.random((100, 10))
>>> y = np.random.random((100, 1))
>>> xs = np.random.random((50, 10))
>>> xt = np.random.random((50, 10))
>>> b, bst, T, Tst, W, P, Pst, E, Est, Ey, C, centering = kdapls(x, y, xs, xt, 2, 0.5)
'''
# Input validation
x = check_array(x, dtype=np.float64)
xs = check_array(xs, dtype=np.float64)
if isinstance(xt, list):
xt = [check_array(xti, dtype=np.float64) for xti in xt]
else:
xt = check_array(xt, dtype=np.float64)
y = check_array(y, dtype=np.float64)
# Get dimensions of arrays and initialize matrices
(ns, k) = np.shape(xs)
(n, k) = np.shape(x)
#(nt, k) = np.shape(xt)
#xst = np.vstack((xs,xt))
if isinstance(xt, list):
nt_list = [np.shape(xti)[0] for xti in xt]
nt = sum(nt_list)
xst = np.vstack([xs] + xt)
else:
(nt, k) = np.shape(xt)
xst = np.vstack((xs, xt))
Y = y.copy()
if Y.ndim == 1:
Y = Y.reshape(-1,1).copy()
q = Y.shape[1]
if kernel_params["type"] == "primal":
m = k
else:
m = n
W = np.zeros([m, A])
T = np.zeros([n, A])
Tst = np.zeros([ns+nt, A])
P = np.zeros([m, A])
Pst = np.zeros([m, A])
C = np.zeros([A, q])
# Laplace matrix
J = (1/n)*np.ones((n,n))
H = np.eye(n) - J
Jst = (1/(ns+nt))*np.ones((ns+nt,ns+nt))
Hst = np.eye(ns+nt) - Jst
#L1 = np.ones((ns+nt,1))
#L1[ns:,0] = -1
#L = L1@L1.T
L = np.zeros((ns + nt, ns + nt))
L[:ns, :ns] = 1
if isinstance(xt, list):
start_idx = ns
for nti in nt_list:
L[start_idx:start_idx+nti, start_idx:start_idx+nti] = 1
start_idx += nti
else:
L[ns:, ns:] = 1
# Compute kernel matrices
if kernel_params["type"] == "rbf":
gamma = kernel_params["gamma"]
K = kernels.rbf_kernel(x, x, gamma = gamma)
Kst = kernels.rbf_kernel(xst, x, gamma = gamma)
elif kernel_params["type"] == "linear":
K = x@x.T
Kst = xst@x.T
elif kernel_params["type"] == "primal":
K = x.copy()
Kst = xst.copy()
# Store Centering elements
centering = {}
y_mean_ = Y.mean(axis=0)
# Source domain
centering[0] = {}
centering[0]["n"] = n
centering[0]["K"] = K
centering[0]["y_mean_"] = y_mean_
# Source-target domain
centering[1] = {}
centering[1]["n"] = ns+nt
centering[1]["K"] = Kst
centering[1]["y_mean_"] = y_mean_
# Centering
if kernel_params["type"] == "primal":
K = H@K
Kst = Hst@Kst
else:
K = H@K@H
Kst = Kst - Kst@J - Jst@Kst + Jst@Kst@J
Y = H@Y
# Compute LVs
for i in range(A):
if isinstance(l, tuple) and len(l) == A: # Separate regularization params for each LV
lA = l[i]
elif isinstance(l, (float, int, np.int64)): # The same regularization param for each LV
lA = l
else:
raise ValueError("The regularization parameter must be either a single value or an A-tuple.")
# Compute domain-invariant weight vector
wM = (K.T@Y@Y.T@K) - lA*(Kst.T@L@Kst)
wd , wm = eigh(wM)
w = wm[:,-1]
w.shape = (w.shape[0],1)
# Compute scores and normalize
t = K@w
tst = Kst@w
t = t / np.linalg.norm(t)
tst = tst / np.linalg.norm(tst)
# Compute loadings
p = K.T@t
pst = Kst.T@tst
# Regress y on t
c = t.T@Y
# Store w,t,p,c
W[:, i] = w.reshape(m)
T[:, i] = t.reshape(n)
Tst[:, i] = tst.reshape(ns+nt)
P[:, i] = p.reshape(m)
Pst[:, i] = pst.reshape(m)
C[i] = c.reshape(q)
# Deflation
K = K - t@p.T
Kst = Kst - tst@pst.T
Y = Y - (t@c)
# Calculate regression vector
b = W@(np.linalg.inv(P.T@W))@C
bst = W@(np.linalg.inv(Pst.T@W))@C
# Residuals
E = K
Est = Kst
Ey = Y
return b, bst, T, Tst, W, P, Pst, E, Est, Ey, C, centering
[docs]
def dipals(x, y, xs, xt, A, l, heuristic: bool = False, target_domain=0, laplacian: bool = False):
"""
Perform (Multiple) Domain-Invariant Partial Least Squares (di-PLS) regression.
This method fits a PLS regression model using labeled source domain data and potentially
unlabeled target domain data across multiple domains, aiming to build a model that
generalizes well across different domains.
Parameters
----------
x : ndarray of shape (n_samples, n_features)
Labeled source domain data.
y : ndarray of shape (n_samples, 1)
Response variable associated with the source domain.
xs : ndarray of shape (n_source_samples, n_features)
Source domain feature data.
xt : ndarray of shape (n_target_samples, n_features) or list of ndarray
Target domain feature data. Multiple domains can be provided as a list.
A : int
Number of latent variables to use in the model.
l : float or tuple of length A
Regularization parameter. If a single value is provided, the same regularization is applied to all latent variables.
heuristic : bool, default=False
If True, automatically determine the regularization parameter to equally balance fitting Y and minimizing domain discrepancy.
target_domain : int, default=0
Specifies which target domain the model should apply to, where 0 indicates the source domain.
laplacian : bool, default=False
If True, uses a Laplacian matrix to regularize distances between matched calibration transfer
samples in latent variable space.
Returns
-------
b : ndarray of shape (n_features, 1)
Regression coefficient vector.
T : ndarray of shape (n_samples, A)
Training data projections (scores).
Ts : ndarray of shape (n_source_samples, A)
Source domain projections (scores).
Tt : ndarray of shape (n_target_samples, A) or list of ndarray
Target domain projections (scores).
W : ndarray of shape (n_features, A)
Weight matrix.
P : ndarray of shape (n_features, A)
Loadings matrix corresponding to x.
Ps : ndarray of shape (n_features, A)
Loadings matrix corresponding to xs.
Pt : ndarray of shape (n_features, A) or list of ndarray
Loadings matrix corresponding to xt.
E : ndarray
Residuals of training data.
Es : ndarray
Source domain residual matrix.
Et : ndarray or list of ndarray
Target domain residual matrix.
Ey : ndarray
Residuals of response variable in the source domain.
C : ndarray of shape (A, 1)
Regression vector relating source projections to the response variable.
opt_l : ndarray of shape (A,)
Heuristically determined regularization parameter for each latent variable.
discrepancy : ndarray of shape (A,)
The variance discrepancy between source and target domain projections.
References
----------
1. Ramin Nikzad-Langerodi et al., "Domain-Invariant Partial Least Squares Regression", Analytical Chemistry, 2018.
2. Ramin Nikzad-Langerodi et al., "Domain-Invariant Regression under Beer-Lambert's Law", Proc. ICMLA, 2019.
3. Ramin Nikzad-Langerodi et al., "Domain adaptation for regression under Beer–Lambert’s law", Knowledge-Based Systems, 2020.
4. B. Mikulasek et al., "Partial least squares regression with multiple domains", Journal of Chemometrics, 2023.
Examples
--------
>>> import numpy as np
>>> from diPLSlib.functions import dipals
>>> x = np.random.random((100, 10))
>>> y = np.random.random((100, 1))
>>> xs = np.random.random((50, 10))
>>> xt = np.random.random((50, 10))
>>> b, T, Ts, Tt, W, P, Ps, Pt, E, Es, Et, Ey, C, opt_l, discrepancy = dipals(x, y, xs, xt, 2, 0.1)
"""
# Get array dimensions
(n, k) = np.shape(x)
(ns, k) = np.shape(xs)
# Initialize matrices
Xt = xt
if(type(xt) is list):
Pt = []
Tt = []
for z in range(len(xt)):
Tti = np.zeros([np.shape(xt[z])[0], A])
Pti = np.zeros([k, A])
Pt.append(Pti)
Tt.append(Tti)
else:
(nt, k) = np.shape(xt)
Tt = np.zeros([nt, A])
Pt = np.zeros([k, A])
T = np.zeros([n, A])
P = np.zeros([k, A])
Ts = np.zeros([ns, A])
Ps = np.zeros([k, A])
W = np.zeros([k, A])
C = np.zeros([A, 1])
opt_l = np.zeros(A)
discrepancy = np.zeros(A)
I = np.eye(k)
# Compute LVs
for i in range(A):
if isinstance(l, tuple) and len(l) == A: # Separate regularization params for each LV
lA = l[i]
elif isinstance(l, (float, int, np.int64)): # The same regularization param for each LV
lA = l
else:
raise ValueError("The regularization parameter must be either a single value or an A-tuple.")
# Compute Domain-Invariant Weight Vector
w_pls = ((y.T@x)/(y.T@y)) # Ordinary PLS solution
if(lA != 0 or heuristic is True): # In case of regularization
if(type(xt) is not list):
# Convex relaxation of covariance difference matrix
D = convex_relaxation(xs, xt)
# Multiple target domains
elif(type(xt) is list):
#print('Relaxing domains ... ')
ndoms = len(xt)
D = np.zeros([k, k])
for z in range(ndoms):
d = convex_relaxation(xs, xt[z])
D = D + d
elif(laplacian is True):
J = np.vstack([xs, xt])
L = transfer_laplacian(xs, xt)
D = J.T@L@J
else:
print('xt must either be a matrix or list of (appropriately dimensioned) matrices')
if(heuristic is True): # Regularization parameter heuristic
w_pls = w_pls/np.linalg.norm(w_pls)
gamma = (np.linalg.norm((x-y@w_pls))**2)/(w_pls@D@w_pls.T)
opt_l[i] = gamma
lA = gamma
reg = I+lA/((y.T@y))*D
w = scipy.linalg.solve(reg.T, w_pls.T, assume_a='sym').T # 10 times faster than previous comptation of reg
# Normalize w
w = w/np.linalg.norm(w)
# Absolute difference between variance of source and target domain projections
discrepancy[i] = (w @ D @ w.T).item()
else:
if(type(xt) is list):
D = convex_relaxation(xs, xt[0])
else:
D = convex_relaxation(xs, xt)
w = w_pls/np.linalg.norm(w_pls)
discrepancy[i] = (w @ D @ w.T).item()
# Compute scores
t = x@w.T
ts = xs@w.T
if(type(xt) is list):
tt = []
for z in range(len(xt)):
tti = xt[z]@w.T
tt.append(tti)
else:
tt = xt@w.T
# Regress y on t
c = (y.T@t)/(t.T@t)
# Compute loadings
p = (t.T@x)/(t.T@t)
ps = (ts.T@xs)/(ts.T@ts)
if(type(xt) is list):
pt = []
for z in range(len(xt)):
pti = (tt[z].T@xt[z])/(tt[z].T@tt[z])
pt.append(pti)
else:
pt = (tt.T@xt)/(tt.T@tt)
# Deflate X and y (Gram-Schmidt orthogonalization)
x = x - t@p
if laplacian is False: # Calibration transfer case
xs = xs - ts@ps
if(type(xt) is list):
for z in range(len(xt)):
xt[z] = xt[z] - tt[z]@pt[z]
else:
if(np.sum(xt) != 0): # Deflate target matrix only if not zero
if laplacian is False: # Calibration transfer case
xt = xt - tt@pt
y = y - t*c
# Store w,t,p,c
W[:, i] = w
T[:, i] = t.reshape(n)
Ts[:, i] = ts.reshape(ns)
P[:, i] = p.reshape(k)
Ps[:, i] = ps.reshape(k)
C[i] = c
if(type(xt) is list):
for z in range(len(xt)):
Pt[z][:, i] = pt[z].reshape(k)
Tt[z][:, i] = tt[z].reshape(np.shape(xt[z])[0])
else:
Pt[:, i] = pt.reshape(k)
Tt[:, i] = tt.reshape(nt)
# Calculate regression vector
if laplacian is True: # Calibration transfer case
b = W@(np.linalg.inv(P.T@W))@C
else:
if isinstance(l, tuple): # Check if multiple regularization # parameters are passed (one for each LV)
if target_domain==0: # Multiple target domains (Domain unknown)
b = W@(np.linalg.inv(P.T@W))@C
elif type(xt) is np.ndarray: # Single target domain
b = W@(np.linalg.inv(Pt.T@W))@C
elif type(xt) is list: # Multiple target domains (Domain known)
b = W@(np.linalg.inv(Pt[target_domain-1].T@W))@C
else:
b = W@(np.linalg.inv(P.T@W))@C
# Store residuals
E = x
Es = xs
Et = xt
Ey = y
return b, T, Ts, Tt, W, P, Ps, Pt, E, Es, Et, Ey, C, opt_l, discrepancy
[docs]
def convex_relaxation(xs, xt):
"""
Perform convex relaxation of the covariance difference matrix.
This relaxation involves computing the eigenvalue decomposition of the symmetric covariance
difference matrix, inverting the signs of negative eigenvalues, and reconstructing the matrix.
This corresponds to an upper bound on the covariance difference between source and target domains.
Parameters
----------
xs : ndarray of shape (n_source_samples, n_features)
Feature data from the source domain.
xt : ndarray of shape (n_target_samples, n_features)
Feature data from the target domain.
Returns
-------
D : ndarray of shape (n_features, n_features)
Relaxed covariance difference matrix.
References
----------
Ramin Nikzad-Langerodi et al., "Domain-Invariant Regression under Beer-Lambert's Law", Proc. ICMLA, 2019.
Examples
--------
>>> import numpy as np
>>> from diPLSlib.functions import convex_relaxation
>>> xs = np.random.random((100, 10))
>>> xt = np.random.random((100, 10))
>>> D = convex_relaxation(xs, xt)
"""
# Ensure input arrays are numerical
xs = np.asarray(xs, dtype=np.float64)
xt = np.asarray(xt, dtype=np.float64)
# Check for NaN or infinite values
if not np.all(np.isfinite(xs)) or not np.all(np.isfinite(xt)):
raise ValueError("Input arrays must not contain NaN or infinite values. one sample.")
# Check for complex data
if np.iscomplexobj(xs) or np.iscomplexobj(xt):
raise ValueError("Complex data not supported.")
# Preliminaries
ns = np.shape(xs)[0]
nt = np.shape(xt)[0]
x = np.vstack([xs, xt])
x = x[..., :] - np.mean(x, 0)
# Compute difference between source and target covariance matrices
rot = (1/ns*xs.T@xs- 1/nt*xt.T@xt)
# Convex Relaxation
w,v = eigh(rot)
eigs = np.abs(w)
eigs = np.diag(eigs)
D = v@eigs@v.T
return D
[docs]
def transfer_laplacian(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""
Construct a Laplacian matrix for calibration transfer problems.
Parameters
----------
x : ndarray of shape (n_samples, n_features)
Data samples from device 1.
y : ndarray of shape (n_samples, n_features)
Data samples from device 2.
Returns
-------
L : ndarray of shape (2 * n_samples, 2 * n_samples)
The Laplacian matrix for the calibration transfer problem.
References
----------
Nikzad‐Langerodi, R., & Sobieczky, F. (2021). Graph‐based calibration transfer.
Journal of Chemometrics, 35(4), e3319.
Examples
--------
>>> import numpy as np
>>> from diPLSlib.functions import transfer_laplacian
>>> x = np.array([[1, 2], [3, 4]])
>>> y = np.array([[2, 3], [4, 5]])
>>> L = transfer_laplacian(x, y)
>>> print(L)
[[ 1. 0. -1. -0.]
[ 0. 1. -0. -1.]
[-1. -0. 1. 0.]
[-0. -1. 0. 1.]]
"""
(n, p) = np.shape(x)
I = np.eye(n)
L = np.vstack([np.hstack([I,-I]),np.hstack([-I,I])])
return L
[docs]
def edpls(x: np.ndarray, y: np.ndarray, n_components: int, epsilon: float, delta: float = 0.05, rng=None):
r'''
(\epsilon, \delta)-Differentially Private Partial Least Squares Regression.
A Gaussian mechanism according to Balle & Wang (2018) is used to privately release weights :math:`\mathbf{W}`, scores :math:`\mathbf{T}`,
and :math:`X/Y`-loadings :math:`\mathbf{P}`/:math:`\mathbf{c}` from the PLS1 algorithm. For each latent variable, i.i.d. noise from
:math:`\mathcal{N}(0,\sigma^2)` with variance satisfying
.. math::
\Phi\left( \frac{\Delta}{2\sigma} - \frac{\epsilon\sigma}{\Delta} \right) - e^{\epsilon} \Phi\left( -\frac{\Delta}{2\sigma} - \frac{\epsilon\sigma}{\Delta} \right)\leq \delta,
with :math:`\Phi(t) = \mathrm{P}[\mathcal{N}(0,1)\leq t]` (i.e., the CDF of the standard univariate Gaussian distribution), is added to the weights, scores, and loadings, whereas the sensitivity :math:`\Delta(\cdot)` for the functions releasing the corresponding quantities is calculated as follows:
.. math::
\Delta(w) = \sup_{(\mathbf{x}, y)} |y| \|\mathbf{x}\|_2
.. math::
\Delta(t) \leq \sup_{\mathbf{x}} \|\mathbf{x}\|_2
.. math::
\Delta(p) \leq \sup_{\mathbf{x}} \|\mathbf{x}\|_2
.. math::
\Delta(c) \leq \sup_{y} |y|.
Note that in contrast to the Gaussian mechanism proposed in Dwork et al. (2006) and Dwork et al. (2014), the mechanism of Balle & Wang (2018) guarantees :math:`(\epsilon, \delta)`-differential privacy
for any value of :math:`\epsilon > 0` and not only for :math:`\epsilon \leq 1`.
Parameters
----------
x : ndarray of shape (n_samples, n_features)
Input data.
y : ndarray of shape (n_samples, n_targets)
Target values.
n_components : int
Number of latent variables.
epsilon : float
Privacy loss parameter.
delta : float, default=0.05
Failure probability.
rng : numpy.random.Generator, optional
Random number generator.
Returns
-------
coef_ : ndarray of shape (n_features, n_targets)
Regression coefficients.
x_weights_ : ndarray of shape (n_features, n_components)
X weights.
x_loadings_ : ndarray of shape (n_features, n_components)
X loadings.
y_loadings_ : ndarray of shape (n_components, n_targets)
Y loadings.
x_scores_ : ndarray of shape (n_samples, n_components)
X scores.
x_residuals_ : ndarray of shape (n_samples, n_features)
X residuals.
y_residuals_ : ndarray of shape (n_samples, n_targets)
Y residuals.
References
----------
- R. Nikzad-Langerodi, et al. (2024). (epsilon,delta)-Differentially private partial least squares regression (unpublished).
- Balle, B., & Wang, Y. X. (2018, July). Improving the Gaussian mechanism for differential privacy: Analytical calibration and optimal denoising. In International Conference on Machine Learning (pp. 394-403). PMLR.
Examples
--------
>>> from diPLSlib.functions import edpls
>>> import numpy as np
>>> x = np.random.rand(100, 10)
>>> y = np.random.rand(100, 1)
>>> coef_, x_weights_, x_loadings_, y_loadings_, x_scores_, x_residuals_, y_residuals_ = edpls(x, y, 2, epsilon=0.1, delta=0.05)
'''
# Input validation
x = check_array(x, dtype=np.float64)
y = check_array(y, dtype=np.float64)
# Get dimensions of arrays
(n_, n_features_) = np.shape(x)
I = np.eye(n_features_)
# Weights
x_weights_ = np.zeros([n_features_, n_components])
# X Scores
x_scores_ = np.zeros([n_, n_components])
# X Loadings
x_loadings_ = np.zeros([n_features_, n_components])
# Y Loadings
y_loadings_ = np.zeros([n_components, 1])
# Iterate over the number of components
for i in range(n_components):
# Compute weights w
w_pls = ((y.T@x)/(y.T@y))
# Normalize w (noise-less)
wo = w_pls / np.linalg.norm(w_pls)
# Compute x scores and normalize (noise-less)
to = x @ wo.T
to = to / np.linalg.norm(to)
# Compute x scores and normalize (before adding noise)
t = x @ wo.T
t = to / np.linalg.norm(to)
# Add noise to w
x_min = x.min(axis=0)
x_max = x.max(axis=0)
y_min = y.min(axis=0)
y_max = y.max(axis=0)
x_norm = np.linalg.norm(x_max - x_min)
y_norm = y_max - y_min
x_max_norm = np.linalg.norm(x, axis=1).max()
sensitivity = x_max_norm*y_max
R = calibrateAnalyticGaussianMechanism(epsilon, delta, sensitivity)**2
if rng is None:
v = np.random.normal(0, R, n_features_)
else:
v = rng.normal(0, R, n_features_)
w = wo + v
# Normalize w (after adding noise)
w = w / np.linalg.norm(w)
# Add noise to t
sensitivity = x_max_norm
R = calibrateAnalyticGaussianMechanism(epsilon, delta, sensitivity)**2
if rng is None:
v = np.random.normal(0, R, n_)
else:
v = rng.normal(0, R, n_)
t = t + v.reshape(n_,1)
# Normalize t (after adding noise)
t = t / np.linalg.norm(t)
# Compute x loadings (noise-less)
po = (to.T@x)/(to.T@to)
# Compute x loadings (before adding noise)
p = (to.T@x)/(to.T@to)
# Add noise
#sensitivity = 2*x_max_norm
sensitivity = x_max_norm
R = calibrateAnalyticGaussianMechanism(epsilon, delta, sensitivity)**2
if rng is None:
v = np.random.normal(0, R, n_features_)
else:
v = rng.normal(0, R, n_features_)
p = p + v
# Compute y loadings (noise-less)
co = (y.T@to)/(to.T@to)
# Compute y loadings (before adding noise)
c = (y.T@to)/(to.T@to)
# Add noise
sensitivity = y_max
R = calibrateAnalyticGaussianMechanism(epsilon, delta, sensitivity)**2
if rng is None:
v = np.random.normal(0, R, 1)
else:
v = rng.normal(0, R, 1)
c = c + v
# Store weights, scores and loadings
x_weights_[:, i] = w
x_scores_[:, i] = t.reshape(n_)
x_loadings_[:, i] = p.reshape(n_features_)
y_loadings_[i] = c
# Deflate x and y
x = x - to @ po
y = y - to * co
# Compute regression coefficients
coef_ = x_weights_@(np.linalg.inv(x_loadings_.T@x_weights_))@y_loadings_
# Compute residuals
x_residuals_ = x
y_residuals_ = y
return (coef_, x_weights_, x_loadings_, y_loadings_, x_scores_, x_residuals_, y_residuals_ )