# teg_PCA

Does principal components analysis (PCA) without calculating the covariance matrix, using the NIPALS algorithm. This makes it usable where the normal method would cause an Out of Memory error.

Usage: [L, L2O, O2L, EV, PEx] = teg_PCA(X).

```function [S, L2O, O2L, EV, PEx] = teg_PCA(varargin)

% function [L, L2O, O2L, EV, PEx] = teg_PCA(X, {nCompsMax or expl. var or 'scree'})
%
% Implementation of the NIPALS algorithm.
%
% Input:
%   X: data matrix. Columns are variables, rows are observations.
%   Optional: Stopping criteria:
%       nCompsMax: 1, 2, 3, ... Maximum number of components to extract
%       Expl. var: [0, 1). Proportion of variance to explain.
%       'scree': Stop including components when they explain less than the
%       average amount of variance.
% Output:
%   L: matrix of latent variable scores
%   L2O: matrix of mixing weights: O = L * L2O
%   O2L: matrix of unmixing weights: L = O * O2L
%   EV: matrix of eigenvalues
%   PEx: proportion of explained variance per number of components
%

X = varargin{1};
nCompsTag = -1;
scree = 0;
PEx_req = 1;
if length(varargin) > 1,
par0 = varargin{2};
if strcmp(varargin{2}, 'scree'),
scree = 1;
elseif par0 >= 1,
nCompsTag = varargin{2};
else,
PEx_req = varargin{2};
end;
end;

[nR, nC] = size(X);
for iC = 1:size(X, 2),
mX = mean(X(:, iC));
X(:, iC) = X(:, iC) - mean(X(:, iC));
end;
Full = X;
varX = sum(var(X));
screecrit = varX / min(nR, nC);
S = [];
W = [];
D = [];
AIC = [];
PEx = [];
err0 = [];
eps0 = 0.00000001;

i0 = 1;
error_old = sum(sum(X .^ 2)) / size(X, 1);
err0 = 1;
err00 = error_old;

while 1 == 1,
random_column = 1 + floor(rand * nC);
s = X(:, random_column);
eig_old = s' * s;
step_error = eps0 * eig_old + 1;

fprintf(['Component ' num2str(i0) '\t']);

errstr = '';
errperiod = 0.1;
tic;

it0 = 0;
max_its = min(size(X, 1), size(X, 2));
while step_error > eps0 * eig_old,

if it0 > max_its,
it0 = it0 + 1;
break;
end;

w = (X' * s) / (s' * s);

w = w / sqrt(w' * w);

% Project X onto w: find new score vector
s_new = (X * w) / (w' * w);

% Check for convergence
eig_new = s_new' * s_new;
step_error = abs(eig_old - eig_new);

eig_old = eig_new;
s = s_new;

if toc > errperiod,
tic;
errstr = overwrite_str(errstr, step_error, eig_old, w);
end;
end;
errstr = overwrite_str(errstr, step_error, eig_old, w);
S = [S s];
W = [W; w'];
D(i0, i0) = eig_old / size(X, 1);
for iC = 1:size(X, 2),
v = s * w(iC)';
X(:, iC) = X(:, iC) - v;
end;
dD = diag(D);
PEx = [PEx; sum(dD) / varX];
fprintf(['\t' num2str(PEx(end))]);
fprintf('\n');
% AIC (p. 48)
error_new = 0;
for iC = 1:size(X, 2),
est = S * W(:, iC);
error_new = error_new + sum((Full(:, iC) - est) .^ 2) / length(v);
end;
err0 = [err0; error_new / err00];
error_old = error_new;
% To next iteration, or break
if scree == 1 && D(i0, i0) <= screecrit || isnan(D(i0, i0)),
S = S(:, 1:(end - 1));
W = W(1:(end - 1), :);
D = D(1:(end - 1), 1:(end - 1));
PEx = PEx(1:(end - 1));
fprintf(['Scree crit or NaN eigenvalue hit: removing last component.\n']);
break;
end;
if i0 == nCompsTag + 1 || i0 >= min(size(X, 1), size(X, 2)) || PEx(end) >= PEx_req,
break;
end;
i0 = i0 + 1;
end;

L2O = W;
O2L = L2O' * inv(L2O * L2O');
EV = diag(D);

function errstr = overwrite_str(errstr, step_error, eig_old, w)
for si = 1:length(errstr),
fprintf('\b');
end;
errstr = num2str([eig_old step_error / eig_old]);
fprintf(errstr);
```