KESN

KESN - Keep 'Em Separated, arbitrary Number of groups - tries to find the intuitively optimal partitioning of a set of values into subsets of values. The criterion is the product of the number of elements in each partition, times the ratio of the mean distance between elements of different partitions and the same partition. Thus "wasteful" partitioning is punished, while relative separation is encouraged. For instance, KESN with maxNGroups = 3 separates the sorted vector of [1 2 3 2 1 6 5 7 4 8 7 6 7 10 9] into partitions with indices iMin <= 5, 5 < i2 <= 10, 10 <= iMax. KESN returns the partioned elements in the cells of groups. bestPartition specifies the cut-off indices in the sorted vector.

Code updated and debugged on 12-10-2007. Note that the same value can occur in different partitions!

function [groups, partitions, crit, bestPartition] = KESN(varargin)

% function [groups, partitions, crit, bestPartition] = KESN(vec)
% function [groups, partitions, crit, bestPartition] = KESN(vec, maxNGroups)
%
% partitions: indices of last elements of partitions.
%
% T. E. Gladwin, 2007

vec = varargin{1};
if length(varargin) > 1,
    maxNGroups = varargin{2};
else,
    maxNGroups = [];
end;
[ord, sortIndices] = sort(vec);

partitions = partition(length(ord), maxNGroups);

for iPart = 1:length(partitions),
    crit(iPart) = calcCrit(ord, partitions{iPart});
    fprintf('%g ', partitions{iPart});
    fprintf('\t %g', crit(iPart))
    fprintf('\n');
end;
maxcrit = crit(1); % loop to get maxcrit with minimum number of partitions
best = 1;
for n = 2:length(crit),
    if crit(n) > maxcrit,
        maxcrit = crit(n);
        best = n;
    end;
end;
bestPartition = partitions{best};
groups = getGroups(vec, bestPartition);

% Plotting
if length(varargin) > 2,
    if varargin{3} == 1,
        figure(1);
        clf;
        subplot(3, 1, 1);
        plot(ord, 'k*');
        yl = get(gca, 'YLim');
        ybuff = (max(vec) - min(vec)) / 20;
        ylim([yl(1) - ybuff yl(2) + ybuff]);
        hold on;
        for n = 1:length(bestPartition),
            plot([bestPartition(n) bestPartition(n)], [yl(:) yl(:)], 'r');
        end;
        xl = get(gca, 'XLim');
        xbuff = length(vec) / 20;
        xlim([xl(1) - xbuff xl(2) + xbuff]);
        set(gca, 'XTick', 1:length(vec));
        subplot(3, 1, 2);
        plot(crit);
        subplot(3, 1, 3);
        hold on;
        symbols = 'ox+*sdv^<>ph';
        for iGroup = 1:length(groups),
            n = mod(iGroup, length(symbols));
            plot(groups{iGroup}, vec(groups{iGroup}), ['k' symbols(n)]);
        end;
        yl = get(gca, 'YLim');
        ybuff = (max(vec) - min(vec)) / 20;
        ylim([yl(1) - ybuff yl(2) + ybuff]);
        xl = get(gca, 'XLim');
        xbuff = length(vec) / 20;
        xlim([xl(1) - xbuff xl(2) + xbuff]);
        set(gca, 'XTick', 1:length(vec));
    end;
end;

function crit = calcCrit(ord, indices)
for n = 1:(length(indices) - 1),
    thisPartition = ord((indices(n) + 1):indices(n + 1));
    if length(thisPartition < 2),
        crit = 0;
    end;
end;
indices = [0 indices length(ord)];
within = [];
withinD = [];
m = [];
N = [];
inD = [];
outD = [];
for n = 1:(length(indices) - 1),
    thisPartition = ord((indices(n) + 1):(indices(n + 1)));
    N(n) = length(thisPartition);
    m(n) = mean(thisPartition);
    within(n) = var(thisPartition);
    
    for x = 1:length(thisPartition),
        in0 = thisPartition(x);
        for y = [1:indices(1) (indices(n + 1) + 1):length(ord)],
            out0 = ord(y);
            sqd0 = (in0 - out0) ^ 2;
            outD = [outD sqd0];
        end;
        for z = 1:length(thisPartition),
            if x == z,
                continue;
            else,
                in1 = thisPartition(z);
                sqd0 = (in0 - in1) ^ 2;
                inD = [inD sqd0];
            end;
        end;
    end;
end;
if mean(inD) == 0,
    crit = 0;
else,
    crit = prod(N - 1) * mean(outD) / mean(inD);
end;

function groups = getGroups(vec, bestPartition)
[ord, ind] = sort(vec);
indices = [0 bestPartition length(ord)];
for n = 1:(length(indices) - 1),
    thisPartition = (indices(n) + 1):indices(n + 1);
    groups{n} = vec(ind(thisPartition));
end;

function partitions = partition(varargin)

% function parts = partition(N)
% function parts = partition(N, nGroups)
% 
% return all partitions (of two to N - 1 equivalence groups) of a set of N
% elements.
%
% The i-th indices contain the last element of the i-th group.

N = varargin{1};
nGroupsVector = [];
if length(varargin) > 1,
    if ~isempty(varargin{2}),
        nGroupsVector = 2:min(varargin{2}, N - 1);
    end;
end;
if isempty(nGroupsVector),
    nGroupsVector = 2:(N - 1);
end;

iPart = 1;
for nGroups = nGroupsVector,
    indices = 1:(nGroups - 1);
    while ~isempty(indices),
        %fprintf('%g ',indices);
        %fprintf('\n');
        partitions{iPart} = indices;
        iPart = iPart + 1;
        [indices] = nextStep(indices, N - 1);
    end;
end;

function indices = nextStep(indices,  N)
indices(end) = indices(end) + 1;
if indices(end) > N,
    if length(indices) == 1,
        indices = [];
    else,
        partial = nextStep(indices(1:(end - 1)), N - 1);
        if ~isempty(partial),
            indices(1:(end - 1)) = partial;
            indices(end) = indices(end - 1) + 1;
        else,
            indices = [];
        end;
    end;
end;