class cv::ml::EM

Overview

The class implements the Expectation Maximization algorithm. More…

#include <ml.hpp>

// Expectation Maximization algorithm for Gaussian mixture models.
// All non-static methods are pure virtual; instances are obtained through
// the static create() and load() factories declared at the end of the class.
class EM: public cv::ml::StatModel
{
public:
    // enums

    // Default parameter values.
    enum
    {
        DEFAULT_NCLUSTERS =5,   // default number of mixture components
        DEFAULT_MAX_ITERS =100, // default maximum number of iterations
    };

    // NOTE(review): presumably selects the step training starts from
    // (cf. trainE / trainM / trainEM) -- not documented on this page; confirm.
    enum
    {
        START_E_STEP    =1,
        START_M_STEP    =2,
        START_AUTO_STEP =0,
    };

    // Covariance matrix type constraints (see getCovarianceMatrixType).
    enum Types;

    // methods

    // Number of mixture components in the Gaussian mixture model.
    virtual
    int
    getClustersNumber() const = 0;

    // Constraint on the covariance matrices, expressed as an EM::Types value.
    virtual
    int
    getCovarianceMatrixType() const = 0;

    // Fills covs with one square NxN floating-point covariance matrix per
    // mixture, where N is the space dimensionality.
    virtual
    void
    getCovs(std::vector<Mat>& covs) const = 0;

    // Returns the cluster centers: one row per mixture, one column per
    // space dimension.
    virtual
    Mat
    getMeans() const = 0;

    // Termination criteria of the EM algorithm: maxCount limits the number of
    // M-steps, epsilon the relative change of the likelihood logarithm.
    virtual
    TermCriteria
    getTermCriteria() const = 0;

    // Returns the mixture weights, one element per mixture.
    virtual
    Mat
    getWeights() const = 0;

    // Computes posterior probabilities for the given samples. The optional
    // results matrix is nSamples x nClusters; flags is ignored.
    virtual
    float
    predict(
        InputArray samples,
        OutputArray results = noArray(),
        int flags = 0
        ) const = 0;

    // Returns a two-element vector for a single sample: element 0 is the
    // likelihood logarithm, element 1 the index of the most probable mixture
    // component. probs receives the posterior probabilities of each component.
    virtual
    Vec2d
    predict2(
        InputArray sample,
        OutputArray probs
        ) const = 0;

    // Setter counterpart of getClustersNumber.
    virtual
    void
    setClustersNumber(int val) = 0;

    // Setter counterpart of getCovarianceMatrixType.
    virtual
    void
    setCovarianceMatrixType(int val) = 0;

    // Setter counterpart of getTermCriteria.
    virtual
    void
    setTermCriteria(const TermCriteria& val) = 0;

    // Estimates the Gaussian mixture parameters starting with the Expectation
    // step. Initial means are required; initial covariance matrices and
    // weights are optional.
    virtual
    bool
    trainE(
        InputArray samples,
        InputArray means0,
        InputArray covs0 = noArray(),
        InputArray weights0 = noArray(),
        OutputArray logLikelihoods = noArray(),
        OutputArray labels = noArray(),
        OutputArray probs = noArray()
        ) = 0;

    // Estimates the Gaussian mixture parameters; initial values of the model
    // parameters are estimated by the k-means algorithm.
    virtual
    bool
    trainEM(
        InputArray samples,
        OutputArray logLikelihoods = noArray(),
        OutputArray labels = noArray(),
        OutputArray probs = noArray()
        ) = 0;

    // Estimates the Gaussian mixture parameters starting with the
    // Maximization step, from the initial probabilities given in probs0.
    virtual
    bool
    trainM(
        InputArray samples,
        InputArray probs0,
        OutputArray logLikelihoods = noArray(),
        OutputArray labels = noArray(),
        OutputArray probs = noArray()
        ) = 0;

    // Creates an empty EM model, to be trained or loaded afterwards.
    static
    Ptr<EM>
    create();

    // Loads a serialized EM from the file at filepath; nodeName optionally
    // names the node containing the classifier.
    static
    Ptr<EM>
    load(
        const String& filepath,
        const String& nodeName = String()
        );
};

Inherited Members

public:
    // enums

    enum Flags;

    // methods

    virtual
    void
    clear();

    virtual
    bool
    empty() const;

    virtual
    String
    getDefaultName() const;

    virtual
    void
    read(const FileNode& fn);

    virtual
    void
    save(const String& filename) const;

    virtual
    void
    write(FileStorage& fs) const;

    template <typename _Tp>
    static
    Ptr<_Tp>
    load(
        const String& filename,
        const String& objname = String()
        );

    template <typename _Tp>
    static
    Ptr<_Tp>
    loadFromString(
        const String& strModel,
        const String& objname = String()
        );

    template <typename _Tp>
    static
    Ptr<_Tp>
    read(const FileNode& fn);

    virtual
    float
    calcError(
        const Ptr<TrainData>& data,
        bool test,
        OutputArray resp
        ) const;

    virtual
    bool
    empty() const;

    virtual
    int
    getVarCount() const = 0;

    virtual
    bool
    isClassifier() const = 0;

    virtual
    bool
    isTrained() const = 0;

    virtual
    float
    predict(
        InputArray samples,
        OutputArray results = noArray(),
        int flags = 0
        ) const = 0;

    virtual
    bool
    train(
        const Ptr<TrainData>& trainData,
        int flags = 0
        );

    virtual
    bool
    train(
        InputArray samples,
        int layout,
        InputArray responses
        );

    template <typename _Tp>
    static
    Ptr<_Tp>
    train(
        const Ptr<TrainData>& data,
        int flags = 0
        );

protected:
    // methods

    void
    writeFormat(FileStorage& fs) const;

Detailed Documentation

The class implements the Expectation Maximization algorithm.

See also:

Expectation Maximization

Methods

virtual
int
getClustersNumber() const = 0

The number of mixture components in the Gaussian mixture model. The default value of the parameter is EM::DEFAULT_NCLUSTERS = 5. Some EM implementations can determine the optimal number of mixtures within a specified value range, but that is not the case in ML yet.

See also:

setClustersNumber

virtual
int
getCovarianceMatrixType() const = 0

Constraint on covariance matrices which defines type of matrices. See EM::Types.

See also:

setCovarianceMatrixType

virtual
void
getCovs(std::vector<Mat>& covs) const = 0

Returns covariance matrices.

Returns a vector of covariance matrices. The number of matrices is the number of Gaussian mixtures; each matrix is a square floating-point matrix NxN, where N is the space dimensionality.

virtual
Mat
getMeans() const = 0

Returns the cluster centers (means of the Gaussian mixture)

Returns matrix with the number of rows equal to the number of mixtures and number of columns equal to the space dimensionality.

virtual
TermCriteria
getTermCriteria() const = 0

The termination criteria of the EM algorithm. The EM algorithm can be terminated by the number of iterations termCrit.maxCount (number of M-steps) or when relative change of likelihood logarithm is less than termCrit.epsilon. Default maximum number of iterations is EM::DEFAULT_MAX_ITERS =100.

See also:

setTermCriteria

virtual
Mat
getWeights() const = 0

Returns weights of the mixtures.

Returns vector with the number of elements equal to the number of mixtures.

virtual
float
predict(
    InputArray samples,
    OutputArray results = noArray(),
    int flags = 0
    ) const = 0

Returns posterior probabilities for the provided samples.

Parameters:

samples The input samples, floating-point matrix
results The optional output \(nSamples \times nClusters\) matrix of results. It contains posterior probabilities for each sample from the input
flags This parameter will be ignored
virtual
Vec2d
predict2(
    InputArray sample,
    OutputArray probs
    ) const = 0

Returns a likelihood logarithm value and an index of the most probable mixture component for the given sample.

The method returns a two-element double vector. The element at index 0 is the likelihood logarithm value for the sample. The element at index 1 is the index of the most probable mixture component for the given sample.

Parameters:

sample A sample for classification. It should be a one-channel matrix of \(1 \times dims\) or \(dims \times 1\) size.
probs Optional output matrix that contains posterior probabilities of each component given the sample. It has \(1 \times nclusters\) size and CV_64FC1 type.
virtual
void
setClustersNumber(int val) = 0

See also:

getClustersNumber

virtual
void
setCovarianceMatrixType(int val) = 0

See also:

getCovarianceMatrixType

virtual
void
setTermCriteria(const TermCriteria& val) = 0

See also:

getTermCriteria

virtual
bool
trainE(
    InputArray samples,
    InputArray means0,
    InputArray covs0 = noArray(),
    InputArray weights0 = noArray(),
    OutputArray logLikelihoods = noArray(),
    OutputArray labels = noArray(),
    OutputArray probs = noArray()
    ) = 0

Estimate the Gaussian mixture parameters from a samples set.

This variation starts with Expectation step. You need to provide initial means \(a_k\) of mixture components. Optionally you can pass initial weights \(\pi_k\) and covariance matrices \(S_k\) of mixture components.

Parameters:

samples Samples from which the Gaussian mixture model will be estimated. It should be a one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it will be converted to the inner matrix of such type for the further computing.
means0 Initial means \(a_k\) of mixture components. It is a one-channel matrix of \(nclusters \times dims\) size. If the matrix does not have CV_64F type it will be converted to the inner matrix of such type for the further computing.
covs0 The vector of initial covariance matrices \(S_k\) of mixture components. Each of covariance matrices is a one-channel matrix of \(dims \times dims\) size. If the matrices do not have CV_64F type they will be converted to the inner matrices of such type for the further computing.
weights0 Initial weights \(\pi_k\) of mixture components. It should be a one-channel floating-point matrix with \(1 \times nclusters\) or \(nclusters \times 1\) size.
logLikelihoods The optional output matrix that contains a likelihood logarithm value for each sample. It has \(nsamples \times 1\) size and CV_64FC1 type.
labels The optional output “class label” for each sample: \(\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\) (indices of the most probable mixture component for each sample). It has \(nsamples \times 1\) size and CV_32SC1 type.
probs The optional output matrix that contains posterior probabilities of each Gaussian mixture component given each sample. It has \(nsamples \times nclusters\) size and CV_64FC1 type.
virtual
bool
trainEM(
    InputArray samples,
    OutputArray logLikelihoods = noArray(),
    OutputArray labels = noArray(),
    OutputArray probs = noArray()
    ) = 0

Estimate the Gaussian mixture parameters from a samples set.

This variation starts with Expectation step. Initial values of the model parameters will be estimated by the k-means algorithm.

Unlike many of the ML models, EM is an unsupervised learning algorithm and it does not take responses (class labels or function values) as input. Instead, it computes the Maximum Likelihood Estimate of the Gaussian mixture parameters from an input sample set, stores all the parameters inside the structure: \(p_{i,k}\) in probs, \(a_k\) in means, \(S_k\) in covs[k], and \(\pi_k\) in weights, and optionally computes the output “class label” for each sample: \(\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\) (indices of the most probable mixture component for each sample).

The trained model can be used further for prediction, just like any other classifier. The trained model is similar to the NormalBayesClassifier.

Parameters:

samples Samples from which the Gaussian mixture model will be estimated. It should be a one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it will be converted to the inner matrix of such type for the further computing.
logLikelihoods The optional output matrix that contains a likelihood logarithm value for each sample. It has \(nsamples \times 1\) size and CV_64FC1 type.
labels The optional output “class label” for each sample: \(\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\) (indices of the most probable mixture component for each sample). It has \(nsamples \times 1\) size and CV_32SC1 type.
probs The optional output matrix that contains posterior probabilities of each Gaussian mixture component given each sample. It has \(nsamples \times nclusters\) size and CV_64FC1 type.
virtual
bool
trainM(
    InputArray samples,
    InputArray probs0,
    OutputArray logLikelihoods = noArray(),
    OutputArray labels = noArray(),
    OutputArray probs = noArray()
    ) = 0

Estimate the Gaussian mixture parameters from a samples set.

This variation starts with Maximization step. You need to provide initial probabilities \(p_{i,k}\) to use this option.

Parameters:

samples Samples from which the Gaussian mixture model will be estimated. It should be a one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type it will be converted to the inner matrix of such type for the further computing.
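probs0 Initial probabilities \(p_{i,k}\) of the mixture components for each sample, from which the first Maximization step starts.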
logLikelihoods The optional output matrix that contains a likelihood logarithm value for each sample. It has \(nsamples \times 1\) size and CV_64FC1 type.
labels The optional output “class label” for each sample: \(\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\) (indices of the most probable mixture component for each sample). It has \(nsamples \times 1\) size and CV_32SC1 type.
probs The optional output matrix that contains posterior probabilities of each Gaussian mixture component given each sample. It has \(nsamples \times nclusters\) size and CV_64FC1 type.
static
Ptr<EM>
create()

Creates an empty EM model. The model should then be trained using the StatModel::train(traindata, flags) method. Alternatively, you can use one of the EM::train* methods or load it from a file using Algorithm::load<EM>(filename).

static
Ptr<EM>
load(
    const String& filepath,
    const String& nodeName = String()
    )

Loads and creates a serialized EM from a file.

Use EM::save to serialize and store an EM to disk. Load the EM from this file again by calling this function with the path to the file. Optionally specify the node of the file containing the classifier.

Parameters:

filepath path to serialized EM
nodeName name of node containing the classifier