package weka.filters.unsupervised.attribute;

PrincipalComponents

属性:

  /** The data to analyse/transform. */
  protected Instances m_TrainInstances;

  /**
   * Header-only copy of the training data, kept so the class attribute
   * (if set) can be appended to the transformed data.
   */
  protected Instances m_TrainCopy;

  /** The header for the transformed data format. */
  protected Instances m_TransformedFormat;

  /** Data has a class set. */
  protected boolean m_HasClass;

  /** Class index. */
  protected int m_ClassIndex;

  /** Number of attributes. */
  protected int m_NumAttribs;

  /** Number of instances. */
  protected int m_NumInstances;

  /**
   * Correlation matrix for the original data (holds the covariance matrix
   * instead when {@code m_center} is true).
   */
  protected double[][] m_Correlation;

  /**
   * If true, center (rather than standardize) the data and
   * compute PCA from covariance (rather than correlation)
   * matrix.
   */
  private boolean m_center = false;

  /**
   * Will hold the unordered linear transformations of the (normalized)
   * original data.
   */
  protected double[][] m_Eigenvectors;

  /** Eigenvalues for the corresponding eigenvectors. */
  protected double[] m_Eigenvalues = null;

  /** Indices into {@code m_Eigenvalues} as returned by {@code Utils.sort}. */
  protected int[] m_SortedEigens;

  /** Sum of the eigenvalues. */
  protected double m_SumOfEigenValues = 0.0;

  /** Filter for replacing missing values. */
  protected ReplaceMissingValues m_ReplaceMissingFilter;

  /** Filter for turning nominal values into numeric ones. */
  protected NominalToBinary m_NominalToBinaryFilter;

  /**
   * Filter for removing the class attribute and attributes with at most
   * one distinct value; stays null when nothing has to be removed.
   */
  protected Remove m_AttributeFilter;

  /** Filter for standardizing the data. */
  protected Standardize m_standardizeFilter;

  /** Filter for centering the data. */
  protected Center m_centerFilter;

  /** The number of attributes in the PC-transformed data. */
  protected int m_OutputNumAtts = -1;

  /**
   * The amount of variance to cover in the original data when
   * retaining the best n PCs.
   */
  protected double m_CoverVariance = 0.95;

  /** Maximum number of attributes in the transformed attribute name. */
  protected int m_MaxAttrsInName = 5;

  /** Maximum number of attributes in the transformed data (-1 for all). */
  protected int m_MaxAttributes = -1;

计算协方差矩阵或相关系数矩阵

  /**
   * Fills {@code m_Correlation} with the matrix the principal components
   * are computed from.
   * <p>
   * When {@code m_center} is false this simply delegates to
   * {@link #fillCorrelation()}. Otherwise the training data is passed
   * through a {@code Center} filter (which replaces
   * {@code m_TrainInstances}) and the covariance matrix of the centered
   * data is stored in {@code m_Correlation}.
   *
   * @throws Exception propagated from the centering filter or from
   *                   {@link #fillCorrelation()}
   */
  protected void fillCovariance() throws Exception {
    if (!m_center) {
      fillCorrelation();
      return;
    }

    // now center the data by subtracting the mean
    m_centerFilter = new Center();
    m_centerFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

    // now compute the covariance matrix; the data is already centered, so
    // the covariance is the sum of products divided by (n - 1).
    // NOTE(review): with a single instance the divisor is 0 and the entries
    // become NaN/Infinity -- unchanged from the previous behaviour.
    final double divisor = (double) (m_TrainInstances.numInstances() - 1);
    m_Correlation = new double[m_NumAttribs][m_NumAttribs];

    for (int i = 0; i < m_NumAttribs; i++) {
      // the matrix is symmetric: compute each pair once and mirror it
      for (int j = 0; j <= i; j++) {
        double cov = 0;
        for (int k = 0; k < m_NumInstances; k++) {
          cov += (m_TrainInstances.instance(k).value(i)
                  * m_TrainInstances.instance(k).value(j));
        }
        cov /= divisor;
        m_Correlation[i][j] = cov;
        m_Correlation[j][i] = cov;
      }
    }
  }

/**
 * Fills {@code m_Correlation} with the pairwise correlations of the
 * attributes in {@code m_TrainInstances} (diagonal fixed at 1.0) and
 * afterwards replaces {@code m_TrainInstances} with its standardized
 * version.
 *
 * @throws Exception propagated from the standardize filter
 */
protected void fillCorrelation() throws Exception {
  m_Correlation = new double[m_NumAttribs][m_NumAttribs];

  // reusable buffers holding the values of attribute i and attribute j
  double[] att1 = new double[m_NumInstances];
  double[] att2 = new double[m_NumInstances];

  for (int i = 0; i < m_NumAttribs; i++) {
    // column i only depends on i, so extract it once per outer iteration
    for (int k = 0; k < m_NumInstances; k++) {
      att1[k] = m_TrainInstances.instance(k).value(i);
    }

    // an attribute is perfectly correlated with itself
    m_Correlation[i][i] = 1.0;

    // each off-diagonal pair is computed once (j < i) and mirrored; the
    // argument order (i, j) matches the value the full double loop ended
    // up storing for that pair
    for (int j = 0; j < i; j++) {
      for (int k = 0; k < m_NumInstances; k++) {
        att2[k] = m_TrainInstances.instance(k).value(j);
      }
      double corr = Utils.correlation(att1, att2, m_NumInstances);
      m_Correlation[i][j] = corr;
      m_Correlation[j][i] = corr;
    }
  }

  // now standardize the input data
  m_standardizeFilter = new Standardize();
  m_standardizeFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);
}

处理数据

  /**
   * Projects one instance, given in the original (unnormalized) format,
   * onto the retained principal components.
   * <p>
   * A copy of the instance is pushed through the same filter chain that
   * was applied to the training data (missing-value replacement,
   * nominal-to-binary, optional attribute removal, then standardizing or
   * centering depending on {@code m_center}). Components are then emitted
   * starting from the last entry of {@code m_SortedEigens} until either
   * {@code m_MaxAttributes} components were produced or the accumulated
   * eigenvalue share reaches {@code m_CoverVariance}.
   *
   * @param instance an instance in the original (unnormalized) format
   * @return a transformed instance
   * @throws Exception if instance can't be transformed
   */
  protected Instance convertInstance(Instance instance) throws Exception {
    final double[] projected = new double[m_OutputNumAtts];
    Instance current = (Instance) instance.copy();

    m_ReplaceMissingFilter.input(current);
    m_ReplaceMissingFilter.batchFinished();
    current = m_ReplaceMissingFilter.output();

    m_NominalToBinaryFilter.input(current);
    m_NominalToBinaryFilter.batchFinished();
    current = m_NominalToBinaryFilter.output();

    if (m_AttributeFilter != null) {
      m_AttributeFilter.input(current);
      m_AttributeFilter.batchFinished();
      current = m_AttributeFilter.output();
    }

    if (m_center) {
      m_centerFilter.input(current);
      m_centerFilter.batchFinished();
      current = m_centerFilter.output();
    } else {
      m_standardizeFilter.input(current);
      m_standardizeFilter.batchFinished();
      current = m_standardizeFilter.output();
    }

    // the class value is carried over untouched into the last slot
    if (m_HasClass) {
      projected[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
    }

    // lowest index into m_SortedEigens that may still be emitted
    int lowestRank = 0;
    if (m_MaxAttributes > 0) {
      lowestRank = Math.max(0, m_NumAttribs - m_MaxAttributes);
    }

    double coveredSoFar = 0;
    for (int rank = m_NumAttribs - 1; rank >= lowestRank; rank--) {
      double component = 0.0;
      for (int att = 0; att < m_NumAttribs; att++) {
        component += m_Eigenvectors[att][m_SortedEigens[rank]] * current.value(att);
      }
      projected[m_NumAttribs - rank - 1] = component;

      coveredSoFar += m_Eigenvalues[m_SortedEigens[rank]];
      if ((coveredSoFar / m_SumOfEigenValues) >= m_CoverVariance) {
        break;
      }
    }

    // create instance, keeping the sparse/dense flavour of the input
    if (instance instanceof SparseInstance) {
      return new SparseInstance(instance.weight(), projected);
    }
    return new DenseInstance(instance.weight(), projected);
  }

/**
 * Initializes the filter with the given input data.
 *
 * @param instances the data to process
 * @throws Exception in case the processing goes wrong
 * @see #batchFinished()
 */
protected void setup(Instances instances) throws Exception {
  // Pipeline: copy data -> replace missing values -> nominal to binary ->
  // drop useless/class columns -> correlation or covariance matrix ->
  // eigen decomposition -> output format.
  m_TrainInstances = new Instances(instances);

  // make a copy of the training data so that we can get the class
  // column to append to the transformed data (if necessary)
  m_TrainCopy = new Instances(m_TrainInstances, 0);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

  m_NominalToBinaryFilter = new NominalToBinary();
  m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

  // delete any attributes with only one distinct value or are all missing
  Vector<Integer> deleteCols = new Vector<Integer>();
  for (int i = 0; i < m_TrainInstances.numAttributes(); i++) {
    if (m_TrainInstances.numDistinctValues(i) <= 1) {
      deleteCols.addElement(i);
    }
  }

  // always (re)assign the flag so a repeated setup on class-less data
  // does not inherit "true" from an earlier run
  m_HasClass = m_TrainInstances.classIndex() >= 0;
  if (m_HasClass) {
    // get rid of the class column
    m_ClassIndex = m_TrainInstances.classIndex();
    deleteCols.addElement(m_ClassIndex);
  }

  // remove columns from the data if necessary
  if (deleteCols.size() > 0) {
    int[] todelete = new int[deleteCols.size()];
    for (int i = 0; i < todelete.length; i++) {
      todelete[i] = deleteCols.elementAt(i);
    }
    m_AttributeFilter = new Remove();
    m_AttributeFilter.setAttributeIndicesArray(todelete);
    m_AttributeFilter.setInvertSelection(false);
    m_AttributeFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
  } else {
    // convertInstance() applies this filter whenever it is non-null, so it
    // must not survive from an earlier setup that did remove columns
    m_AttributeFilter = null;
  }

  // can evaluator handle the processed data ? e.g., enough attributes?
  getCapabilities().testWithFail(m_TrainInstances);

  m_NumInstances = m_TrainInstances.numInstances();
  m_NumAttribs = m_TrainInstances.numAttributes();

  // fills m_Correlation (correlation or covariance, depending on m_center)
  fillCovariance();

  // get eigen vectors/values
  Matrix corr = new Matrix(m_Correlation);
  EigenvalueDecomposition eig = corr.eig();
  Matrix V = eig.getV();
  double[][] v = new double[m_NumAttribs][m_NumAttribs];
  for (int i = 0; i < v.length; i++) {
    for (int j = 0; j < v[i].length; j++) {
      v[i][j] = V.get(i, j);
    }
  }
  // v is a freshly filled local array, so it can be stored directly
  m_Eigenvectors = v;
  m_Eigenvalues = eig.getRealEigenvalues().clone();

  // any eigenvalues less than 0 are not worth anything --- change to 0
  for (int i = 0; i < m_Eigenvalues.length; i++) {
    if (m_Eigenvalues[i] < 0) {
      m_Eigenvalues[i] = 0.0;
    }
  }
  m_SortedEigens = Utils.sort(m_Eigenvalues);
  m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

  m_TransformedFormat = determineOutputFormat(m_TrainInstances);
  setOutputFormat(m_TransformedFormat);

  // the training data is no longer needed once the format is fixed
  m_TrainInstances = null;
}

Weka——PrincipalComponents分析的更多相关文章

  1. Weka关联规则分析

    购物篮分析: Apriori算法: 参数设置: 1.car 如果设为真,则会挖掘类关联规则而不是全局关联规则. 2. classindex 类属性索引.如果设置为-1,最后的属性被当做类属性. 3. ...

  2. Weka算法Clusterers-DBSCAN源代码分析

    假设说世界上仅仅能存在一种基于密度的聚类算法的话.那么它必须是DBSCAN(Density-based spatial clustering of applications with noise).D ...

  3. Weka算法Clusterers-Xmeans源代码分析(一)

    上几篇博客都是分析的分类器算法(有监督学习),这次就分 ...

  4. Weka学习之关联规则分析

    步骤: (一) 选择数据源 (二)选择要分析的字段 (三)选择需要的关联规则算法 (四)点击start运行 (五) 分析结果 算法选择: Apriori算法参数含义 1.car:如果设为真,则会挖掘类 ...

  5. Weka算法Classifier-meta-AdaBoostM1源代码分析(一)

    多分类器组合算法简单的来讲经常使用的有voting,bagging和boosting,当中就效果来说Boosting略占优势,而AdaBoostM1算法又相当于Boosting算法的"经典款 ...

  6. Weka算法Classifier-tree-J48源代码分析(一个)基本数据结构和算法

    大约一年,我没有照顾的博客,再次拿起笔不知从何写上,想来想去手从最近使用Weka要正确书写. Weka为一个Java基础上的机器学习工具.上手简单,并提供图形化界面.提供如分类.聚类.频繁项挖掘等工具 ...

  7. 数据挖掘:关联规则的apriori算法在weka的源码分析

    相对于机器学习,关联规则的apriori算法更偏向于数据挖掘. 1) 测试文档中调用weka的关联规则apriori算法,如下 try { File file = new File("F:\ ...

  8. Weka中数据挖掘与机器学习系列之Explorer界面(七)

    不多说,直接上干货! Weka的Explorer(探索者)界面,是Weka的主要图形化用户界面,其全部功能都可通过菜单选择或表单填写进行访问.本博客将详细介绍Weka探索者界面的图形化用户界面.预处理 ...

  9. Weka算法翻译(部分)

    目录 Weka算法翻译(部分) 1. 属性选择算法(select attributes) 1.1 属性评估方法 1.2 搜索方法 2. 分类算法 2.1 贝叶斯算法 2.2 Functions 2.3 ...

随机推荐

  1. Sencha Touch 实战开发培训 视频教程 第二期 第六节

    2014.4.18 晚上8:20左右开课. 本节课耗时没有超出一个小时. 本期培训一共八节,前两节免费,后面的课程需要付费才可以观看. 本节内容: 图片展示 利用list展示图片: 扩展Carouse ...

  2. Sencha Touch 实战开发培训 视频教程 第二期 第一节

    经过忙碌的准备,终于在2014.4.7晚上8:10分开课. 本来预定在8点开课的,不过电脑出了点问题,推迟了. 本期培训一共八节,前两节免费,后面的课程需要付费才可以观看. 本节内容: 了解Sench ...

  3. 【CF840D】Destiny 分治(线段树)

    [CF840D]Destiny 题意:给你一个长度为n的序列,q次询问,每次指定l r k,求[l,r]中出现次数$>\frac {r-l+1} k$的所有数中最小的那个数. $n,q\le 3 ...

  4. [工具] 知网(CNKI)文献下载工具

    https://github.com/amyhaber/cnki-downloader 用于免费搜索,下载CNKI上的各类文献资料

  5. zookeeper 安装的三种模式

    Zookeeper安装 zookeeper的安装分为三种模式:单机模式.集群模式和伪集群模式. 单机模式 首先,从Apache官网下载一个Zookeeper稳定版本,本次教程采用的是zookeeper ...

  6. LCA最近公共祖先(least common ancestors)

    #include"stdio.h" #include"string.h" #include"iostream" #include" ...

  7. 使用SQL手动创建数据库并创建一个具有该数据库所有权限的用户

    $ mysql -u adminusername -p Enter password: Welcome to the MySQL monitor. Commands end with ; or \g. ...

  8. Freetds 连接数据库问题

    今天一个项目,需要用到连接SQLSERVER数据库,获取数据,按照以往的做法 ,安装了LNMP,装完之后在安装Freetds,然后在独立添加PHP的MSSQL的模块,./configure  make ...

  9. HDU 4578 - Transformation - [加强版线段树]

    题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4578 Problem Description Yuanfang is puzzled with the ...

  10. laravel5.1接收ajax数据

    前台: $.ajax({ type: 'POST', url: '{!! url('aw/data') !!}', data:{'_token':'<?php echo csrf_token() ...