Weka——PrincipalComponents分析
package weka.filters.unsupervised.attribute;
PrincipalComponents
属性:
/** The data to analyse/transform. Released (set to null) at the end of setup(). */
protected Instances m_TrainInstances;

/** Keep a copy for the class attribute (if set). */
protected Instances m_TrainCopy;

/** The header for the transformed data format. */
protected Instances m_TransformedFormat;

/** Data has a class set. */
protected boolean m_HasClass;

/** Class index. */
protected int m_ClassIndex;

/** Number of attributes (after preprocessing and column removal in setup()). */
protected int m_NumAttribs;

/** Number of instances. */
protected int m_NumInstances;

/**
 * Correlation matrix for the original data. Holds the covariance matrix
 * instead when m_center is true (see fillCovariance()).
 */
protected double[][] m_Correlation;

/**
 * If true, center (rather than standardize) the data and
 * compute PCA from covariance (rather than correlation)
 * matrix.
 */
private boolean m_center = false;

/**
 * Will hold the unordered linear transformations of the (normalized)
 * original data. Indexed as [attribute][eigenvector].
 */
protected double[][] m_Eigenvectors;

/** Eigenvalues for the corresponding eigenvectors. */
protected double[] m_Eigenvalues = null;

/**
 * Indices into m_Eigenvalues in the order produced by Utils.sort()
 * (presumably ascending; convertInstance() walks this array from the end).
 */
protected int[] m_SortedEigens;

/** Sum of the eigenvalues. */
protected double m_SumOfEigenValues = 0.0;

/** Filter for replacing missing values. */
protected ReplaceMissingValues m_ReplaceMissingFilter;

/** Filter for turning nominal values into numeric ones. */
protected NominalToBinary m_NominalToBinaryFilter;

/**
 * Filter for removing the class attribute and attributes with at most one
 * distinct value.
 */
protected Remove m_AttributeFilter;

/** Filter for standardizing the data. */
protected Standardize m_standardizeFilter;

/** Filter for centering the data. */
protected Center m_centerFilter;

/** The number of attributes in the pc transformed data. */
protected int m_OutputNumAtts = -1;

/**
 * The amount of variance to cover in the original data when
 * retaining the best n PC's.
 */
protected double m_CoverVariance = 0.95;

/** Maximum number of attributes in the transformed attribute name. */
protected int m_MaxAttrsInName = 5;

/** Maximum number of attributes in the transformed data (-1 for all). */
protected int m_MaxAttributes = -1;
计算协方差矩阵或相关系数矩阵
/**
 * Fills {@code m_Correlation} with the matrix the principal components are
 * computed from.
 * <p>
 * When {@code m_center} is false this simply delegates to
 * {@link #fillCorrelation()}. Otherwise the training data is centered with a
 * {@code Center} filter (replacing {@code m_TrainInstances} with the filtered
 * data) and the covariance matrix of the centered data is stored.
 *
 * @throws Exception if filtering the training data fails
 */
protected void fillCovariance() throws Exception {
  if (!m_center) {
    fillCorrelation();
    return;
  }

  // now center the data by subtracting the mean
  m_centerFilter = new Center();
  m_centerFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

  // now compute the covariance matrix
  m_Correlation = new double[m_NumAttribs][m_NumAttribs];
  double denominator = (double) (m_TrainInstances.numInstances() - 1);

  // The matrix is symmetric, so each pair (i, j) with j >= i is computed
  // once and mirrored; the diagonal (i == j) is the variance.
  for (int i = 0; i < m_NumAttribs; i++) {
    for (int j = i; j < m_NumAttribs; j++) {
      double cov = 0;
      for (int k = 0; k < m_NumInstances; k++) {
        Instance inst = m_TrainInstances.instance(k);
        cov += inst.value(i) * inst.value(j);
      }
      cov /= denominator;
      m_Correlation[i][j] = cov;
      m_Correlation[j][i] = cov;
    }
  }
}

/**
 * Fill the correlation matrix.
 * <p>
 * Stores the pairwise correlation of all attributes in {@code m_Correlation}
 * (diagonal fixed at 1.0) and afterwards standardizes the training data,
 * replacing {@code m_TrainInstances} with the filtered data.
 *
 * @throws Exception if standardizing the training data fails
 */
protected void fillCorrelation() throws Exception {
  m_Correlation = new double[m_NumAttribs][m_NumAttribs];
  double[] att1 = new double[m_NumInstances];
  double[] att2 = new double[m_NumInstances];

  for (int i = 0; i < m_NumAttribs; i++) {
    m_Correlation[i][i] = 1.0;

    // column i is shared by every pair in this row, so extract it once
    for (int k = 0; k < m_NumInstances; k++) {
      att1[k] = m_TrainInstances.instance(k).value(i);
    }

    // only the lower triangle is computed; the result is mirrored
    for (int j = 0; j < i; j++) {
      for (int k = 0; k < m_NumInstances; k++) {
        att2[k] = m_TrainInstances.instance(k).value(j);
      }
      double corr = Utils.correlation(att1, att2, m_NumInstances);
      m_Correlation[i][j] = corr;
      m_Correlation[j][i] = corr;
    }
  }

  // now standardize the input data
  m_standardizeFilter = new Standardize();
  m_standardizeFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);
}
处理数据
/**
 * Transform an instance in original (unnormalized) format.
 * <p>
 * The instance is copied and passed through the same preprocessing filters
 * that were configured in {@link #setup(Instances)}, then projected onto the
 * eigenvectors, largest eigenvalue first, until either the attribute limit
 * ({@code m_MaxAttributes}) or the requested proportion of variance
 * ({@code m_CoverVariance}) is reached. If the data has a class, the class
 * value of the original instance is stored in the last output slot.
 *
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if instance can't be transformed
 */
protected Instance convertInstance(Instance instance) throws Exception {
  double[] newVals = new double[m_OutputNumAtts];
  Instance tempInst = (Instance) instance.copy();

  tempInst = pushThroughFilter(m_ReplaceMissingFilter, tempInst);
  tempInst = pushThroughFilter(m_NominalToBinaryFilter, tempInst);

  if (m_AttributeFilter != null) {
    tempInst = pushThroughFilter(m_AttributeFilter, tempInst);
  }

  if (!m_center) {
    tempInst = pushThroughFilter(m_standardizeFilter, tempInst);
  } else {
    tempInst = pushThroughFilter(m_centerFilter, tempInst);
  }

  if (m_HasClass) {
    newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
  }

  // index (into the sorted eigenvalues) of the last component to emit
  int numAttsLowerBound = 0;
  if (m_MaxAttributes > 0) {
    numAttsLowerBound = Math.max(m_NumAttribs - m_MaxAttributes, 0);
  }

  // m_SortedEigens is walked from the end so the best components come first
  double cumulative = 0;
  for (int i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
    double tempval = 0.0;
    for (int j = 0; j < m_NumAttribs; j++) {
      tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j);
    }

    newVals[m_NumAttribs - i - 1] = tempval;
    cumulative += m_Eigenvalues[m_SortedEigens[i]];
    if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) {
      break;
    }
  }

  // create instance
  Instance result;
  if (instance instanceof SparseInstance) {
    result = new SparseInstance(instance.weight(), newVals);
  } else {
    result = new DenseInstance(instance.weight(), newVals);
  }

  return result;
}

/**
 * Feeds a single instance to an already initialized filter and collects the
 * filtered instance.
 *
 * @param filter the filter to apply
 * @param inst the instance to filter
 * @return the instance returned by the filter's output queue
 * @throws Exception if the filter rejects the instance
 */
private static Instance pushThroughFilter(Filter filter, Instance inst) throws Exception {
  filter.input(inst);
  filter.batchFinished();
  return filter.output();
}

/**
 * Initializes the filter with the given input data.
 * <p>
 * Preprocesses a copy of the data (missing values, nominal to binary, removal
 * of the class column and of attributes with at most one distinct value),
 * fills the covariance/correlation matrix, computes its eigen decomposition
 * and sets the output format.
 *
 * @param instances the data to process
 * @throws Exception in case the processing goes wrong
 * @see #batchFinished()
 */
protected void setup(Instances instances) throws Exception {
  m_TrainInstances = new Instances(instances);

  // make a copy of the training data so that we can get the class
  // column to append to the transformed data (if necessary)
  m_TrainCopy = new Instances(m_TrainInstances, 0);

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

  m_NominalToBinaryFilter = new NominalToBinary();
  m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
  m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

  // delete any attributes with only one distinct value or are all missing
  Vector<Integer> deleteCols = new Vector<Integer>();
  for (int i = 0; i < m_TrainInstances.numAttributes(); i++) {
    if (m_TrainInstances.numDistinctValues(i) <= 1) {
      deleteCols.addElement(i);
    }
  }

  // Always assign the flag so that a value left over from an earlier setup()
  // on data with a class cannot leak into convertInstance().
  m_HasClass = m_TrainInstances.classIndex() >= 0;
  if (m_HasClass) {
    // get rid of the class column
    m_ClassIndex = m_TrainInstances.classIndex();
    // the class column may already be listed if it has a single value
    if (!deleteCols.contains(m_ClassIndex)) {
      deleteCols.addElement(m_ClassIndex);
    }
  }

  // remove columns from the data if necessary
  if (deleteCols.size() > 0) {
    m_AttributeFilter = new Remove();
    int[] todelete = new int[deleteCols.size()];
    for (int i = 0; i < todelete.length; i++) {
      todelete[i] = deleteCols.elementAt(i);
    }
    m_AttributeFilter.setAttributeIndicesArray(todelete);
    m_AttributeFilter.setInvertSelection(false);
    m_AttributeFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
  } else {
    // nothing to remove: make sure convertInstance() does not apply a
    // removal filter configured for different data
    m_AttributeFilter = null;
  }

  // can evaluator handle the processed data ? e.g., enough attributes?
  getCapabilities().testWithFail(m_TrainInstances);

  m_NumInstances = m_TrainInstances.numInstances();
  m_NumAttribs = m_TrainInstances.numAttributes();

  fillCovariance();

  // get eigen vectors/values
  Matrix corr = new Matrix(m_Correlation);
  EigenvalueDecomposition eig = corr.eig();
  Matrix V = eig.getV();
  double[][] v = new double[m_NumAttribs][m_NumAttribs];
  for (int i = 0; i < m_NumAttribs; i++) {
    for (int j = 0; j < m_NumAttribs; j++) {
      v[i][j] = V.get(i, j);
    }
  }
  m_Eigenvectors = v;
  m_Eigenvalues = eig.getRealEigenvalues().clone();

  // any eigenvalues less than 0 are not worth anything --- change to 0
  for (int i = 0; i < m_Eigenvalues.length; i++) {
    if (m_Eigenvalues[i] < 0) {
      m_Eigenvalues[i] = 0.0;
    }
  }
  m_SortedEigens = Utils.sort(m_Eigenvalues);
  m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

  m_TransformedFormat = determineOutputFormat(m_TrainInstances);
  setOutputFormat(m_TransformedFormat);

  // the training data is no longer needed once the format is determined
  m_TrainInstances = null;
}
Weka——PrincipalComponents分析的更多相关文章
- Weka关联规则分析
购物篮分析: Apriori算法: 参数设置: 1.car 如果设为真,则会挖掘类关联规则而不是全局关联规则. 2. classindex 类属性索引.如果设置为-1,最后的属性被当做类属性. 3. ...
- Weka算法Clusterers-DBSCAN源代码分析
假设说世界上仅仅能存在一种基于密度的聚类算法的话.那么它必须是DBSCAN(Density-based spatial clustering of applications with noise).D ...
- Weka算法Clusterers-Xmeans源代码分析(一)
上几篇博客都是分析的分类器算法(有监督学习),这次就分 ...
- Weka学习之关联规则分析
步骤: (一) 选择数据源 (二)选择要分析的字段 (三)选择需要的关联规则算法 (四)点击start运行 (五) 分析结果 算法选择: Apriori算法参数含义 1.car:如果设为真,则会挖掘类 ...
- Weka算法Classifier-meta-AdaBoostM1源代码分析(一)
多分类器组合算法简单的来讲经常使用的有voting,bagging和boosting,当中就效果来说Boosting略占优势,而AdaBoostM1算法又相当于Boosting算法的"经典款 ...
- Weka算法Classifier-tree-J48源代码分析(一个)基本数据结构和算法
大约一年,我没有照顾的博客,再次拿起笔不知从何写上,想来想去手从最近使用Weka要正确书写. Weka为一个Java基础上的机器学习工具.上手简单,并提供图形化界面.提供如分类.聚类.频繁项挖掘等工具 ...
- 数据挖掘:关联规则的apriori算法在weka的源码分析
相对于机器学习,关联规则的apriori算法更偏向于数据挖掘. 1) 测试文档中调用weka的关联规则apriori算法,如下 try { File file = new File("F:\ ...
- Weka中数据挖掘与机器学习系列之Explorer界面(七)
不多说,直接上干货! Weka的Explorer(探索者)界面,是Weka的主要图形化用户界面,其全部功能都可通过菜单选择或表单填写进行访问.本博客将详细介绍Weka探索者界面的图形化用户界面.预处理 ...
- Weka算法算法翻译(部分)
目录 Weka算法翻译(部分) 1. 属性选择算法(select attributes) 1.1 属性评估方法 1.2 搜索方法 2. 分类算法 2.1 贝叶斯算法 2.2 Functions 2.3 ...
随机推荐
- Sencha Touch 实战开发培训 视频教程 第二期 第六节
2014.4.18 晚上8:20左右开课. 本节课耗时没有超出一个小时. 本期培训一共八节,前两节免费,后面的课程需要付费才可以观看. 本节内容: 图片展示 利用list展示图片: 扩展Carouse ...
- Sencha Touch 实战开发培训 视频教程 第二期 第一节
经过忙碌的准备,终于在2014.4.7晚上8:10分开课. 本来预定在8点开课的,不过电脑出了点问题,推迟了. 本期培训一共八节,前两节免费,后面的课程需要付费才可以观看. 本节内容: 了解Sench ...
- 【CF840D】Destiny 分治(线段树)
[CF840D]Destiny 题意:给你一个长度为n的序列,q次询问,每次指定l r k,求[l,r]中出现次数$>\frac {r-l+1} k$的所有数中最小的那个数. $n,q\le 3 ...
- [工具] 知网(CNKI)文献下载工具
https://github.com/amyhaber/cnki-downloader 用于免费搜索,下载CNKI上的各类文献资料
- zookeeper 安装的三种模式
Zookeeper安装 zookeeper的安装分为三种模式:单机模式.集群模式和伪集群模式. 单机模式 首先,从Apache官网下载一个Zookeeper稳定版本,本次教程采用的是zookeeper ...
- LCA最近公共祖先(least common ancestors)
#include"stdio.h" #include"string.h" #include"iostream" #include" ...
- 使用SQL手动创建数据库并创建一个具有该数据库所有权限的用户
$ mysql -u adminusername -p Enter password: Welcome to the MySQL monitor. Commands end with ; or \g. ...
- Freetds 连接数据库问题
今天一个项目,需要用到连接SQLSERVER数据库,获取数据,按照以往的做法 ,安装了LNMP,装完之后在安装Freetds,然后在独立添加PHP的MSSQL的模块,./configure make ...
- HDU 4578 - Transformation - [加强版线段树]
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4578 Problem Description Yuanfang is puzzled with the ...
- laravel5.1接收ajax数据
前台: $.ajax({ type: 'POST', url: '{!! url('aw/data') !!}', data:{'_token':'<?php echo csrf_token() ...