Weka——PrincipalComponents分析

package weka.filters.unsupervised.attribute;

PrincipalComponents

属性：

  /** The data to analyse/transform; set to null again at the end of setup(). */

  protected Instances m_TrainInstances;

  /**
   * Copy created in setup() via new Instances(m_TrainInstances, 0), before any
   * filtering; kept for the class attribute (if set).
   */

  protected Instances m_TrainCopy;

  /** The header for the transformed data format. */

  protected Instances m_TransformedFormat;

  /** Data has a class set. */

  protected boolean m_HasClass;

  /** Class index. */

  protected int m_ClassIndex;

  /** Number of attributes (of the preprocessed training data, see setup()). */

  protected int m_NumAttribs;

  /** Number of instances (of the preprocessed training data, see setup()). */

  protected int m_NumInstances;

  /**
   * Correlation matrix for the original data. When m_center is true,
   * fillCovariance() stores the covariance matrix here instead.
   */

  protected double[][] m_Correlation;

  /**
   * If true, center (rather than standardize) the data and
   * compute PCA from covariance (rather than correlation)
   * matrix.
   */

  private boolean m_center = false;

  /**
   * Will hold the unordered linear transformations of the (normalized)
   * original data.
   */

  protected double[][] m_Eigenvectors;

  /**
   * Eigenvalues for the corresponding eigenvectors; setup() replaces negative
   * values with 0.
   */

  protected double[] m_Eigenvalues = null;

  /**
   * Index array returned by Utils.sort(m_Eigenvalues); used to look up
   * eigenvalues and eigenvector columns in sorted order.
   */

  protected int[] m_SortedEigens;

  /** Sum of the eigenvalues. */

  protected double m_SumOfEigenValues = 0.0;

  /** Filter for replacing missing values. */

  protected ReplaceMissingValues m_ReplaceMissingFilter;

  /** Filter for turning nominal values into numeric ones. */

  protected NominalToBinary m_NominalToBinaryFilter;

  /**
   * Filter for removing the class attribute and attributes with at most one
   * distinct value; only created in setup() when there is something to remove.
   */

  protected Remove m_AttributeFilter;

  /** Filter for standardizing the data. */

  protected Standardize m_standardizeFilter;

  /** Filter for centering the data. */

  protected Center m_centerFilter;

  /** The number of attributes in the pc transformed data. */

  protected int m_OutputNumAtts = -1;  

  /**
   * The amount of variance to cover in the original data when
   * retaining the best n PC's.
   */

  protected double m_CoverVariance = 0.95;

  /** Maximum number of attributes in the transformed attribute name. */

  protected int m_MaxAttrsInName = 5;

  /** Maximum number of attributes in the transformed data (-1 for all). */

  protected int m_MaxAttributes = -1;

计算协方差矩阵或相关系数矩阵

  /**
   * Fills {@code m_Correlation} with the matrix the principal components are
   * computed from.
   * <p>
   * When {@code m_center} is false this simply delegates to
   * {@link #fillCorrelation()}. Otherwise the training data is centered
   * (mean subtracted) with a {@code Center} filter, which replaces
   * {@code m_TrainInstances}, and the covariance matrix of the centered data
   * is stored, using {@code numInstances - 1} as the divisor.
   * <p>
   * NOTE(review): with a single training instance the divisor is 0, so the
   * entries become NaN/Infinity; this matches the previous behaviour.
   *
   * @throws Exception if centering the data or filling the correlation
   *                   matrix fails
   */
  protected void fillCovariance() throws Exception {
    if (!m_center) {
      fillCorrelation();
      return;
    }

    // now center the data by subtracting the mean
    m_centerFilter = new Center();
    m_centerFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

    // now compute the covariance matrix
    m_Correlation = new double[m_NumAttribs][m_NumAttribs];
    double divisor = (double) (m_TrainInstances.numInstances() - 1);

    for (int i = 0; i < m_NumAttribs; i++) {
      // The matrix is symmetric: compute the upper triangle (diagonal
      // included) once and mirror each entry into the lower triangle.
      for (int j = i; j < m_NumAttribs; j++) {
        double cov = 0;
        for (int k = 0; k < m_NumInstances; k++) {
          cov += (m_TrainInstances.instance(k).value(i)
              * m_TrainInstances.instance(k).value(j));
        }
        cov /= divisor;

        m_Correlation[i][j] = cov;
        m_Correlation[j][i] = cov;
      }
    }
  }

  /**
   * Fill the correlation matrix and then standardize the training data.
   * <p>
   * {@code m_Correlation} receives 1.0 on the diagonal and the value of
   * {@code Utils.correlation} for every pair of distinct attributes, mirrored
   * so that both {@code [i][j]} and {@code [j][i]} hold the same value.
   * Afterwards {@code m_TrainInstances} is replaced by the output of a newly
   * created {@code Standardize} filter, which is kept in
   * {@code m_standardizeFilter}.
   *
   * @throws Exception if standardizing the data fails
   */
  protected void fillCorrelation() throws Exception {
    m_Correlation = new double[m_NumAttribs][m_NumAttribs];

    double[] rowVals = new double[m_NumInstances];
    double[] colVals = new double[m_NumInstances];

    for (int i = 0; i < m_NumAttribs; i++) {
      // Extract attribute i once per row instead of once per (i, j) pair.
      for (int k = 0; k < m_NumInstances; k++) {
        rowVals[k] = m_TrainInstances.instance(k).value(i);
      }

      m_Correlation[i][i] = 1.0;

      // Each unordered pair is computed exactly once. The argument order
      // (attribute i first, with j < i) is the one whose result the former
      // full double loop left in both mirrored cells.
      for (int j = 0; j < i; j++) {
        for (int k = 0; k < m_NumInstances; k++) {
          colVals[k] = m_TrainInstances.instance(k).value(j);
        }
        double corr = Utils.correlation(rowVals, colVals, m_NumInstances);
        m_Correlation[i][j] = corr;
        m_Correlation[j][i] = corr;
      }
    }

    // now standardize the input data
    m_standardizeFilter = new Standardize();
    m_standardizeFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);
  }

处理数据

  /**
   * Projects one instance, given in the original (unnormalized) format, onto
   * the retained principal components.
   * <p>
   * A copy of the instance is pushed through the preprocessing filters in the
   * same order as the training data: missing-value replacement, nominal to
   * binary conversion, the optional attribute removal and finally either
   * centering or standardizing, depending on {@code m_center}. Components are
   * then written in the order given by walking {@code m_SortedEigens} from
   * its last index downwards, stopping once the accumulated share of the
   * eigenvalue sum reaches {@code m_CoverVariance} or the limit derived from
   * {@code m_MaxAttributes} is hit. If a class is set, its value is taken
   * from the unprocessed instance and stored in the last output slot.
   *
   * @param instance     an instance in the original (unnormalized) format
   * @return             the transformed instance, sparse if the input was
   *                     sparse and dense otherwise, with the input's weight
   * @throws Exception   if instance can't be transformed
   */
  protected Instance convertInstance(Instance instance) throws Exception {
    double[] transformed = new double[m_OutputNumAtts];

    // replay the preprocessing chain on a copy of the incoming instance
    Instance processed = (Instance) instance.copy();

    m_ReplaceMissingFilter.input(processed);
    m_ReplaceMissingFilter.batchFinished();
    processed = m_ReplaceMissingFilter.output();

    m_NominalToBinaryFilter.input(processed);
    m_NominalToBinaryFilter.batchFinished();
    processed = m_NominalToBinaryFilter.output();

    if (m_AttributeFilter != null) {
      m_AttributeFilter.input(processed);
      m_AttributeFilter.batchFinished();
      processed = m_AttributeFilter.output();
    }

    if (m_center) {
      m_centerFilter.input(processed);
      m_centerFilter.batchFinished();
      processed = m_centerFilter.output();
    } else {
      m_standardizeFilter.input(processed);
      m_standardizeFilter.batchFinished();
      processed = m_standardizeFilter.output();
    }

    // the class value is carried over untouched into the last slot
    if (m_HasClass) {
      transformed[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
    }

    // smallest position in m_SortedEigens that may still be emitted
    int lowestRank = (m_MaxAttributes > 0)
        ? Math.max(0, m_NumAttribs - m_MaxAttributes)
        : 0;

    double coveredVariance = 0;
    int outIdx = 0;
    for (int rank = m_NumAttribs - 1; rank >= lowestRank; rank--, outIdx++) {
      int eigenIdx = m_SortedEigens[rank];

      double projection = 0.0;
      for (int a = 0; a < m_NumAttribs; a++) {
        projection += m_Eigenvectors[a][eigenIdx] * processed.value(a);
      }
      transformed[outIdx] = projection;

      coveredVariance += m_Eigenvalues[eigenIdx];
      if ((coveredVariance / m_SumOfEigenValues) >= m_CoverVariance) {
        break;
      }
    }

    // create instance
    if (instance instanceof SparseInstance) {
      return new SparseInstance(instance.weight(), transformed);
    }
    return new DenseInstance(instance.weight(), transformed);
  }

  /**
   * Initializes the filter with the given input data.
   * <p>
   * The data is copied and preprocessed (missing values replaced, nominal
   * attributes turned into binary ones, attributes with at most one distinct
   * value and the class attribute removed). The correlation or covariance
   * matrix is then filled via {@link #fillCovariance()}, its eigenvectors and
   * eigenvalues are computed, and the output format is determined and set.
   * The reference to the training data is dropped at the end.
   * <p>
   * {@code m_HasClass} and {@code m_AttributeFilter} are recomputed on every
   * call, so a repeated setup does not inherit state from an earlier data set.
   *
   * @param instances   the data to process
   * @throws Exception  in case the processing goes wrong
   * @see               #batchFinished()
   */
  protected void setup(Instances instances) throws Exception {
    m_TrainInstances = new Instances(instances);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_TrainCopy = new Instances(m_TrainInstances, 0);

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

    m_NominalToBinaryFilter = new NominalToBinary();
    m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

    // delete any attributes with only one distinct value or are all missing
    Vector<Integer> deleteCols = new Vector<Integer>();
    for (int i = 0; i < m_TrainInstances.numAttributes(); i++) {
      if (m_TrainInstances.numDistinctValues(i) <= 1) {
        deleteCols.addElement(i);
      }
    }

    // get rid of the class column; the flag is recomputed (not only set) so
    // that it cannot stay true from an earlier call
    m_HasClass = m_TrainInstances.classIndex() >= 0;
    if (m_HasClass) {
      m_ClassIndex = m_TrainInstances.classIndex();
      deleteCols.addElement(m_ClassIndex);
    }

    // remove columns from the data if necessary; convertInstance() skips the
    // removal step when this filter is null, so clear any previous one first
    m_AttributeFilter = null;
    if (!deleteCols.isEmpty()) {
      int[] todelete = new int[deleteCols.size()];
      for (int i = 0; i < todelete.length; i++) {
        todelete[i] = deleteCols.elementAt(i);
      }

      m_AttributeFilter = new Remove();
      m_AttributeFilter.setAttributeIndicesArray(todelete);
      m_AttributeFilter.setInvertSelection(false);
      m_AttributeFilter.setInputFormat(m_TrainInstances);
      m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_TrainInstances);

    m_NumInstances = m_TrainInstances.numInstances();
    m_NumAttribs   = m_TrainInstances.numAttributes();

    fillCovariance();

    // get eigen vectors/values
    Matrix corr = new Matrix(m_Correlation);
    EigenvalueDecomposition eig = corr.eig();
    Matrix eigenvectorMatrix = eig.getV();

    // copy the eigenvector matrix into a plain array owned by this filter
    double[][] vectors = new double[m_NumAttribs][m_NumAttribs];
    for (int i = 0; i < m_NumAttribs; i++) {
      for (int j = 0; j < m_NumAttribs; j++) {
        vectors[i][j] = eigenvectorMatrix.get(i, j);
      }
    }
    m_Eigenvectors = vectors;
    m_Eigenvalues  = eig.getRealEigenvalues().clone();

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (int i = 0; i < m_Eigenvalues.length; i++) {
      if (m_Eigenvalues[i] < 0) {
        m_Eigenvalues[i] = 0.0;
      }
    }

    m_SortedEigens     = Utils.sort(m_Eigenvalues);
    m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

    m_TransformedFormat = determineOutputFormat(m_TrainInstances);
    setOutputFormat(m_TransformedFormat);

    m_TrainInstances = null;
  }

Weka——PrincipalComponents分析的更多相关文章

Weka关联规则分析
购物篮分析: Apriori算法: 参数设置: 1.car 如果设为真,则会挖掘类关联规则而不是全局关联规则. 2. classindex 类属性索引.如果设置为-1,最后的属性被当做类属性. 3. ...
Weka算法Clusterers-DBSCAN源代码分析
假设说世界上仅仅能存在一种基于密度的聚类算法的话.那么它必须是DBSCAN(Density-based spatial clustering of applications with noise).D ...
Weka算法Clusterers-Xmeans源代码分析（一）
上几篇博客都是分析的分类器算法(有监督学习),这次就分 ...
Weka学习之关联规则分析
步骤: (一) 选择数据源 (二)选择要分析的字段 (三)选择需要的关联规则算法 (四)点击start运行 (五) 分析结果算法选择: Apriori算法参数含义 1.car:如果设为真,则会挖掘类 ...
Weka算法Classifier-meta-AdaBoostM1源代码分析（一）
多分类器组合算法简单的来讲经常使用的有voting,bagging和boosting,当中就效果来说Boosting略占优势,而AdaBoostM1算法又相当于Boosting算法的"经典款 ...
Weka算法Classifier-tree-J48源代码分析（一个）基本数据结构和算法
大约一年,我没有照顾的博客,再次拿起笔不知从何写上,想来想去手从最近使用Weka要正确书写. Weka为一个Java基础上的机器学习工具.上手简单,并提供图形化界面.提供如分类.聚类.频繁项挖掘等工具 ...
数据挖掘：关联规则的apriori算法在weka的源码分析
相对于机器学习,关联规则的apriori算法更偏向于数据挖掘. 1) 测试文档中调用weka的关联规则apriori算法,如下 try { File file = new File("F:\ ...
Weka中数据挖掘与机器学习系列之Exploer界面（七）
不多说,直接上干货! Weka的Explorer(探索者)界面,是Weka的主要图形化用户界面,其全部功能都可通过菜单选择或表单填写进行访问.本博客将详细介绍Weka探索者界面的图形化用户界面.预处理 ...
Weka算法算法翻译（部分）
目录 Weka算法翻译(部分) 1. 属性选择算法(select attributes) 1.1 属性评估方法 1.2 搜索方法 2. 分类算法 2.1 贝叶斯算法 2.2 Functions 2.3 ...

随机推荐

Sencha Touch 实战开发培训视频教程第二期第六节
2014.4.18 晚上8:20左右开课. 本节课耗时没有超出一个小时. 本期培训一共八节,前两节免费,后面的课程需要付费才可以观看. 本节内容: 图片展示利用list展示图片: 扩展Carouse ...
Sencha Touch 实战开发培训视频教程第二期第一节
经过忙碌的准备,终于在2014.4.7晚上8:10分开课. 本来预定在8点开课的,不过电脑出了点问题,推迟了. 本期培训一共八节,前两节免费,后面的课程需要付费才可以观看. 本节内容: 了解Sench ...
【CF840D】Destiny 分治(线段树)
[CF840D]Destiny 题意:给你一个长度为n的序列,q次询问,每次指定l r k,求[l,r]中出现次数$>\frac {r-l+1} k$的所有数中最小的那个数. $n,q\le 3 ...
[工具] 知网(CNKI)文献下载工具
https://github.com/amyhaber/cnki-downloader 用于免费搜索,下载CNKI上的各类文献资料
zookeeper 安装的三种模式
Zookeeper安装 zookeeper的安装分为三种模式:单机模式.集群模式和伪集群模式. 单机模式首先,从Apache官网下载一个Zookeeper稳定版本,本次教程采用的是zookeeper ...
LCA最近公共祖先（least common ancestors）
#include"stdio.h" #include"string.h" #include"iostream" #include" ...
使用SQL手动创建数据库并创建一个具有该数据库所有权限的用户
$ mysql -u adminusername -p Enter password: Welcome to the MySQL monitor. Commands end with ; or \g. ...
Freetds 连接数据库问题
今天一个项目,需要用到连接SQLSERVER数据库,获取数据,按照以往的做法 ,安装了LNMP,装完之后在安装Freetds,然后在独立添加PHP的MSSQL的模块,./configure make ...
HDU 4578 - Transformation - [加强版线段树]
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=4578 Problem Description Yuanfang is puzzled with the ...
laravel5.1接收ajax数据
前台: $.ajax({ type: 'POST', url: '{!! url('aw/data') !!}', data:{'_token':'<?php echo csrf_token() ...

Weka——PrincipalComponents分析

Weka——PrincipalComponents分析的更多相关文章

随机推荐

热门专题