#!/usr/bin/env python

# -*- coding: utf-8 -*-

# @Time : 2019.5.22 14:09

# @Author : AndrewCHH

# @File : after_class.py

1. 获取数据

使用MNIST数据集练习分类任务

from __future__ import print_function

import pandas as pd

# Widen pandas' display limits right after import so printed frames are not
# truncated.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

from scipy.io import loadmat

# fetch_mldata was deprecated in scikit-learn 0.20 and removed in 0.22, so the
# import is attempted first and fetch_openml is used when it is unavailable.
try:
    from sklearn.datasets import fetch_mldata
except ImportError:
    fetch_mldata = None

if fetch_mldata is not None:
    mnist = fetch_mldata('MNIST original', transpose_data=True, data_home='files')
else:
    import numpy as np
    from sklearn.datasets import fetch_openml

    mnist = fetch_openml('mnist_784', version=1, data_home='files', as_frame=False)

    # The rest of the script expects what fetch_mldata returned: uint8 pixels,
    # float labels, and samples ordered by label inside the train and test
    # parts. OpenML delivers string labels in unsorted order, so convert and
    # reorder here.
    # NOTE(review): assumes the OpenML copy keeps the 60,000 training images
    # first and the 10,000 test images last -- confirm against the dataset page.
    _labels = mnist["target"].astype(np.float64)
    _pixels = np.asarray(mnist["data"]).astype(np.uint8)
    # mergesort is stable, so images sharing a label keep their relative order.
    _train_order = np.argsort(_labels[:60000], kind="mergesort")
    _test_order = np.argsort(_labels[60000:], kind="mergesort") + 60000
    _order = np.concatenate((_train_order, _test_order))
    mnist["data"] = _pixels[_order]
    mnist["target"] = _labels[_order]

print("mnist",mnist)

# Keys of the loaded dataset:
#   * DESCR     - description of the dataset
#   * COL_NAMES - column names
#   * target    - array holding the labels
#   * data      - one row per instance, one column per feature
# 70,000 images in total, each with 784 features (pixels).
X = mnist["data"]
y = mnist["target"]

print(X.shape, y.shape)
print(type(X))

# Display one image from the dataset.
import matplotlib
import matplotlib.pyplot as plt

digit_index = 36001
some_digit = X[digit_index]
# Turn the flat 784-element vector into a 28*28 array.
some_digit_image = some_digit.reshape((28, 28))

# cmap -> colormap.
# interpolation: image interpolation uses the grey values (or RGB values) of
# known neighbouring pixels to produce values for unknown pixels, so that a
# higher-resolution image can be regenerated from the original.
# * If interpolation is None, it defaults to the image.interpolation rc parameter.
# * If the interpolation is 'none', no interpolation is performed for the Agg,
#   ps and pdf backends; other backends default to 'nearest'.
# * For the Agg, ps and pdf backends, interpolation='none' works well when a
#   big image is scaled down, while interpolation='nearest' works well when a
#   small image is scaled up.
imshow_options = {"cmap": matplotlib.cm.binary, "interpolation": "nearest"}
plt.imshow(some_digit_image, **imshow_options)
plt.axis("off")
plt.show()

# Print the label that belongs to the displayed image.
print(y[digit_index])

D:\Anaconda3\lib\site-packages\sklearn\utils\deprecation.py:77: DeprecationWarning: Function fetch_mldata is deprecated; fetch_mldata was deprecated in version 0.20 and will be removed in version 0.22

  warnings.warn(msg, category=DeprecationWarning)

D:\Anaconda3\lib\site-packages\sklearn\utils\deprecation.py:77: DeprecationWarning: Function mldata_filename is deprecated; mldata_filename was deprecated in version 0.20 and will be removed in version 0.22

  warnings.warn(msg, category=DeprecationWarning)

mnist {'DESCR': 'mldata.org dataset: mnist-original', 'COL_NAMES': ['label', 'data'], 'target': array([0., 0., 0., ..., 9., 9., 9.]), 'data': array([[0, 0, 0, ..., 0, 0, 0],

       [0, 0, 0, ..., 0, 0, 0],

       [0, 0, 0, ..., 0, 0, 0],

       ...,

       [0, 0, 0, ..., 0, 0, 0],

       [0, 0, 0, ..., 0, 0, 0],

       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}

(70000, 784) (70000,)

<class 'numpy.ndarray'>

<Figure size 640x480 with 1 Axes>

5.0

2. 创建测试集训练集

# First 60,000 samples form the training set; the remainder form the test set.
x_train, y_train = X[:60000], y[:60000]
x_test, y_test = X[60000:], y[60000:]

2.1 数据洗牌（注意数据的顺序敏感性）

import numpy as np

# np.random.permutation(n) returns a randomly permuted range(n). Indexing both
# arrays with the same permutation keeps every image paired with its label.
shuffle_index = np.random.permutation(60000)
x_train = x_train[shuffle_index]
y_train = y_train[shuffle_index]

# # Q1:
# ## 1.1 Create the KNN classifier
from sklearn.neighbors import KNeighborsClassifier

# Boolean mask marking the training samples whose label is 5.
y_train_5 = y_train == 5

# knn_clf = KNeighborsClassifier()

# knn_clf = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=4)

# knn_clf.fit(x_train, y_train)

# array = knn_clf.predict(x_test)

# print("knnResult",array)

# # ## 1.2 对knn执行网格搜索

# from sklearn.model_selection import GridSearchCV

# para_grid = [

#     {'n_neighbors':[3,4,5,6],'weights':["uniform","distance",]}

# ]

# knn_clf = KNeighborsClassifier()

# grid_search = GridSearchCV(knn_clf,para_grid,cv=5,verbose=3,n_jobs=-1,scoring="neg_mean_squared_error")

# grid_search.fit(x_train,y_train)

# print(grid_search.best_params_)

# print(grid_search.best_estimator_)

# from sklearn.model_selection import GridSearchCV

#

# param_grid = [{'weights': ["uniform", "distance"], 'n_neighbors': [3, 4, 5]}]

#

# knn_clf = KNeighborsClassifier()

# grid_search = GridSearchCV(knn_clf, param_grid, cv=5, verbose=3, n_jobs=-1)

# grid_search.fit(x_train, y_train)

# print(grid_search.best_params_)

# print(grid_search.best_estimator_)

# ## 1.3 评估准确性

# y_knn_pred = knn_clf.predict(x_test)

# from sklearn.metrics import accuracy_score

# a = accuracy_score(y_test,y_knn_pred)

# print(a)

# # Q2:
# Shift the pixels of an image with scipy's `shift`. The image passed in as
# `self` (e.g. X[1]) is a flat array, so it is reshaped to 28*28 first.
# The `cval` argument is the pixel value used to fill the area the shift vacates.
from scipy.ndimage import shift


def movePiexOfImage(self, dx, dy, new=0):
    """Return a 28*28 copy of an image shifted by (dx, dy) pixels.

    Args:
        self: Image as a flat 784-element array or a 28*28 array (anything
            with a ``reshape`` method that accepts ``(28, 28)``).
        dx: Horizontal offset; positive values move the content toward higher
            column indices (to the right with imshow's default origin).
        dy: Vertical offset; positive values move the content toward higher
            row indices (downwards with imshow's default origin).
        new: Pixel value written into the area the shift leaves empty.

    Returns:
        The shifted image as a 28*28 array.
    """
    image = self.reshape(28, 28)
    # `shift` expects one offset per array axis in axis order (rows, columns),
    # so the vertical offset must come first.
    return shift(image, [dy, dx], cval=new, mode="constant")

# Check that an array has dimensions suitable for displaying it as an image.
def valid_imshow_data(data):
    """Report whether ``data`` has a shape that can be shown as an image.

    Accepted shapes are 2-D arrays and 3-D arrays whose last axis has length
    3 (RGB) or 4 (RGBA). For anything else an explanatory message is printed.

    Args:
        data: Array-like object; converted with ``np.asarray``.

    Returns:
        True when the shape is accepted, False otherwise.
    """
    data = np.asarray(data)
    ndim = data.ndim

    if ndim == 2:
        return True

    if ndim != 3:
        message = ('To visualize an image the data must be 2 dimensional or '
                   '3 dimensional, not "{}".')
        print(message.format(ndim))
        return False

    channels = data.shape[2]
    if channels in (3, 4):
        return True

    message = ('The "data" has 3 dimensions but the last dimension '
               'must have a length of 3 (RGB) or 4 (RGBA), not "{}".')
    print(message.format(channels))
    return False

print("image:",X[1])

# Demo: shift one image and fill the vacated area with grey (100) so that the
# effect of the shift is visible. movePiexOfImage already returns a 28*28 array.
imageShift = movePiexOfImage(X[1], 5, 1, new=100)
plt.imshow(imageShift, cmap=matplotlib.cm.binary)
plt.show()

print(len(x_train))

# Augment the training set with shifted copies of every image.
# Only the (dx, dy) = (1, 0) shift is enabled; the other offsets tried in the
# original notes were (0, 1), (-1, 0) and (0, -1).
shifts = [(1, 0)]

original_images, original_labels = x_train, y_train

# Collect all pieces first and concatenate once: calling np.concatenate inside
# the loop would copy the whole (growing) training matrix on every iteration.
pieces = [original_images]
for dx, dy in shifts:
    pieces.extend(
        movePiexOfImage(image, dx, dy, new=100).reshape(1, 784)
        for image in original_images
    )
x_train = np.concatenate(pieces, axis=0)

# A shifted image keeps the label of its source image, so the labels are
# repeated in the same order as the image blocks above to keep x_train and
# y_train aligned.
y_train = np.concatenate([original_labels] * (len(shifts) + 1))

print(len(x_train))

image: [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0  64 253 255  63   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0  96 205 251 253 205 111

   4   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0  96 189 251 251 253 251 251  31   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0  16  64 223 244 251 251 211 213

 251 251  31   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0  80 181 251 253 251 251 251  94  96 251 251  31   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0  92 253 253 253 255 253 253 253

  95  96 253 253  31   0   0   0   0   0   0   0   0   0   0   0   0   0

   0  92 236 251 243 220 233 251 251 243  82  96 251 251  31   0   0   0

   0   0   0   0   0   0   0   0   0   0  80 253 251 251 188   0  96 251

 251 109   0  96 251 251  31   0   0   0   0   0   0   0   0   0   0   0

   0  96 240 253 243 188  42   0  96 204 109   4   0  12 197 251  31   0

   0   0   0   0   0   0   0   0   0   0   0 221 251 253 121   0   0   0

  36  23   0   0   0   0 190 251  31   0   0   0   0   0   0   0   0   0

   0   0  48 234 253   0   0   0   0   0   0   0   0   0   0   0 191 253

  31   0   0   0   0   0   0   0   0   0   0  44 221 251 251   0   0   0

   0   0   0   0   0   0   0  12 197 251  31   0   0   0   0   0   0   0

   0   0   0 190 251 251 251   0   0   0   0   0   0   0   0   0   0  96

 251 251  31   0   0   0   0   0   0   0   0   0   0 190 251 251 113   0

   0   0   0   0   0   0   0   0  40 234 251 219  23   0   0   0   0   0

   0   0   0   0   0 190 251 251  94   0   0   0   0   0   0   0   0  40

 217 253 231  47   0   0   0   0   0   0   0   0   0   0   0 191 253 253

 253   0   0   0   0   0   0  12 174 253 253 219  39   0   0   0   0   0

   0   0   0   0   0   0   0  67 236 251 251 191 190 111  72 190 191 197

 251 243 121  39   0   0   0   0   0   0   0   0   0   0   0   0   0   0

  63 236 251 253 251 251 251 251 253 251 188  94   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0  27 129 253 251 251 251 251

 229 168  15   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0  95 212 251 211  94  59   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0

   0   0   0   0   0   0   0   0   0   0]

Hand on Machine Learning第三章课后作业(2)：其余小练习的更多相关文章

Hand on Machine Learning第三章课后作业(1)：垃圾邮件分类
import os import email import email.policy 1. 读取邮件数据 SPAM_PATH = os.path.join( "E:\\3.Study\\机器 ...
C++第三章课后作业答案及解析---指针的使用
今天继续完成上周没有完成的习题---C++第三章课后作业,本章题涉及指针的使用,有指向对象的指针做函数参数,对象的引用以及友元类的使用方法等它们具体的使用方法在下面的题目中会有具体的解析(解析标注在 ...
Hand on Machine Learning 第三章：分类器
1. 获取数据使用MNIST数据集练习分类任务 from sklearn.datasets import fetch_mldata from scipy.io import loadmat mnis ...
CSAPP深入理解计算机系统(第二版)第三章家庭作业答案
<深入理解计算机系统(第二版)>CSAPP 第三章家庭作业这一章介绍了AT&T的汇编指令比较重要本人完成了<深入理解计算机系统(第二版)>(以下简称CSAPP) ...
機器學習基石(Machine Learning Foundations) 机器学习基石课后习题链接汇总
大家好,我是Mac Jiang,非常高兴您能在百忙之中阅读我的博客!这个专题我主要讲的是Coursera-台湾大学-機器學習基石(Machine Learning Foundations)的课后习题解 ...
JAVA第三周课后作业
JAVA课后作业一.枚举类型代码: enum Size{SMALL,MEDIUM,LARGE}; public cl ass EnumTest { public static void main( ...
中级Perl 第三章课后习题
3. 10. 1. 练习1 [25 分钟] 读当前目录的文件列表并转换成全路径.不能用shell 命令或外部程序读当前目录.Perl 的File::Spec 和Cwd 两个模块对这个程序有帮助.每个 ...
Python核心编程2第三章课后练习
1. 标识符.为什么Python 中不需要变量名和变量类型声明? Python中的变量不需要声明,变量的赋值操作既是变量声明和定义的过程.每个变量在内存中创建,都包括变量的标识,名称和数据这些信息.每 ...
Machine Learning 第三周
ML week3 逻辑回归 Logistic Function h_\theta(x)=g(\theta^Tx) g(t)=\frac{1}{1+e^{-z}} 当t大于0, 即下面公式成立时,y=1 ...

随机推荐

Codeforces 981 共同点路径覆盖树构造 BFS/DP书架&最大值
A /*Huyyt*/ #include<bits/stdc++.h> #define mem(a,b) memset(a,b,sizeof(a)) #define pb push_bac ...
后台运行任务nohup xxxxxx &
转载:https://www.cnblogs.com/baby123/p/6477429.html https://blog.csdn.net/davidhzq/article/details/102 ...
rmq——同步、异步、单向、rocketMQ console、消费模式
官网上下载:rocketmq-all-4.5.0-bin-release 配置环境变量:
linux 系统下 tar 的压缩与解压缩命令
1.压缩 [small@sun shine]# tar -zcvf java.tar.gz java java/ java/default/ java/default/THIRDPARTYLICENS ...
Windows和Linux下搭建J2sdk的环境
J2SDK 作为jsp系统配置中必不可少的组件,越来越多的得到应用.下来是我整理的以往工作时搜集的资料.使用时方便查询,希望对广大的工程师有帮助. windows服务器环境下 j2sdk 的安装和环境 ...
oracle SQL查询number字段精度丢失之解决方法
解决办法: -- 3.3:表示原始数据 --fm9999999990.0000:表示保留到小数点后4位,若不存在则用0补位. ),'fm9999999990.0000') as demo from d ...
超实用的PHP代码片段！
摘要:本文分享了九个超级有用的PHP代码片段,当你在开发网站.应用或者博客时,利用这些代码能为你节省大量的时间.你可以直接拿来用! 此前,研发频道曾发布<直接拿来用,10个PHP代码片段> ...
测试常用命令之awk篇
awk/gawk 1,内置变量 FILENAME:输入文件名称 FNR:当前数据文件中的数据行数 NF:数据文件中的字段总数 NR:已处理的输入数据行数目 FS:输入数据段分隔符 RS:输入数据行分隔 ...
Spring MVC过滤器HiddenHttpMethodFilter
浏览器form表单只支持GET与POST请求,而DELETE.PUT等method并不支持,spring3.0添加了一个过滤器,可以将这些请求转换为标准的http方法,使得支持GET.POST.PUT ...
php语法标识符
php语法标识符一.总结一句话总结: 常用<?php //这里写代码 ?>:其它要么不常用,要么需要开配置二.PHP四大标识符(语法环境) 参考或转自:PHP四大标识符(语法环境) ...

Hand on Machine Learning第三章课后作业(2)：其余小练习

1. 获取数据

2. 创建测试集训练集

2.1 数据洗牌（注意数据的顺序敏感性）

Hand on Machine Learning第三章课后作业(2)：其余小练习的更多相关文章

随机推荐

热门专题