02.Numpy
01.array
# -*- coding: utf-8 -*-
"""
NumPy package overview.

- Provides efficient functions for linear-algebra (vector / matrix) work.
- Differences from a list: n-dimensional arrays, linear-algebra
  operations, much higher speed.
- Features shared with a pandas Series:
    -> math / statistics functions
    -> range assignment and block (element-wise) operations
    -> indexing / slicing
- Functions used in this script to create n-dimensional arrays:
    1. random functions
    2. array function
    3. sampling functions
    4. arange function
"""

import numpy as np

# --- 1-D NumPy data structure ---
lst = [1, 2, 3]
arr = np.array(lst)

print(lst)        # [1, 2, 3]
# NOTE: `lst ** 2` raises TypeError; a plain list has no element-wise math.
print(arr)        # [1 2 3]
print(arr ** 2)   # [1 4 9]
print(type(arr))  # <class 'numpy.ndarray'>

# A list with mixed element types is coerced to one common dtype (strings).
lst2 = [1, "two", False]
print(lst2)        # [1, 'two', False]
arr2 = np.array(lst2)
print(arr2)        # ['1' 'two' 'False']
print(arr2.shape)  # (3,)

# --- 1. random functions: random-number generation ---
data = np.random.randn(3, 4)  # 3 rows x 4 columns = 12 random numbers
print(data)
# Example output (values differ on every run):
# [[ 0.21625386 -1.11271239  1.26352269 -0.29090546]
#  [ 1.19998039 -0.93916248 -0.96475192 -0.71720834]
#  [-0.35985917  0.46820202  0.20267762  0.56218989]]

# Iterating over a 2-D array yields one row (a 1-D array) at a time.
for row in data:
    print('행 평균 :', row.mean())
    print('행 합계 :', row.sum())

# 1) math / statistics functions (computed over all 12 values)
print('합계=', data.sum())
print('평균=', data.mean())
print('분산=', data.var())
print('표준편차=', data.std())

# 2) block operation
print(data + data)  # element-wise addition, i.e. every value doubled

# 3) indexing
print(data[2, 2])  # row 3, column 3
print(data[:, 2])  # the whole 3rd column
# --- 2. array function: multi-dimensional arrays ---
# 1) single list -> 1-D array
lst1 = [3, 5.6, 4, 7, 8]
arr1 = np.array(lst1)
print(arr1)

# Variance / standard deviation.
# ndarray.var() / .std() use the population form by default (ddof=0);
# pass ddof=1 to get the sample form.
print('분산=', arr1.var())
print('표준편차=', arr1.std())
# 분산= 3.4016000000000006
# 표준편차= 1.8443427013437608
#
# Population variance: sum((x - avg)**2) / n
# Sample variance:     sum((x - avg)**2) / (n - 1)

# The same population variance computed by hand.
avg = arr1.mean()
diff = arr1 - avg  # broadcast: the scalar mean is subtracted from every element
var_result = np.sum(diff ** 2) / len(arr1)
print('분산=', var_result)        # ~3.4016, same as arr1.var() up to rounding
std_result = np.sqrt(var_result)
print('표준편차=', std_result)    # ~1.8443, same as arr1.std() up to rounding

# 2) nested list -> 2-D array
lst2 = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
print(lst2)
arr2 = np.array(lst2)
print(arr2)
print(arr2.shape)      # (2, 5)
print(np.shape(arr2))  # (2, 5)

# (lecture slide 21)
print(arr2[1, :])    # whole 2nd row      -> [ 6  7  8  9 10]
print(arr2[:, 2])    # whole 3rd column   -> [3 8]
print(arr2[1, 2])    # row 2, column 3    -> 8
print(arr2[:, 1:3])  # columns 2 and 3 of every row
# [[2 3]
#  [7 8]]

# --- Broadcast operations (linear algebra) ---
# The lower-dimensional operand is stretched to the higher-dimensional shape.
# 1) scalar (0-D) vs vector (1-D)
print(arr1)        # [3.  5.6 4.  7.  8. ]
print(arr1 * 0.5)  # [1.5 2.8 2.  3.5 4. ]
# 2) scalar (0-D) vs matrix (2-D)
print(arr2 * 0.5)
# 3) vector (1-D) vs matrix (2-D): the vector is added to every row
print(arr1 + arr2)
#   [3.  5.6 4.  7.  8. ]
# +
#   [[ 1  2  3  4  5]
#    [ 6  7  8  9 10]]
# =
#   [[ 4.   7.6  7.  11.  13. ]
#    [ 9.  12.6 12.  16.  18. ]]
# --- 3. sampling functions ---
# 1) choice: draw row numbers (observation indices) at random
num = list(range(11))  # 0..10
print(num)
n = len(num)

# np.random.choice reads as package.module.function()
# replace=False -> sampling without replacement (no index is drawn twice)
idx = np.random.choice(n, 5, replace=False)
print(idx)  # e.g. [4 3 1 5 0]
import pandas as pd

score = pd.read_csv("../data/score_iq.csv")
# DataFrame.info() prints the summary itself and returns None, so it is
# called directly instead of being wrapped in print().
score.info()
# RangeIndex: 150 entries, 0 to 149
# Data columns (total 6 columns):

# Split the rows: train 70%, test 30%.
n = len(score)
idx = np.random.choice(n, int(n * 0.7), replace=False)

# pandas -> numpy
np_score = np.array(score)
print(np_score.shape)  # (150, 6)

train_set = np_score[idx, :]
print(train_set.shape)  # (105, 6)

# Test set: every row index that was NOT drawn for training.
# List-comprehension evaluation order: for (1) -> if (2) -> expression (3).
# A set gives constant-time membership tests, and range(n) follows the
# actual number of rows instead of a hard-coded 150.
train_rows = set(idx.tolist())
test_idx = [i for i in range(n) if i not in train_rows]
print(test_idx)
test_set = np_score[test_idx, :]
print(test_set.shape)  # (45, 6)

# 2) shuffle: reorders the rows of the array in place.
# train_set / test_set were built with integer-array indexing, which
# copies the data, so they are not affected by this shuffle.
print(np_score[:10, :])
np.random.shuffle(np_score)
print(np_score[:10, :])
# --- 4. arange function: np.arange(n) yields 0..n-1, like range(n) ---
zerr = np.zeros((3, 5))
print(zerr)
# [[0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0.]]

# Fill the matrix with a running counter, iterating with np.arange.
cnt = 0
for i in np.arange(3):      # 0..2
    for j in np.arange(5):  # 0..4
        cnt += 1            # counter
        zerr[i, j] = cnt
print(zerr)
# [[ 1.  2.  3.  4.  5.]
#  [ 6.  7.  8.  9. 10.]
#  [11. 12. 13. 14. 15.]]

# The same fill with the built-in range: identical result.
cnt = 0
for i in range(3):      # 0..2
    for j in range(5):  # 0..4
        cnt += 1        # counter
        zerr[i, j] = cnt
print(zerr)
# [[ 1.  2.  3.  4.  5.]
#  [ 6.  7.  8.  9. 10.]
#  [11. 12. 13. 14. 15.]]
02.indexing
# -*- coding: utf-8 -*-
"""
NumPy indexing.

- 2-D and 3-D indexing
- boolean indexing
"""

import numpy as np

# --- 1. indexing ---
# 1-D: obj[index]
# 2-D: obj[row, col]        - a single index selects a row
# 3-D: obj[side, row, col]  - a single index selects a side (2-D slab)

# 2-D indexing
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d.shape)  # (3, 3)
print(arr2d[1])     # whole 2nd row     -> [4 5 6]
print(arr2d[:, 1])  # whole 2nd column  -> [2 5 8]
print(arr2d[1, 2])  # row 2, column 3   -> 6

# 3-D indexing
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d)
print(arr3d.shape)     # (2, 2, 3)
print(arr3d[1])        # whole 2nd side
print(arr3d[1, 0])     # 2nd side, whole 1st row
print(arr3d[1, 0, 2])  # 2nd side, 1st row, 3rd column
# [[ 7  8  9]
#  [10 11 12]]
# [7 8 9]
# 9

print(arr3d[1, :, :2])  # 2nd side, all rows, first two columns
# [[ 7  8]
#  [10 11]]

# --- 2. boolean indexing ---
data = np.random.randn(3, 4)  # 12 random values
print(data)

# A boolean mask keeps only the elements where the condition is True;
# the result is always a flat 1-D array.
result = data[data >= 0.7]
print(result)

# Values in the closed range 0.3 .. 0.7.
# Python's `and` cannot combine two boolean arrays (the truth value of an
# array is ambiguous, so it raises ValueError); use np.logical_and instead.
result2 = data[np.logical_and(data >= 0.3, data <= 0.7)]
print(result2)
# Example output (values differ on every run):
# [1.06451721 0.9287353 ]
# [0.6895027]
03.reshape
# -*- coding: utf-8 -*-
"""
Changing array shape and axis order.

reshape   : change the shape
            - 1-D array -> 2-D array
            - 2-D array -> another shape
T         : transpose (rows <-> columns)
swapaxes  : exchange two axes
transpose : rearrange the axes in the given axis-number order
"""

import numpy as np

# --- 1. reshape ---
lst = range(1, 13)  # 1..12

# 1-D -> 2-D
arr2d = np.array(lst).reshape(3, 4)
print(arr2d)
print(arr2d.shape)  # (3, 4)

# 2-D -> another 2-D shape (the total element count must stay the same)
arr2d = arr2d.reshape(2, 6)
print(arr2d.shape)  # (2, 6)

# 2-D -> 3-D
arr3d = arr2d.reshape(1, 4, 3)
print(arr3d)
print(arr3d.shape)  # (1, 4, 3)
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]
#   [10 11 12]]]

# --- 2. transpose (rows <-> columns) ---
print(arr2d.T)
print(arr2d.T.shape)  # (6, 2)

# --- 3. swapaxes ---
# axis 0 = rows, axis 1 = columns
print(arr2d.swapaxes(0, 1))  # shape (6, 2), same as arr2d.T

# --- 4. transpose ---
# 1-D: no effect
# 2-D: rows <-> columns, i.e. the ordinary matrix transpose
# 3-D: the structure changes according to the requested axis order
arr3d = np.arange(1, 25).reshape(4, 2, 3)  # 1..24
print(arr3d)
print(arr3d.shape)  # (4, 2, 3) - (side, row, column)

# With no arguments the axes are reversed: (0, 1, 2) -> (2, 1, 0)
arr3d_def = arr3d.transpose()
print(arr3d_def)
print(arr3d_def.shape)  # (3, 2, 4)

# Explicit order: (0, 1, 2) -> (2, 0, 1)
arr3d_user = arr3d.transpose(2, 0, 1)
print(arr3d_user)
print(arr3d_user.shape)  # (3, 4, 2)
04.axis_dot
# -*- coding: utf-8 -*-
"""
Axes and matrix products.

1. axis : row axis, column axis
2. np.dot() : matrix product (counterpart of tf.matmul())
3. Matrix product in an ANN
   - hidden layer (H) = input (X) * weight (W) + bias (B)
4. Matrix product in a regression model
   - prediction (Y) = input (X) * slope (a) + intercept (b)
"""

import numpy as np

# --- 1. axis (lecture slide 56) ---
# axis=0 (row axis)   : reduces down the rows    -> one value per column
# axis=1 (column axis): reduces across the columns -> one value per row
arr = np.arange(1, 21).reshape(5, 4)
print(arr)

print('전체 합계=', arr.sum())
print('열 단위 합계=', arr.sum(axis=0))  # per-column sums
print('행 단위 합계=', arr.sum(axis=1))  # per-row sums
# 전체 합계= 210
# 열 단위 합계= [45 50 55 60]
# 행 단위 합계= [10 26 42 58 74]

# --- 2. np.dot(a, b) with a, b as matrices ---
a = np.array([[1, 1], [0, 1]])
print(a.shape)  # (2, 2)

b = np.array([[2, 3], [1, 5]])
print(b.shape)  # (2, 2)

# Conditions for a matrix product:
# 1. a and b are both matrices
# 2. number of columns of a == number of rows of b
c = np.dot(a, b)
print(c)
print(c.shape)
# Shape rule: a(r, k) . b(k, c) -> result(r, c)
# [[3 8]
#  [1 5]]
# (2, 2)

print(np.ndim(a), np.ndim(b), np.ndim(c))  # 2 2 2

# 1) one observation: x(1, 2) . w(2, 2) = h(1, 2)
x = np.array([[0.1, 0.2]])
w = np.array([[1, 2], [2, 3]])
print(x.shape)  # (1, 2)
print(w.shape)  # (2, 2)

h = np.dot(x, w)
print('h=', h)  # h= [[0.5 0.8]]
print(h.shape)  # (1, 2)

# 2) two observations: x(2, 2) . w(2, 3) = h(2, 3)
x = np.array([[0.1, 0.2], [0.3, 0.4]])  # (2, 2)
w = np.array([[1, 2, 3], [2, 3, 4]])    # (2, 3)

h = np.dot(x, w)
print('h=')
print(h)
print(h.shape)  # (2, 3)
05.dot_example
# -*- coding: utf-8 -*-
"""
ANN model example: hidden-layer values as a matrix product.
"""

import numpy as np

# --- 1. ANN model ---
# input x     : image (28 x 28)
# hidden nodes: 32
# weight      : 28 x 32
# NOTE: the product below multiplies the (28, 28) image matrix by the
# (28, 32) weight matrix, so each image row yields its own 32 hidden values.

# Create the x data.
print('>>> x image data <<<')
x_img = np.random.randint(0, 2, 784)  # flat image vector of 0s and 1s
x_img2d = x_img.reshape(28, 28)       # vector -> matrix
print(x_img2d)
print(x_img2d.shape)  # (28, 28)

# Create the weight data.
print('>>> weight data <<<')
weight = np.random.randn(28, 32)
print(weight)
print(weight.shape)  # (28, 32)

# Compute the hidden nodes.
print('>>> hidden node <<<')
# (28, 28) . (28, 32) = (28, 32)
hidden = np.dot(x_img2d, weight)
print(hidden)
print(hidden.shape)  # (28, 32)
02.Numpy的更多相关文章
- numpy学习笔记02
简介 numpy.array() 数组对象,可以表示普通的一维数组,或者二维矩阵,或者任意数据:并且它可以对数组中的数据进行非常高效的运算,如:数据统计.图像处理.线性代数等 numpy 之所以能运行 ...
- [Pandas] 02 - Tutorial of NumPy
Ref: NumPy 教程 这里主要是查缺补漏一些常用方法. 初步认识 矩阵常见知识点 矩阵操作 Ref: [Python] 01 - Number and Matrix[总结过一部分] 一.矩阵 ( ...
- [学习笔记] [数据分析] 02、NumPy入门与应用
01.NumPy基本功能 ※ 数据类型的转换在实际操作过程中很重要!!! ※ ※ ndarray的基本索引与切片 ※ 布尔型数组的长度必须跟被索引的轴长度一致 花式索引是利用“整数数组”进行索引. 整 ...
- Numpy | 02 Ndarray 对象
NumPy 最重要的一个特点是其 N 维数组对象 ndarray,它是一系列同类型数据的集合,以 0 下标为开始进行集合中元素的索引. ndarray 对象是用于存放同类型元素的多维数组. ndarr ...
- NumPy 学习(3): 通用函数
1. 元素级别的函数 元素级别的函数也就是函数对数组中的每一个元素进行运算.例如: In [10]: arr = np.arange(10) In [11]: np.sqrt(arr) Out[11 ...
- numpy数组、向量、矩阵运算
可以来我的Github看原文,欢迎交流. https://github.com/AsuraDong/Blog/blob/master/Articles/%E6%9C%BA%E5%99%A8%E5%AD ...
- numpy初识
1,机器学习numpy 初识 1)numpy初识 import numpy num1= numpy.array([1,2,3]) dtype('num1') #查找类型 num1.dtype num1 ...
- Numpy - 多维数组(上)
一.实验说明 numpy 包为 Python 提供了高性能的向量,矩阵以及高阶数据结构.由于它们是由 C 和 Fortran 实现的,所以在操作向量与矩阵时性能非常优越. 1. 环境登录 无需密码自动 ...
- numpy的初探
# data = numpy.genfromtxt("C:\\Users\\Admin\Desktop\\111.txt", delimiter='\t', dtype='str' ...
随机推荐
- Python pip Unable--
It is possible that pip does not get installed by default. One potential fix is: python -m ensurepip ...
- [模板] CDQ分治&&BZOJ3262:陌上花开
简介 CDQ分治是分治的一种, 可以看做归并排序的扩展, 利用离线将一些 \(O(n)\) 的暴力优化到 \(O(log n)\). 它可以用来顶替一些高级(log)数据结构等. 一般地, CDQ分治 ...
- ZOJ 4097 Rescue the Princess
在这个物欲横流的社会 oj冷漠无情 只有这xx还有些温度 越界就越界吧 wrong 怎么回事.... 给出一个图 然后给出q次询问 问是否存在v和w分别到u的路径且边不重复 在边双连通分量中 任意两 ...
- LoadRunner开发ftp协议接口之上传文件脚本
Action() { //建立一个ftp对象 FTP ftp1=0; //建立FTP连接并登录 ftp_logon_ex(&ftp1,"ftpLogon", "U ...
- tensorflow 语音识别报错
cuDnn由7.1版本改为7.4.2.24版本,成功
- Mock6 moco框架中如何加入header
新建一个 startupWithHeader.json,这次在request里面添加了headers属性 [ { "description": "这是一个带header的 ...
- docker基本概念
详细参考https://www.jianshu.com/p/9deb6f41d5bd
- Entity Framework入门教程(3)---EF中的上下文简介
1.DbContext(上下文类) 在DbFirst模式中,我们添加一个EDM(Entity Data Model)后会自动生成一个.edmx文件,这个文件中包含一个继承DbContext类的上下文实 ...
- 关于设计项目UI界面的软件工具
关于画UI界面的软件,我在网上找了几个,今天式用这几款软件还可以 1.墨刀:国产的,这个专门画APP界面的,用起来比较简单,有免费版的,要注册才能用,提供云存储,收费版的云存储空间会多一些.网站: h ...
- [数分提高]2014-2015-2第6教学周第1次课讲义 3.3 Taylor 公式
1. (Taylor 公式). 设 $f^{(n)}$ 在 $[a,b]$ 上连续, $f^{(n+1)}$ 在 $(a,b)$ 内存在, 试证: $ \forall\ x,x_0\in [a,b], ...