# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in
import matplotlib.pyplot as plt
import statsmodels.tsa.seasonal as smt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import datetime as dt
from sklearn import linear_model
from sklearn.metrics import mean_absolute_error
import plotly

# import the relevant Keras modules
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
from subprocess import check_output
import os
os.chdir('F:\\kaggleDataSet\\price-volume\\Stocks')
# read data
# kernels let us navigate through the zipfile as if it were a directory
# trying to read a file of size zero will throw an error, so skip them
# filenames = [x for x in os.listdir() if x.endswith('.txt') and os.path.getsize(x) > 0]
# filenames = random.sample(filenames,1)
filenames = ['prk.us.txt', 'bgr.us.txt', 'jci.us.txt', 'aa.us.txt', 'fr.us.txt', 'star.us.txt', 'sons.us.txt', 'ipl_d.us.txt', 'sna.us.txt', 'utg.us.txt']
filenames = [filenames[1]]
print(filenames)
data = []
for filename in filenames:
    df = pd.read_csv(filename, sep=',')
    label, _, _ = filename.split(sep='.')  # e.g. 'bgr' from 'bgr.us.txt'
    df['Label'] = label
    df['Date'] = pd.to_datetime(df['Date'])
    data.append(df)
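# Quick sanity check of the load; the columns assumed later in this kernel are
# Date, Open, High, Low, Close, Volume, OpenInt (inferred from how they are used)
print(data[0].head())
print(data[0].dtypes)  # 'Date' should be datetime64 after pd.to_datetime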

traces = []
r = lambda: random.randint(0, 255)  # random 0-255 channel value for a line colour
for df in data:
    clr = 'rgb({},{},{})'.format(r(), r(), r())
    df = df.sort_values('Date')
    label = df['Label'].iloc[0]
    trace = plotly.graph_objs.Scattergl(x=df['Date'], y=df['Close'],
                                        name=label, line=dict(color=clr))
    traces.append(trace)

layout = plotly.graph_objs.Layout(title='Plot')
fig = plotly.graph_objs.Figure(data=traces, layout=layout)
plotly.offline.init_notebook_mode(connected=True)
plotly.offline.iplot(fig, filename='dataplot')

df = data[0]
window_len = 10

# Create a data point (i.e. a date) which splits the training and testing set:
# hold out the last 2*window_len+1 rows for testing
split_date = list(df['Date'][-(2 * window_len + 1):])[0]

# Split the training and test set
training_set, test_set = df[df['Date'] < split_date], df[df['Date'] >= split_date]
training_set = training_set.drop(['Date', 'Label', 'OpenInt'], axis=1)
test_set = test_set.drop(['Date', 'Label', 'OpenInt'], axis=1)

# Create windows for training: normalise each window relative to its first row
LSTM_training_inputs = []
for i in range(len(training_set) - window_len):
    temp_set = training_set[i:(i + window_len)].copy()
    for col in list(temp_set):
        temp_set[col] = temp_set[col] / temp_set[col].iloc[0] - 1
    LSTM_training_inputs.append(temp_set)

# Target: relative change of the closing price window_len days ahead
LSTM_training_outputs = (training_set['Close'][window_len:].values /
                         training_set['Close'][:-window_len].values) - 1
LSTM_training_inputs = [np.array(LSTM_training_input) for LSTM_training_input in LSTM_training_inputs]
LSTM_training_inputs = np.array(LSTM_training_inputs)
# Create windows for testing
LSTM_test_inputs = []
for i in range(len(test_set) - window_len):
    temp_set = test_set[i:(i + window_len)].copy()
    for col in list(temp_set):
        temp_set[col] = temp_set[col] / temp_set[col].iloc[0] - 1
    LSTM_test_inputs.append(temp_set)

LSTM_test_outputs = (test_set['Close'][window_len:].values /
                     test_set['Close'][:-window_len].values) - 1
LSTM_test_inputs = [np.array(LSTM_test_input) for LSTM_test_input in LSTM_test_inputs]
LSTM_test_inputs = np.array(LSTM_test_inputs)
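# The per-window normalisation above is easy to misread; a minimal sketch with
# made-up prices shows each window becoming relative changes against its first row
example = pd.DataFrame({'Close': [100.0, 102.0, 99.0]})  # hypothetical 3-day window
print((example / example.iloc[0] - 1)['Close'].tolist())  # approx. [0.0, 0.02, -0.01]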
def build_model(inputs, output_size, neurons, activ_func="linear",
                dropout=0.10, loss="mae", optimizer="adam"):
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))
    model.compile(loss=loss, optimizer=optimizer)
    return model
# initialise model architecture
nn_model = build_model(LSTM_training_inputs, output_size=1, neurons = 32)
# model output is next price normalised to 10th previous closing price
# train model on data
# note: nn_history contains information on the training error per epoch
nn_history = nn_model.fit(LSTM_training_inputs, LSTM_training_outputs, epochs=5, batch_size=1, verbose=2, shuffle=True)
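# nn_history records the training error per epoch; a quick look at convergence
# via the Keras History object
plt.plot(nn_history.history['loss'], label='training loss (MAE)')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()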

plt.plot(LSTM_test_outputs, label = "actual")
plt.plot(nn_model.predict(LSTM_test_inputs), label = "predicted")
plt.legend()
plt.show()
MAE = mean_absolute_error(LSTM_test_outputs, nn_model.predict(LSTM_test_inputs))
print('The Mean Absolute Error is: {}'.format(MAE))
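# For context, a minimal lag-model baseline (assumption: the lag model always
# predicts the last observed price, i.e. a relative change of 0 for every target)
baseline_mae = mean_absolute_error(LSTM_test_outputs, np.zeros_like(LSTM_test_outputs))
print('Naive lag-model MAE: {}'.format(baseline_mae))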

#https://github.com/llSourcell/How-to-Predict-Stock-Prices-Easily-Demo/blob/master/lstm.py
def predict_sequence_full(model, data, window_size):
    # Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[np.newaxis, :, :])[0, 0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted

predictions = predict_sequence_full(nn_model, LSTM_test_inputs, 10)

plt.plot(LSTM_test_outputs, label="actual")
plt.plot(predictions, label="predicted")
plt.legend()
plt.show()
MAE = mean_absolute_error(LSTM_test_outputs, predictions)
print('The Mean Absolute Error is: {}'.format(MAE))
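# A minimal sketch (reusing predictions and LSTM_test_outputs from above) of how
# the full-sequence error grows with the prediction horizon
for k in (1, 5, 10, len(predictions)):
    mae_k = mean_absolute_error(LSTM_test_outputs[:k], predictions[:k])
    print('MAE over the first {} step(s): {:.4f}'.format(k, mae_k))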

Conclusion

LSTMs do not solve the time-series forecasting problem. The one-time-step-ahead prediction is not much better than a lag model. When we increase the prediction horizon, performance does not degrade as quickly as it does for more traditional methods; in this case, however, our error still grew by roughly a factor of 4.5, and it increases superlinearly with the number of time steps we try to predict.
