我的代码-data pulling
# coding: utf-8
import datetime
import time
from sqlalchemy.engine import create_engine
from sqlalchemy.pool import NullPool
import pyodbc
import pandas as pd
import numpy as np
import shutil
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
# Shared SQLAlchemy engine for the SQL Server instance. NullPool means no
# connection is kept open between queries.
# The backslash before "MSEDA" is escaped explicitly: "\M" is not a valid
# escape sequence, so the unescaped form only worked by accident and warns on
# current Python versions. The resulting URL text is unchanged.
# NOTE(review): the login and password are embedded in the URL; consider
# loading them from the environment or a secrets store instead.
conn = create_engine(
    "mssql+pyodbc://sedaldr:s1e6daldr@MSEDA101\\MSEDA/SSMC_IEDA_DEV2?driver=ODBC+Driver+11+for+SQL+Server",
    connect_args={'connect_timeout': 15},
    poolclass=NullPool,
)
# Output locations; "%s" is replaced by the month-day-hour-minute pull tag.
_RAW_CSV = r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'
_INDEX_CSV = r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'
_PREDICT_CSV = r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'
_TIME_FILE = "time.txt"

# Columns that identify one pivoted row, followed by the pivoted parameters.
_KEY_COLUMNS = ["eqpid", "chamber", "lotid", "wafer", "recipe"]
_FEATURE_COLUMNS = ["ETCM_PHA4", "ETCM_PHB4", "ETCM_PHC4", "HELK_MEAN", "LOWERCHM_PRESS", "PBK4", "RR13_MAX.", "RR13_MEAN", "RR23_MAX.", "RR23_MEAN", "THR3_MAX.", "THR3_MAX._DIFF", "THR3_MEAN", "THR3_MEAN_DIFF", "THR3_MEAN_SLOPE"]

# The SQL text is split around the "BETWEEN <start> AND <end>" time window.
_QUERY_HEAD = "SELECT DISTINCT b.eqpid, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +1,1) as Chamber, b.lotid,a.slotid, a.waferid, LEFT(b.ParameterName, CHARINDEX('-', b.ParameterName) -1) as Param_Name, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +3,+1) as Step, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +5, LEN( b.parametername)) as Recipie_Name, b.parametername, a.[data] as data1,a.finishtime,a.ooc,a.oos FROM [SSMC_RTM].[dbo].[rtm_tbl_massdata] a INNER JOIN [SSMC_RTM].[dbo].[rtm_tbl_datahist] b ON a.[datahist_fno]=b.[fno] WHERE b.parametername LIKE '%-4-%' AND b.eqpid LIKE 'AEM2%' AND a.finishtime BETWEEN"
_QUERY_TAIL = "AND b.parametername NOT LIKE '%-$' AND( b.parametername LIKE 'ETCM_PHA4_A%' OR b.parametername LIKE 'ETCM_PHA4_B%' OR b.parametername LIKE 'ETCM_PHB4_A%' OR b.parametername LIKE 'ETCM_PHB4_B%' OR b.parametername LIKE 'ETCM_PHC4_A%' OR b.parametername LIKE 'ETCM_PHC4_B%' OR b.parametername LIKE 'HELK_MEAN_A%' OR b.parametername LIKE 'HELK_MEAN_B%' OR b.parametername LIKE 'LOWERCHM_PRESS_A%' OR b.parametername LIKE 'LOWERCHM_PRESS_B%' OR b.parametername LIKE 'PBK4_A%' OR b.parametername LIKE 'PBK4_B%' OR b.parametername LIKE 'RR23_MEAN_A%' OR b.parametername LIKE 'RR23_MEAN_B%' OR b.parametername LIKE 'RR23_MAX._A%' OR b.parametername LIKE 'RR23_MAX._B%' OR b.parametername LIKE 'RR13_MEAN_A%' OR b.parametername LIKE 'RR13_MEAN_B%' OR b.parametername LIKE 'RR13_MAX._A%' OR b.parametername LIKE 'RR13_MAX._B%' OR b.parametername LIKE 'THR3_MAX._A%' OR b.parametername LIKE 'THR3_MAX._B%' OR b.parametername LIKE 'THR3_MAX._DIFF_A%' OR b.parametername LIKE 'THR3_MAX._DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_A%' OR b.parametername LIKE 'THR3_MEAN_B%' OR b.parametername LIKE 'THR3_MEAN_DIFF_A%' OR b.parametername LIKE 'THR3_MEAN_DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_A%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_B%' )"

_FIVE_MINUTES = datetime.timedelta(minutes=5)


def encode_eqpid(eqpid):
    """Return the last two characters of *eqpid* as an integer, minus one."""
    return int(eqpid[-2:]) - 1


def encode_chamber(chamber):
    """Encode chamber 'A' as 0 and every other value as 1."""
    return 0 if chamber == 'A' else 1


def encode_wafer(wafer):
    """Return ``wafer - 1`` for a positive slot number, otherwise None."""
    if wafer > 0:
        return wafer - 1
    return None


def _sql_timestamp(moment):
    """Format *moment* as the unpadded 'Y-M-D h:m:s' text used in the query."""
    return '''%s-%s-%s %s:%s:%s''' % (moment.year, moment.month, moment.day, moment.hour, moment.minute, moment.second)


def _build_query(window_start, window_end):
    """Assemble the full SQL text for the given BETWEEN bounds (already text)."""
    query_time = ''' '%s' AND '%s' ''' % (window_start, window_end)
    return '''%s %s %s''' % (_QUERY_HEAD, query_time, _QUERY_TAIL)


def _record_finish_time():
    """Write the current time to the time file and return it as a datetime."""
    finished = datetime.datetime.now().replace(microsecond=0)
    with open(_TIME_FILE, "w") as fo:
        fo.write(finished.strftime("%y-%m-%d %H:%M:%S"))
    return finished


def _wait_until_minute(deadline, attempt):
    """Poll every 20 seconds until the clock reaches the minute of *deadline*.

    Returns the ``datetime.now()`` reading that satisfied the wait. Comparing
    with ``>=`` rather than testing the exact day/hour/minute means a skipped
    minute (e.g. the machine was suspended) cannot stall the loop.
    """
    target = deadline.replace(second=0, microsecond=0)
    now = datetime.datetime.now()
    while now < target:
        time.sleep(20)
        print('%d time try to pull the data today,test again...' % attempt, now)
        now = datetime.datetime.now()
    return now


def _process_pull(data, time_name):
    """Save, clean, pivot, encode and normalize one pulled data set.

    Writes the raw, index and prediction CSV files tagged with *time_name*.
    Returns True when prediction data was written, or False when the window
    held no complete rows (only the raw CSV is written in that case).
    """
    raw_path = _RAW_CSV % time_name
    data.to_csv(raw_path)

    # The frame is read back from the CSV just written and the saved index
    # column is dropped. NOTE(review): presumably this round trip is relied on
    # to re-infer column dtypes -- confirm before removing it.
    data_clean = pd.read_csv(raw_path).iloc[:, 1:]
    data_clean = data_clean.drop(['ooc', 'oos', "waferid", "Step", "finishtime", "parametername"], axis=1)
    data_clean.columns = ["eqpid", "chamber", "lotid", "wafer", "param_name", "recipe", "data"]
    if data_clean.empty:
        print('no rows returned for pull %s, skipping' % time_name)
        return False

    # One row per wafer, one column per parameter.
    pivoted = data_clean.pivot_table(index=_KEY_COLUMNS, columns="param_name", values="data", aggfunc="sum")
    pivoted.reset_index(inplace=True)
    columns = _KEY_COLUMNS + _FEATURE_COLUMNS
    final = pd.DataFrame(columns=columns)
    # reindex(columns=...) replaces the removed DataFrame.reindex_axis and
    # guarantees every expected column exists, in a fixed order.
    final = final.merge(pivoted, how="right").reindex(columns=columns)

    final = final.dropna(axis=0, how='any')
    if final.empty:
        # The normalizer rejects a frame with zero rows, so stop here.
        print('no complete rows for pull %s, skipping' % time_name)
        return False

    index_frame = final.drop(columns=_FEATURE_COLUMNS)
    index_frame.to_csv(_INDEX_CSV % time_name)

    final = final.drop(columns=["lotid", "recipe"])
    final.eqpid = final.eqpid.apply(encode_eqpid)
    final.chamber = final.chamber.apply(encode_chamber)
    final.wafer = final.wafer.apply(encode_wafer)
    final.rename(columns={'eqpid': 'eqpid1', 'chamber': 'chamber1', 'wafer': 'wafer1'}, inplace=True)
    final.eqpid1 = final.eqpid1.astype("category")
    final.chamber1 = final.chamber1.astype("category")
    final.wafer1 = final.wafer1.astype("category")

    # Normalizer scales each row independently and holds no fitted state, so a
    # fresh instance per slice gives the same result as a shared one while
    # avoiding feature-count mismatches between the two slices.
    final.iloc[:, 11:13] = Normalizer().fit_transform(final.iloc[:, 11:13])
    final.iloc[:, 3:6] = Normalizer().fit_transform(final.iloc[:, 3:6])

    final["SUM_ETCM"] = np.array(final.ETCM_PHA4) + np.array(final.ETCM_PHB4) + np.array(final.ETCM_PHC4)
    final.to_csv(_PREDICT_CSV % time_name)
    return True


def main():
    """Pull the last five minutes of data, then keep pulling in a loop.

    Each pass runs the query through the module-level ``conn`` engine, writes
    the raw/index/prediction CSV files and records the finish time in
    ``time.txt``. When a pass finishes before "start + 5 minutes" the next
    pass waits for that minute and pulls the preceding five minutes; when it
    overruns, the next pass pulls from the previous start up to the previous
    finish time instead. The loop never terminates on its own.

    The earlier warm-up that fitted a Normalizer on an example CSV is not
    performed: its result was never used and the normalizer keeps no state.
    """
    started = datetime.datetime.now()
    print("1st time to pull the data", started)
    first_query = _build_query(_sql_timestamp(started - _FIVE_MINUTES), _sql_timestamp(started))
    data = pd.read_sql_query(first_query, conn)
    time_name = time.strftime("%m-%d-%H-%M", time.localtime())
    if _process_pull(data, time_name):
        print('1st time to pull the data successfully')
    finished = _record_finish_time()

    attempt = 2
    while True:
        print('%d time try to pull the data today' % attempt)
        deadline = (started + _FIVE_MINUTES).replace(microsecond=0)
        print('5 mins after last start:', deadline.day, deadline.hour, deadline.minute)
        # Full datetimes are compared so the check stays correct on Feb 29
        # and across a year boundary.
        delta = deadline - finished
        print('delta=', delta)

        if delta.days == 0:
            # Finished on time: wait for the scheduled minute, then pull the
            # five minutes leading up to it.
            now = _wait_until_minute(deadline, attempt)
            print('%d time try to pull the data is running,5mins' % attempt, now)
            window_start = _sql_timestamp(now - _FIVE_MINUTES)
            window_end = _sql_timestamp(now)
            started = now
        else:
            # Overran the slot: pull from the previous start to the previous
            # finish time without waiting.
            print('%d time to pull the data is running,>5mins' % attempt, started)
            window_start = _sql_timestamp(started)
            window_end = finished.strftime("%Y-%m-%d %H:%M:%S")
            started = datetime.datetime.now()
        data = pd.read_sql_query(_build_query(window_start, window_end), conn)

        # The time file is emptied while a pull is being processed.
        with open(_TIME_FILE, "w") as fo:
            fo.write('')
        time_name = time.strftime("%m-%d-%H-%M", time.localtime())
        if _process_pull(data, time_name):
            print('%d time to pull the data successfully' % attempt)
        finished = _record_finish_time()
        attempt += 1
# Start the polling loop.
# NOTE(review): main() appears to loop forever, so when this file runs as a
# plain script nothing below this call is reached -- confirm whether the rest
# is leftover notebook scratch.
main()
import os

# Record the current time in time.txt, read it back, and build a sample time
# window string from it.
localtime = time.asctime(time.localtime(time.time()))
now = datetime.datetime.now()
now_5af = now + datetime.timedelta(minutes=5)
# now_5af_string only existed as a local inside main(), so it is built here
# before use; without this the query_time line raises NameError.
now_5af_string = '''%s-%s %s:%s:%s''' % (now_5af.month, now_5af.day, now_5af.hour, now_5af.minute, now_5af.second)
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime())
# One write replaces the earlier write / truncate / write sequence; the file
# ends up with the same content and every handle is closed.
with open("time.txt", "w") as fo:
    fo.write(last_fini_time)
with open("time.txt", "r") as fo:
    last_fini_time = fo.read()
# Drop the leading "yy-" so only "mm-dd HH:MM:SS" remains.
last_fini_time1 = last_fini_time[3:17]
# NOTE(review): the '18-' year prefix is hard-coded; confirm whether it should
# follow the current year.
query_time = ''' '18-%s' AND '%s' ''' % (now_5af_string, last_fini_time)
query_time
import threading
def pulldata():
    """Thread target placeholder: print two progress messages and return."""
    print('start to pull data')
    print('son thread runs successfully')
if __name__ == '__main__':
    # Run pulldata in a daemon thread and give it up to three seconds before
    # the main thread carries on; a daemon thread does not keep the process
    # alive once the main thread exits.
    print('we are in main thread ')
    print('pulling data part run in son thread')
    sub_thread = threading.Thread(target=pulldata)
    print('set up son thread and run')
    # The daemon attribute replaces the deprecated setDaemon() call.
    sub_thread.daemon = True
    sub_thread.start()
    print('let main thread wait son thread for 3s')
    sub_thread.join(3)
    print('main thread finished')
我的代码-data pulling的更多相关文章
- iview table 勾选当前行代码 data key _checked: true
给 data 项设置特殊 key _checked: true 可以默认选中当前项
- JS魔法堂:Data URI Scheme介绍
一.前言 上周五公司内部的Any Topic Conf.上我和同事们分享了这个主题,有同事说这个有用,有同事说这个没啥用,后来还延伸到网站性能的话题上,大家讨论的激烈程度让我觉得这次选题还不错.本篇先 ...
- BaseHttpListActivity,几行代码搞定Android Http列表请求、加载和缓存
Android开发中,向服务器请求一个列表并显示是非常常见的需求,但实现起来比较麻烦,代码繁杂. 随着应用的更新迭代,这种需求越来越多,我渐渐发现了实现这种需求的代码的共同点. 于是我将Activit ...
- data:image/png;base64
大家可能注意到了,网页上有些图片的src或css背景图片的url后面跟了一大串字符,比如:  ...
- 通过data:image/png;base64把图片直接写在src里
从网上下了个源文件查看时候发现了引用图片的地址不是在本地上的,而是后面跟了一大串字符data:image/png;base64...查了一下资料分析如下: 关于用base64存储图片 网页上有些图片的 ...
- data:image/png;base64是什么
大家可能注意到了,网页上有些图片的src或css背景图片的url后面跟了一大串字符,比如:  ...
- html image -- data:image/png;base64
1, data:image/png;base64 <!DOCTYPE HTML> <html> <head> <meta http-equiv=" ...
- 【转】浏览器中的data类型的Url格式,data:image/png,data:image/jpeg!
所谓"data"类型的Url格式,是在RFC2397中 提出的,目的对于一些"小"的数据,可以在网页中直接嵌入,而不是从外部文件载入.例如对于img这个Tag, ...
- 使用Visual Studio 2010写Data Url生成工具C#版本
声明:本文系本人按照真实经历原创.未经许可,谢绝转载. 此文百度经验版本号:怎样用Visual Studio 2010打造Data Url生成工具 源代码下载:用Visual Studio 2010编 ...
随机推荐
- Codeforces 939C - Convenient For Everybody
2018-03-03 http://codeforces.com/problemset/problem/939/C C. Convenient For Everybody time limit per ...
- Ingenious Lottery Tickets 【排序】
问题 I: Ingenious Lottery Tickets 时间限制: 1 Sec 内存限制: 128 MB 提交: 590 解决: 135 [提交] [状态] [命题人:admin] 题目描 ...
- faker 模块
faker是python的一个第三方模块,是一个github上的开源项目. 主要用来创建一些测试用的随机数据 文档:https://faker.readthedocs.io/en/master/ind ...
- mpvue构建小程序(步骤+地址)
mpvue 是一个使用 Vue.js 开发小程序的前端框架(美团的开源项目).框架基于 Vue.js 核心,mpvue 修改了 Vue.js 的 runtime 和 compiler 实现,使其可以运 ...
- Petrozavodsk Winter Camp, Day 8, 2014, Mosaic
给你三个数字a,b,c,让你用1-m的数字凑出来 结论:有2个1和2个2肯定凑不出来,然后就搜索 #include <bits/stdc++.h> using namespace std; ...
- Codeforces Round #542 [Alex Lopashev Thanks-Round] (Div. 1)C. Morse Code
题意:给你n个01字符,每次问你前缀的所有本质不同的子串,由摩斯密码组成的方案数和. 题解:离线处理,把字符建sam,通过topo序来dp计算每个节点表示的子串方案数的和.统计答案时,把n个字符挨个匹 ...
- Codeforces Beta Round #19C. Deletion of Repeats
题意:给一个数组,每次会删去连续重复两次的左侧部分及前面,有多个重复部分找长度最小和最靠左的部分,重复的数字最多10次 题解:根据重复数字只有10次,我们离散化后,以每两个相同数字作为起点能确定这重复 ...
- react-thunk的使用流程
react-thunk作用:使我们可以在action中返回函数,而不是只能返回一个对象.然后我们可以在函数中做很多事情,比如发送异步的ajax请求. 这就是react-thunk的使用方法.接受一个d ...
- nginx中try_files
location / { try_files $uri $uri/ /index.php?$query_string; } 当用户请求 http://localhost/example 时,这里的 $ ...
- SpringCloud调用服务原理