# coding: utf-8

import datetime
import time
from sqlalchemy.engine import create_engine
from sqlalchemy.pool import NullPool
import pyodbc
import pandas as pd
import numpy as np
import shutil
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer

conn = create_engine("mssql+pyodbc://sedaldr:s1e6daldr@MSEDA101\MSEDA/SSMC_IEDA_DEV2?driver=ODBC+Driver+11+for+SQL+Server",connect_args={'connect_timeout':15},poolclass=NullPool)

def main():
#examples for normalizer
def encode_eqpid(eqpid):
return int(eqpid[-2:])-1

def encode_chamber(chamber):
if chamber == 'A':
return 0
else:
return 1

def encode_wafer(wafer):
if wafer > 0:
return wafer-1
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#data=data.drop(columns=['lotid','defect_count','stage','Recipie_Name','finishtime'])
#data=data.drop(columns=['eqpid','Chamber','slotid'])
nz = Normalizer()
data.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data.iloc[:,8:10]),columns=data.iloc[:,8:10].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)
#pulling data
now = datetime.datetime.now()
print("1st time to pull the data",now)
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5df = now-datetime.timedelta(minutes=5)
now_5af = now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
now_5bf_string= '''%s-%s-%s %s:%s:%s'''%(now_5df.year,now_5df.month,now_5df.day,now_5df.hour,now_5df.minute,now_5df.second)
query_time = ''' '%s' AND '%s' '''%(now_5bf_string,now_string)
#put SQL query part as string
query_part1 = "SELECT DISTINCT b.eqpid, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +1,1) as Chamber, b.lotid,a.slotid, a.waferid, LEFT(b.ParameterName, CHARINDEX('-', b.ParameterName) -1) as Param_Name, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +3,+1) as Step, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +5, LEN( b.parametername)) as Recipie_Name, b.parametername, a.[data] as data1,a.finishtime,a.ooc,a.oos FROM [SSMC_RTM].[dbo].[rtm_tbl_massdata] a INNER JOIN [SSMC_RTM].[dbo].[rtm_tbl_datahist] b ON a.[datahist_fno]=b.[fno] WHERE b.parametername LIKE '%-4-%' AND b.eqpid LIKE 'AEM2%' AND a.finishtime BETWEEN"
query_part2 = "AND b.parametername NOT LIKE '%-$' AND( b.parametername LIKE 'ETCM_PHA4_A%' OR b.parametername LIKE 'ETCM_PHA4_B%' OR b.parametername LIKE 'ETCM_PHB4_A%' OR b.parametername LIKE 'ETCM_PHB4_B%' OR b.parametername LIKE 'ETCM_PHC4_A%' OR b.parametername LIKE 'ETCM_PHC4_B%' OR b.parametername LIKE 'HELK_MEAN_A%' OR b.parametername LIKE 'HELK_MEAN_B%' OR b.parametername LIKE 'LOWERCHM_PRESS_A%' OR b.parametername LIKE 'LOWERCHM_PRESS_B%' OR b.parametername LIKE 'PBK4_A%' OR b.parametername LIKE 'PBK4_B%' OR b.parametername LIKE 'RR23_MEAN_A%' OR b.parametername LIKE 'RR23_MEAN_B%' OR b.parametername LIKE 'RR23_MAX._A%' OR b.parametername LIKE 'RR23_MAX._B%' OR b.parametername LIKE 'RR13_MEAN_A%' OR b.parametername LIKE 'RR13_MEAN_B%' OR b.parametername LIKE 'RR13_MAX._A%' OR b.parametername LIKE 'RR13_MAX._B%' OR b.parametername LIKE 'THR3_MAX._A%' OR b.parametername LIKE 'THR3_MAX._B%' OR b.parametername LIKE 'THR3_MAX._DIFF_A%' OR b.parametername LIKE 'THR3_MAX._DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_A%' OR b.parametername LIKE 'THR3_MEAN_B%' OR b.parametername LIKE 'THR3_MEAN_DIFF_A%' OR b.parametername LIKE 'THR3_MEAN_DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_A%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_B%' )"
#sum 3 strings to get query string
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
query_sum
#run SQL query by python
data=pd.read_sql_query(query_sum,conn)
#save the dataset
localtime = time.asctime( time.localtime(time.time()) )
time_name = time.strftime("%m-%d-%H-%M", time.localtime() )
data.to_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
#clean
data_clean=pd.read_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
data_clean=data_clean.iloc[:,1:]
data_clean.drop(['ooc','oos'],axis=1,inplace=True)
data_clean.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
data_clean.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
#pivot
pivoted = data_clean.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
final = final.merge(pivoted,how="right").reindex_axis(columns, axis=1)
#normalize
final= final.dropna(axis=0, how='any')
Index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
Index.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'%time_name)
final=final.drop(columns=["lotid","recipe"])
final.eqpid = final.eqpid.apply(encode_eqpid)
final.chamber = final.chamber.apply(encode_chamber)
final.wafer = final.wafer.apply(encode_wafer)
final.rename(columns={'eqpid':'eqpid1','chamber':'chamber1','wafer':'wafer1'}, inplace=True)
final.eqpid1 = final.eqpid1.astype("category")
final.chamber1 = final.chamber1.astype("category")
final.wafer1 = final.wafer1.astype("category")
final.iloc[:,11:13]=nz.transform(final.iloc[:,11:13])
final.iloc[:,3:6]=nz.transform(final.iloc[:,3:6])
#SUM_ETCM
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'%time_name)
#shutil.move(r'E:\Data\Project-Etcher\RawData\AEM2_rowdata_%s.csv'%time_name,r'E:\Data\Project-Etcher\RwwData_Processed')
print('1st time to pull the data successfully')
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
var=1
while var==1:
for i in range(999999):
print ('%d time try to pull the data today'%(i+2))
# Decide if the time equals to what we setting
correct_time=datetime.datetime.strptime(now_5af_string,'%m-%d %H:%M:%S')
mth = now_5af.month
day = now_5af.day
hour = now_5af.hour
mins = now_5af.minute
print('5 mins after last start:',day,hour,mins)
lastfi_time1 = datetime.datetime.strptime(last_fini_time1,'%m-%d %H:%M:%S')
delta= correct_time - lastfi_time1
print('delta=',delta)
if delta.days == 0:
x=1
while x==1:
now= datetime.datetime.now()
if now.hour == hour and now.minute == mins and now.day == day:
break
# if not equal, sleep 20 seconds then start again
time.sleep(20)
print ('%d time try to pull the data today,test again...'%(i+2),now)
#run SQL query by python
print ('%d time try to pull the data is running,5mins'%(i+2),now)
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5bf = now-datetime.timedelta(minutes=5)
now_5af=now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
now_5bf_string= '''%s-%s-%s %s:%s:%s'''%(now_5bf.year,now_5bf.month,now_5bf.day,now_5bf.hour,now_5bf.minute,now_5bf.second)
query_time = ''' '%s' AND '%s' '''%(now_5bf_string,now_string)
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
data=pd.read_sql_query(query_sum,conn)
else:
#run SQL query by python
print('%d time to pull the data is running,>5mins'%(i+2),now)
query_time = ''' '%s' AND '20%s' '''%(now_string,last_fini_time)
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
now= datetime.datetime.now()
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5af=now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
data=pd.read_sql_query(query_sum,conn)

#save the dataset
fo = open("time.txt", "w")
fo.write('')
fo.close()
localtime = time.asctime( time.localtime(time.time()) )
time_name = time.strftime("%m-%d-%H-%M", time.localtime() )
data.to_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
#clean
data_clean=pd.read_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
data_clean=data_clean.iloc[:,1:]
data_clean.drop(['ooc','oos'],axis=1,inplace=True)
data_clean.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
data_clean.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
#pivot
pivoted = data_clean.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
final = final.merge(pivoted,how="right").reindex_axis(columns, axis=1)
#normalize
final= final.dropna(axis=0, how='any')
Index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
Index.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'%time_name)
final=final.drop(columns=["lotid","recipe"])
final.eqpid = final.eqpid.apply(encode_eqpid)
final.chamber = final.chamber.apply(encode_chamber)
final.wafer = final.wafer.apply(encode_wafer)
final.rename(columns={'eqpid':'eqpid1','chamber':'chamber1','wafer':'wafer1'}, inplace=True)
final.eqpid1 = final.eqpid1.astype("category")
final.chamber1 = final.chamber1.astype("category")
final.wafer1 = final.wafer1.astype("category")
final.iloc[:,11:13]=nz.transform(final.iloc[:,11:13])
final.iloc[:,3:6]=nz.transform(final.iloc[:,3:6])
#SUM_ETCM
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'%time_name)
#shutil.move(r'E:\Data\Project-Etcher\RawData\AEM2_rowdata_%s.csv'%time_name,r'E:\Data\Project-Etcher\RwwData_Processed')
print('%d time to pull the data successfully'%(i+2))
i=+1
#time loop setting
#mins_timing = 5*i
#mins = mins+(mins_timing-((mins_timing//60)*60))
#hour = hour+(mins//60)
#if mins >59:
#mins = mins-60
#else :
#a=1
#if mth != now.month:
#mth = now.month
#day = now.day
#else:
#day = day +(hour//24)
#hour = hour%24
#year=now.year
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]

main()

import os
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "w")
fo.write('')
fo.close()
#fo = open("time.txt","r+")
#last_fini_time = fo.read()
#last_fini_time1= last_fini_time[3:17]
#fo.close()
#os.remove(path=r"C:\Users\sgg91044\time.txt")

now= datetime.datetime.now()
now_5af=now+datetime.timedelta(minutes=5)
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]

query_time = ''' '18-%s' AND '%s' '''%(now_5af_string,last_fini_time)
query_time

import threading
def pulldata()
print('start to pull data')
print('son thread runs successfully')
if __name__ == '__main__':
print('we are in main thread ')
print('pulling data part run in son thread')
sub_thread = threading.Thread(target=pulldata)
print('set up son thread and run')
sub_thread.setDaemon(True)
sub_thread.start()
print('let main thread wait son thread for 3s')
sub_thread.join(3)
print('main thread finished')

我的代码-data pulling的更多相关文章

  1. iview table 勾选当前行代码 data key _checked: true

    给 data 项设置特殊 key _checked: true 可以默认选中当前项

  2. JS魔法堂:Data URI Scheme介绍

    一.前言 上周五公司内部的Any Topic Conf.上我和同事们分享了这个主题,有同事说这个有用,有同事说这个没啥用,后来还延伸到网站性能的话题上,大家讨论的激烈程度让我觉得这次选题还不错.本篇先 ...

  3. BaseHttpListActivity,几行代码搞定Android Http列表请求、加载和缓存

    Android开发中,向服务器请求一个列表并显示是非常常见的需求,但实现起来比较麻烦,代码繁杂. 随着应用的更新迭代,这种需求越来越多,我渐渐发现了实现这种需求的代码的共同点. 于是我将Activit ...

  4. data:image/png;base64

    大家可能注意到了,网页上有些图片的src或css背景图片的url后面跟了一大串字符,比如:  ...

  5. 通过data:image/png;base64把图片直接写在src里

    从网上下了个源文件查看时候发现了引用图片的地址不是在本地上的,而是后面跟了一大串字符data:image/png;base64...查了一下资料分析如下: 关于用base64存储图片 网页上有些图片的 ...

  6. data:image/png;base64是什么

    大家可能注意到了,网页上有些图片的src或css背景图片的url后面跟了一大串字符,比如:  ...

  7. html image -- data:image/png;base64

    1,  data:image/png;base64 <!DOCTYPE HTML> <html> <head> <meta http-equiv=" ...

  8. 【转】浏览器中的data类型的Url格式,data:image/png,data:image/jpeg!

    所谓"data"类型的Url格式,是在RFC2397中 提出的,目的对于一些"小"的数据,可以在网页中直接嵌入,而不是从外部文件载入.例如对于img这个Tag, ...

  9. 使用Visual Studio 2010写Data Url生成工具C#版本

    声明:本文系本人按照真实经历原创.未经许可,谢绝转载. 此文百度经验版本号:怎样用Visual Studio 2010打造Data Url生成工具 源代码下载:用Visual Studio 2010编 ...

随机推荐

  1. newcoder 小A的柱状图(单调栈)题解

    题目描述 柱状图是有一些宽度相等的矩形下端对齐以后横向排列的图形,但是小A的柱状图却不是一个规范的柱状图,它的每个矩形下端的宽度可以是不相同的一些整数,分别为a[i] 每个矩形的高度是h[i] ,现在 ...

  2. UVA1401 Remember the Word

    思路 用trie树优化dp 设f[i]表示到第i个的方案数,则有\(f[i]=\sum_{x}f[i+len[x]]\)(x是s[i,n]的一个前缀),所以需要快速找出所有前缀,用Trie树即可 代码 ...

  3. 【Entity Framework】Revert the database to specified migration.

    本文涉及的相关问题,如果你的问题或需求有与下面所述相似之处,请阅读本文 [Entity Framework] Revert the database to specified migration. [ ...

  4. 由 UWP 版网易云音乐闪退引发的博文

    今天,不知怎么的.网易云音乐出现了一打开就闪退的情况.百度了好些时候未果,就直接 Windows + i 打开 Windows 设置 > 应用 在应用和功能列表中找到网易云音乐,在展开的 高级选 ...

  5. Yarn 踩坑 : Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster

    原因:yarn-site.xml 中,yarn.application.classpath 未配置 解决:其中 hadoop 版本对应更改 <property> <name>y ...

  6. hdu 1895 Sum Zero hash

    Sum Zero Time Limit: 10000/5000 MS (Java/Others)    Memory Limit: 65535/32768 K (Java/Others) Proble ...

  7. 第一个Python窗体程序

    新建ui_MainWindow.py文件 # encoding: UTF-8 # Qt相关和十字光标 from PyQt4.QtGui import * from PyQt4.QtCore impor ...

  8. linux软件管理之源码包管理

    源码包管理tarball ====================================================================================tar ...

  9. javascript中的词法分析

    词法分析 JavaScript中在调用函数的那一瞬间,会先进行词法分析. 词法分析的过程: 当函数调用的前一瞬间,会先形成一个激活对象:Avtive Object(AO),并会分析以下3个方面: 1: ...

  10. Jboss解决只能通过localhost访问而不能使用IP访问项目的问题

    之前项目都是前后端完全分离,很少使用到后端语言的开发工具,最近使用intellij+Jboss进行项目部署开发,初始用发现项目启动后只能使用localhost进行项目访问,IP地址访问则提示页面404 ...