# -*- coding: utf-8 -*-
# __author__ = 'JieYao'
from biocluster.agent import Agent
from biocluster.tool import Tool
import os
import types
import subprocess
from biocluster.core.exceptions import OptionError


class OtunetworkAgent(Agent):
    """
    Agent side of the OTU network analysis: declares the options, validates
    them and registers the result files for upload.

    Requires calc_otu_network.py
    version 1.0
    author: JieYao
    last_modified:2016.8.1
    """

    def __init__(self, parent):
        super(OtunetworkAgent, self).__init__(parent)
        options = [
            {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"},
            {"name": "level", "type": "string", "default": "otu"},
            {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"},
            {"name": "envlabs", "type": "string", "default": ""}
        ]
        self.add_option(options)
        self.step.add_steps('OtunetworkAnalysis')
        self.on('start', self.step_start)
        self.on('end', self.step_end)

    def step_start(self):
        """Mark the OtunetworkAnalysis step as started and push the update."""
        self.step.OtunetworkAnalysis.start()
        self.step.update()

    def step_end(self):
        """Mark the OtunetworkAnalysis step as finished and push the update."""
        self.step.OtunetworkAnalysis.finish()
        self.step.update()

    def gettable(self):
        """
        Return the path of the OTU table to analyse.

        For a tax_summary_dir input the table of the requested ``level`` is
        looked up; otherwise the path of the input table itself is returned.

        :return: path of the OTU table
        """
        if self.option('otutable').format == "meta.otu.tax_summary_dir":
            return self.option('otutable').get_table(self.option('level'))
        return self.option('otutable').prop['path']

    def check_options(self):
        """
        Validate the options (overrides the base-class check).

        :return: True when every check passes
        :raises OptionError: when the OTU table is missing or too small, or
            when the environment table is inconsistent with the OTU table
        """
        if not self.option('otutable').is_set:
            raise OptionError('必须提供otu表')
        self.option('otutable').get_info()
        if self.option('otutable').prop['sample_num'] < 2:
            raise OptionError('otu表的样本数目少于2,不可进行网络分析')

        env_is_set = self.option('envtable').is_set
        if env_is_set:
            self.option('envtable').get_info()
            if self.option('envlabs'):
                for lab in self.option('envlabs').split(','):
                    if lab not in self.option('envtable').prop['group_scheme']:
                        raise OptionError('envlabs中有不在物种(环境因子)表中存在的因子:%s' % lab)
            if len(self.option('envtable').prop['sample']) < 2:
                raise OptionError('物种(环境因子)表的样本数目少于2,不可进行网络分析')

        # Read the table once inside a context manager so the handle is always
        # closed, including when one of the checks below raises.
        with open(self.gettable()) as table:
            lines = table.readlines()
        samplelist = lines[0].strip().split('\t')[1:] if lines else []

        if env_is_set:
            env_samples = self.option('envtable').prop['sample']
            if len(env_samples) > len(samplelist):
                raise OptionError('OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s' % (len(samplelist),
                                  len(env_samples)))
            for sample in env_samples:
                if sample not in samplelist:
                    raise OptionError('物种(环境因子)表的样本中存在OTU表中未知的样本%s' % sample)

        # Header line plus at least three data rows are required.
        if len(lines) < 4:
            raise OptionError('数据表信息少于3行')
        return True

    def set_resource(self):
        """
        Declare the resources needed by the tool.
        """
        self._cpu = 2
        # NOTE(review): memory is left as an empty string in the original;
        # confirm whether the framework expects a value such as '2G' here.
        self._memory = ''

    def end(self):
        """Register the output directory and its result files, then finish."""
        result_dir = self.add_upload_dir(self.output_dir)
        result_dir.add_relpath_rules([
            [".", "", "OTU网络分析结果输出目录"],
            ["./real_node_table.txt", "txt", "OTU网络节点属性表"],
            ["./real_edge_table.txt", "txt", "OTU网络边集属性表"],
            ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"],
            ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"],
            ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"],
            ["./network_centrality.txt", "txt", "OTU网络中心系数表"],
            ["./network_attributes.txt", "txt", "OTU网络单值属性表"],
        ])
        # Parenthesised single-argument form prints the same under Python 2
        # and is also valid Python 3 syntax.
        print(self.get_upload_files())
        super(OtunetworkAgent, self).end()


class OtunetworkTool(Tool):
    """Tool side of the OTU network analysis: runs calc_otu_network.py and
    links its result files into the output directory."""

    def __init__(self, config):
        super(OtunetworkTool, self).__init__(config)
        self._version = "1.0.1"
        self.cmd_path = self.config.SOFTWARE_DIR + '/bioinfo/meta/scripts/calc_otu_network.py'
        self.env_table = self.get_new_env()
        self.otu_table = self.get_otu_table()
        self.out_files = ['real_node_table.txt', 'real_edge_table.txt', 'real_dc_otu_degree.txt',
                          'real_dc_sample_degree.txt', 'real_dc_sample_otu_degree.txt',
                          'network_centrality.txt', 'network_attributes.txt']

    def get_otu_table(self):
        """
        Resolve the OTU table for the requested level and, when an environment
        table is set, restrict it to the samples of that table.

        :return: path of the OTU table to feed to the script
        """
        if self.option('otutable').format == "meta.otu.tax_summary_dir":
            otu_path = self.option('otutable').get_table(self.option('level'))
        else:
            otu_path = self.option('otutable').prop['path']
        if not self.option('envtable').is_set:
            return otu_path
        return self.filter_otu_sample(otu_path, self.option('envtable').prop['sample'],
                                      os.path.join(self.work_dir, 'temp_filter.otutable'))

    def filter_otu_sample(self, otu_path, filter_samples, newfile):
        """
        Write a copy of the OTU table that keeps only ``filter_samples``.

        :param otu_path: path of the tab-separated OTU table
        :param filter_samples: list of sample names to keep
        :param newfile: path of the filtered table to create
        :return: ``otu_path`` when the table has as many samples as the filter
            (nothing is written), otherwise ``newfile``
        :raises Exception: when ``filter_samples`` is not a list, contains a
            sample missing from the table, or a file cannot be opened
        """
        if not isinstance(filter_samples, list):
            raise Exception('过滤otu表样本的样本名称应为列表')
        try:
            with open(otu_path, 'rb') as f:
                one_line = f.readline()
                all_samples = one_line.rstrip().split('\t')[1:]
                if not set(filter_samples).issubset(all_samples):
                    raise Exception('提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s' % (all_samples, filter_samples))
                # Same sample count means nothing has to be dropped. The
                # output file is only opened after this check so that no empty
                # file is left behind. Column order is not re-arranged here.
                if len(all_samples) == len(filter_samples):
                    return otu_path
                samples_index = [all_samples.index(i) + 1 for i in filter_samples]
                with open(newfile, 'wb') as w:
                    w.write('OTU\t' + '\t'.join(filter_samples) + '\n')
                    for line in f:
                        if not line.strip():
                            # Blank lines carry no data and would break indexing.
                            continue
                        all_values = line.rstrip().split('\t')
                        new_values = [all_values[0]] + [all_values[i] for i in samples_index]
                        w.write('\t'.join(new_values) + '\n')
            return newfile
        except IOError:
            raise Exception('无法打开OTU相关文件或者文件不存在')

    def get_new_env(self):
        """
        Build the environment table restricted to ``envlabs``.

        :return: path of the environment table to use, or None when no
            environment table was provided
        """
        if not self.option('envtable').is_set:
            # env_table is only read when envtable is set, so None is safe and
            # avoids calling sub_group() on an unset file.
            return None
        if self.option('envlabs'):
            new_path = self.work_dir + '/temp_env_table.xls'
            self.option('envtable').sub_group(new_path, self.option('envlabs').split(','))
            return new_path
        return self.option('envtable').path

    def run(self):
        """
        Run the tool.
        """
        super(OtunetworkTool, self).run()
        self.run_otu_network_py()

    def formattable(self, tablepath):
        """
        Strip a leading '#' from the header line of the table.

        :param tablepath: path of the input table
        :return: path of a rewritten copy in the work directory when the header
            started with '#', otherwise ``tablepath`` unchanged (also for an
            empty file)
        """
        with open(tablepath) as table:
            alllines = table.readlines()
        if not alllines or not alllines[0].startswith('#'):
            return tablepath
        formatted_path = os.path.join(self.work_dir, 'temp_format.table')
        with open(formatted_path, 'w') as newtable:
            newtable.write(alllines[0].lstrip('#'))
            newtable.writelines(alllines[1:])
        return formatted_path

    def run_otu_network_py(self):
        """
        Run calc_otu_network.py and link its result files into the output dir.
        """
        real_otu_path = self.formattable(self.otu_table)
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through verbatim.
        cmd = [
            self.config.SOFTWARE_DIR + '/program/Python/bin/python',
            self.cmd_path,
            '-i', real_otu_path,
            '-o', self.work_dir + '/otu_network',
        ]
        if self.option('envtable').is_set:
            cmd += ['-m', self.env_table]
        self.logger.info('开始运行calc_otu_network生成OTU网络并进行计算')
        try:
            subprocess.check_output(cmd)
            self.logger.info('OTU_Network计算完成')
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing interpreter, which the shell form used
            # to report as a non-zero exit status.
            self.logger.info('OTU_Network计算失败')
            self.set_error('运行calc_otu_network.py失败')
            return
        allfiles = self.get_filesname()
        if not all(allfiles):
            # get_filesname() has already reported the error; linking a
            # missing file would only raise a misleading TypeError.
            return
        for found, target in zip(allfiles, self.out_files):
            self.linkfile(found, target)
        self.end()

    def linkfile(self, oldfile, newname):
        """
        Hard-link a file into the output directory, replacing any existing one.

        :param oldfile: path of the source file
        :param newname: file name to use inside the output directory
        :return:
        """
        newpath = os.path.join(self.output_dir, newname)
        if os.path.exists(newpath):
            os.remove(newpath)
        os.link(oldfile, newpath)

    def get_filesname(self):
        """
        Locate every expected result file under ``work_dir/otu_network``.

        :return: list parallel to ``self.out_files`` holding the path found for
            each expected file, or None where the file is missing (in which
            case set_error is called)
        """
        files_status = [None] * len(self.out_files)
        for paths, d, filelist in os.walk(self.work_dir + '/otu_network'):
            for filename in filelist:
                name = os.path.join(paths, filename)
                for i, expected in enumerate(self.out_files):
                    if expected in name:
                        files_status[i] = name
        if not all(files_status):
            self.set_error('未知原因,结果文件生成出错或丢失')
        return files_status
 # -*- coding: utf-8 -*-
# __author__ = 'JieYao'
import os
import argparse
from biocluster.config import Config
import shutil
import networkx


def make_env_table(inFile, outFile):
    """
    Create a default grouping file that puts every sample into group "STD".

    Sample names are taken from the header line of ``inFile`` (every
    tab-separated field after the first). The file is always written as
    ``group.txt`` in the current working directory; ``outFile`` is accepted
    but not used.

    :param inFile: path of the tab-separated OTU table
    :param outFile: unused
    :return: the relative path './group.txt'
    """
    with open(inFile, "r") as otu_handle:
        header_fields = otu_handle.readline().rstrip().split('\t')
    rows = ["#sample\tgroup\n"]
    rows.extend(sample + "\tSTD\n" for sample in header_fields[1:])
    with open('group.txt', "w") as group_handle:
        group_handle.writelines(rows)
    return './group.txt'


parser = argparse.ArgumentParser(description='输入OTU表格,生成OTU网络信息')
parser.add_argument('-i', "--otu_matrix", help="输入的OTU表", required=True)
parser.add_argument('-o', "--output", help="输出文件位置", required=True)
parser.add_argument('-m', "--env_table", help="样本分类表", required=False)
args = vars(parser.parse_args())

flag = False  # True when a temporary group.txt was generated and must be removed
inFile = args["otu_matrix"]
outFile = args["output"]
if not args["env_table"]:
    env_table = make_env_table(inFile, outFile)
    flag = True
else:
    env_table = args["env_table"]
if os.path.exists(outFile):
    shutil.rmtree(outFile)

# Run make_otu_network.py to compute the OTU network files. It writes into a
# sub-directory, so os/shutil are used afterwards to move everything directly
# under the output path.
software_dir = Config().SOFTWARE_DIR
command = software_dir + '/program/Python/bin/python '
command += software_dir + '/program/Python/bin/make_otu_network.py'
# NOTE(review): the paths are interpolated into a shell command unquoted;
# callers must not pass paths containing spaces or shell metacharacters.
command += ' -i %s -o %s -m %s' % (inFile, outFile, env_table)
exit_status = os.system(command)
if flag:
    os.remove("./group.txt")
if exit_status != 0:
    # Fail here with a clear message instead of crashing later on a missing
    # result file.
    raise RuntimeError('make_otu_network.py failed with exit status %s' % exit_status)

for paths, d, filelist in os.walk(outFile):
    for filename in filelist:
        name = os.path.join(paths, filename)
        if "reduced" in name:
            os.remove(name)
        elif "/otu_network/" in name:
            shutil.move(name, outFile)
shutil.rmtree(outFile + '/otu_network')
for paths, d, filelist in os.walk(outFile):
    for filename in filelist:
        name = os.path.join(paths, filename)
        if "props" in name:
            os.remove(name)

# Build the {node name -> node id} dictionary from the node table. The id is
# the 1-based line index in the table (line 0 is the header), and node_name[id]
# maps back to the name.
node_name = [""]
node_dictionary = {}
with open(outFile + '/real_node_table.txt', "r") as node_file:
    informations = node_file.readlines()
for i in range(1, len(informations)):
    tmp = informations[i].rstrip().split("\t")
    node_dictionary[tmp[0]] = i
    node_name.append(tmp[0])

# Build the graph with networkx and compute the OTU network attributes.
G = networkx.Graph()
with open(outFile + "/real_edge_table.txt", "r") as edge_file:
    informations = edge_file.readlines()
for line in informations[1:]:
    if not line.strip():
        continue
    tmp = line.rstrip().split("\t")
    # float() instead of eval(): the weight column is data, not code.
    G.add_edge(node_dictionary[tmp[0]], node_dictionary[tmp[1]], weight=float(tmp[2]))

# Historical note: building separate graphs for samples only or for OTUs only
# was tried in practice. The sample graph came out as essentially a complete
# graph and the OTU-only graph was of little use, so that approach was
# abandoned and its experimental code removed.

# Attribute tables.
# Degree centrality: importance of a node in the graph.
Degree_Centrality = networkx.degree_centrality(G)
# Closeness centrality: the larger the value, the closer the node is to all
# other nodes, i.e. the more central it is.
Closeness_Centrality = networkx.closeness_centrality(G)
# Betweenness centrality: the larger the value, the more shortest paths pass
# through the node, i.e. the more central it is.
Betweenness_Centrality = networkx.betweenness_centrality(G)
with open(os.path.join(args["output"], "network_centrality.txt"), "w") as tmp_file:
    tmp_file.write("Node_ID\tNode_Name\tDegree_Centrality\t")
    tmp_file.write("Closeness_Centrality\tBetweenness_Centrality\n")
    # Iterate over the ids actually present in the graph: a node without any
    # edge never enters G, so the ids are not guaranteed to be 1..len(G).
    for node_id in sorted(G.nodes()):
        tmp_file.write(str(node_id) + "\t" + node_name[node_id] + "\t")
        tmp_file.write(str(Degree_Centrality[node_id]) + "\t")
        tmp_file.write(str(Closeness_Centrality[node_id]) + "\t")
        tmp_file.write(str(Betweenness_Centrality[node_id]) + "\n")

# Network transitivity: should be 0 for a bipartite graph, otherwise something
# is wrong.
Transitivity = networkx.transitivity(G)
# Network diameter.
# NOTE(review): diameter and average shortest path length raise in networkx
# when the graph is not connected - confirm the input always is.
Diameter = networkx.diameter(G)
# Average shortest path length of the network.
Average_shortest_path = networkx.average_shortest_path_length(G)
with open(os.path.join(args["output"], "network_attributes.txt"), "w") as tmp_file:
    tmp_file.write("Transitivity:" + str(Transitivity) + "\n")
    tmp_file.write("Diameter:" + str(Diameter) + "\n")
    tmp_file.write("Average_shortest_path_length:" + str(Average_shortest_path) + "\n")

OTU_Network&calc_otu的更多相关文章

随机推荐

  1. [BZOJ 1562] 变换序列

    Link: BZOJ 1562 传送门 Solution: 一道比较考对$Hungry$算法理解的题目 首先可以轻松看出原序列和答案序列的对应关系,从而建出二分图匹配模型 下面的关键在于如何保证字典序 ...

  2. 4425: [Nwerc2015]Assigning Workstations分配工作站

    4425: [Nwerc2015]Assigning Workstations分配工作站 Description Penelope is part of the admin team of the n ...

  3. keytool工具生成自签名证书并且通过浏览器导入证书

    1.生成服务器证书库 keytool -genkey -alias tomcat -keypass changeit -keyalg RSA -keysize 1024 -validity 365 - ...

  4. iOS中深拷贝、浅拷贝和retain的区别

    浅拷贝:浅拷贝是对object对象的指针拷贝,让指针指向同一块内存地址,“对象永远只有一个",浅拷贝使对象的引用计数器+1.代码如下: 可以看出不可变字符串的指针指向了同一地址,并没有重新开 ...

  5. KVM工具libvirt、virsh、virt-manager的简单介绍

    KVM虚拟化中libvirt是目前使用最为广泛的对KVM虚拟机进行管理的工具和应用程序接口,而且一些常用的虚拟机管理工具(virsh.virt-install.virt-manager等)和云计算框架 ...

  6. 新浪微博宋琦:PHP在微博优化中的“大显身手”

    摘要:2013中国软件开发者大会编程语言与工具专题论坛中,新浪微博架构师宋琦介绍了PHP在新浪微博中的应用,并且分享了很多微博主站所做的性能优化的工作. [CSDN报道] 2013中国软件开发者大会( ...

  7. MathType中带上下标字符不对其

    如图,上面的好看,下面的就不好看的. 上面的图使用下图下面的形式,下面的图是用的是上面的形式. 如图可以看出,右侧的更好. 比如UiTVj这样的,需要分别都用下面的形式,不能UiT用上面的,Vj直接输 ...

  8. 明尼苏达推荐系统导论(第一课 欢迎来到RS)

    一.RS介绍 1.显示评分:直接从用户来 隐式评分:从用户活动推测得到的 2.预测是偏好的估计,是预测缺失值,推荐是从其他用户推荐项目,是推荐感兴趣的项目. 3.协同表示利用其它用户的数据 二.欢迎来 ...

  9. 探索 vuex 2.0 以及使用 vuejs 2.0 + vuex 2.0 构建记事本应用23

    前言 首先说明这并不是一个教程贴,而记事本应用是网上早有的案例,对于学习 vuex 非常有帮助.我的目的是探索 vuex 2.0 ,然后使用 vuejs 2.0 + vuex 2.0 重写这个应用,其 ...

  10. 莫名其妙的float:left; 不能使元素紧贴父级的坑

    这是项目中遇到的一个CSS的坑,做个记录,主要的原因还是浮动后脱离文档流,两个浮动的元素处于同一文档流中会相互影响位置的问题: 先上代码吧: 效果预览地址:浮动不能靠左的情况; 原本红色模块应该处于蓝 ...