<html> <head> <script type="text/javascript" language="javascript"> var idTmr; function method1(tableid) {//整个表格拷贝到EXCEL中 var curTbl = document.getElementById(tableid); var oXL = new ActiveXObject("Excel.Applicat
# -*- coding: utf-8 -*- import urllib2 import re import time import jieba url="http://www.baidu.com" html=urllib2.urlopen(url).read() html=unicode(html,'utf-8') word=re.findall(ur"[\u4e00-\u9fa5]+",html) s="" for w in word: s
function getInfo(html){ //去掉注释 html=html.replace(/<!--.+?-->/g,"") var arrP=[] var reg=/<(p|h1|h2|h3|h4|h5|pre|blockquote|table)( +[^>]*>|>)[\d\D]*?<\/\1>/gi html.replace(reg,function(m){ if(/^<p/.test(m)&&/<