多了不说,直接贴出相关部分的实现代码
加密部分实现:
package token.exe; import java.math.BigInteger;
import java.util.Random;
import org.apache.commons.codec.binary.Base64;

/**
 * Produces the two credential values used by the login requests: the
 * Base64-encoded account name ({@code su}) and the RSA-encrypted password
 * ({@code sp}).
 */
public class WeiboEncoder {

    /** Fixed charset so the encodings never depend on the platform default. */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.Charset.forName("UTF-8");

    /** Source of the padding bytes; a fixed seed would make the pad identical on every run. */
    private static final java.security.SecureRandom PAD_RANDOM =
            new java.security.SecureRandom();

    /**
     * Base64-encodes the account name (the {@code su} value).
     *
     * @param account account name; its UTF-8 bytes are encoded
     * @return the Base64 text
     */
    @SuppressWarnings("deprecation")
    public static String encodeAccount(String account) {
        return new String(Base64.encodeBase64(account.getBytes(UTF_8)), UTF_8);
    }

    /**
     * Encrypts the password with the RSA public operation after type-2
     * padding (the {@code sp} value).
     *
     * @param pwd  text to encrypt
     * @param nStr RSA modulus as a hexadecimal string
     * @param eStr RSA public exponent as a hexadecimal string
     * @return the cipher text as lowercase hexadecimal with an even number of digits
     * @throws IllegalArgumentException if the UTF-8 encoded text plus the
     *         11 bytes of padding overhead does not fit in the modulus
     */
    public static String RSAEncrypt(String pwd, String nStr, String eStr) {
        // Key material stays local so concurrent calls cannot overwrite each other.
        BigInteger modulus = new BigInteger(nStr, 16);
        BigInteger exponent = new BigInteger(eStr, 16);

        BigInteger padded = pkcs1pad2(pwd, (modulus.bitLength() + 7) >> 3);
        String sp = RSADoPublic(padded, modulus, exponent).toString(16);
        // Keep whole bytes: left-pad with one zero digit when the length is odd.
        if ((sp.length() & 1) != 0) {
            sp = "0" + sp;
        }
        return sp;
    }

    /** Performs the raw RSA public operation {@code x^e mod n}. */
    private static BigInteger RSADoPublic(BigInteger x, BigInteger modulus, BigInteger exponent) {
        return x.modPow(exponent, modulus);
    }

    /**
     * Builds the padded block {@code 00 02 <non-zero random bytes> 00 <message>}
     * of exactly {@code n} bytes and returns it as a non-negative integer.
     *
     * @param s message text, encoded as UTF-8
     * @param n block length in bytes
     */
    private static BigInteger pkcs1pad2(String s, int n) {
        byte[] message = s.getBytes(UTF_8);
        // Measure the encoded bytes, not the chars: multi-byte characters take more room.
        if (n < message.length + 11) {
            throw new IllegalArgumentException("Message too long for RSA");
        }

        byte[] block = new byte[n];
        int separator = n - message.length - 1;
        System.arraycopy(message, 0, block, separator + 1, message.length);
        block[separator] = 0;

        // Random pad; zero is excluded because it marks the end of the padding.
        byte[] one = new byte[1];
        for (int i = 2; i < separator; i++) {
            do {
                PAD_RANDOM.nextBytes(one);
            } while (one[0] == 0);
            block[i] = one[0];
        }
        block[1] = 2;
        block[0] = 0;
        return new BigInteger(1, block);
    }
}
参数实体:
package token.def;

import java.io.Serializable;

/**
 * Serializable holder for the values exchanged during a login attempt:
 * the prelogin data (pcid, servertime, nonce, rsakv), the captcha image URL,
 * the encrypted password, the resulting code and a login flag that starts
 * out as {@code true}.
 */
public class LoginParams implements Serializable {

    private static final long serialVersionUID = -5775728968372860382L;

    private String pcid;
    private String servertime;
    private String nonce;
    private String rsakv;
    private String imgUrl;
    private String sp;
    private String code;
    private boolean isLogin = true;

    // ---- prelogin values ----

    public String getPcid() {
        return this.pcid;
    }

    public void setPcid(String pcid) {
        this.pcid = pcid;
    }

    public String getServertime() {
        return this.servertime;
    }

    public void setServertime(String servertime) {
        this.servertime = servertime;
    }

    public String getNonce() {
        return this.nonce;
    }

    public void setNonce(String nonce) {
        this.nonce = nonce;
    }

    public String getRsakv() {
        return this.rsakv;
    }

    public void setRsakv(String rsakv) {
        this.rsakv = rsakv;
    }

    // ---- captcha / credentials ----

    public String getImgUrl() {
        return this.imgUrl;
    }

    public void setImgUrl(String imgUrl) {
        this.imgUrl = imgUrl;
    }

    public String getSp() {
        return this.sp;
    }

    public void setSp(String sp) {
        this.sp = sp;
    }

    // ---- outcome ----

    public String getCode() {
        return this.code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public boolean isLogin() {
        return this.isLogin;
    }

    public void setLogin(boolean isLogin) {
        this.isLogin = isLogin;
    }

    /** Lists every field in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("LoginParams [");
        text.append("pcid=").append(pcid);
        text.append(", servertime=").append(servertime);
        text.append(", nonce=").append(nonce);
        text.append(", rsakv=").append(rsakv);
        text.append(", imgUrl=").append(imgUrl);
        text.append(", sp=").append(sp);
        text.append(", code=").append(code);
        text.append(", isLogin=").append(isLogin);
        text.append("]");
        return text.toString();
    }
}
登陆部分实现:
package token.exe; import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Scanner; import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.HttpVersion;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.protocol.Protocol;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import token.SinaWeiboOAuth;
import token.def.LoginParams;
import weibo4j.model.MySSLSocketFactory;

/**
 * Simulates the Sina Weibo login and authorization requests over HTTP.
 */
public class WeiboLoginer {

    /** HTTP client used for every request made by this instance. */
    private HttpClient httpClient;

    /**
     * Creates the loginer and initialises its HttpClient: connection limits
     * and timeouts, cookie policy, default request headers and the https
     * protocol handler.
     */
    public WeiboLoginer() {
        // Connection manager settings (timeouts are in milliseconds).
        MultiThreadedHttpConnectionManager httpManager = new MultiThreadedHttpConnectionManager();
        HttpConnectionManagerParams connectParams = httpManager.getParams();
        connectParams.setConnectionTimeout(3000);
        connectParams.setDefaultMaxConnectionsPerHost(100);
        connectParams.setSoTimeout(3000);

        // Client parameters.
        HttpClientParams httpParams = new HttpClientParams();
        httpParams.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        httpParams.setVersion(HttpVersion.HTTP_1_1);

        // Default headers sent with every request.
        List<Header> headers = new ArrayList<Header>();
        headers.add(new Header("Content-Type", "application/x-www-form-urlencoded"));
        headers.add(new Header("Host", "login.sina.com.cn"));
        headers.add(new Header("User-Agent","Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"));
        headers.add(new Header("API-RemoteIP", "192.168.0.1")); // spoofed IP for Sina's check
        headers.add(new Header("X-Forwarded-For","192.168.0.1")); // spoofed real IP
        headers.add(new Header("CLIENT-IP", "192.168.0.1")); // spoofed client IP

        // Create the client and attach the default headers.
        httpClient = new HttpClient(httpParams, httpManager);
        httpClient.getHostConfiguration().getParams().setParameter("http.default-headers", headers);

        // Register the https protocol handler.
        Protocol protocol = new Protocol("https",new MySSLSocketFactory(), 443);
        Protocol.registerProtocol("https", protocol);

        // Proxy settings (disabled):
        // httpClient.getHostConfiguration().setProxy("", 0);
        // httpClient.getParams().setAuthenticationPreemptive(false);
    }

    /**
* Logs in and obtains the authorization code. When a captcha is required,
* the returned object carries the captcha parameters (pcid, nonce,
* servertime, rsakv, captcha image URL and the encrypted password) instead
* of a code.
*
* @param username account name
* @param password plain-text password
* @return the login parameters; {@code getCode()} is {@code null} when a
*         captcha is needed or one of the requests failed
*/
public LoginParams doLogin(String username, String password) {
    Properties properties = initProperties();
    String base64UserCount = WeiboEncoder.encodeAccount(username);
    LoginParams loginParams = new LoginParams();

    HashMap<String, String> pubkeyMap;
    try {
        pubkeyMap = pubKeyMap(base64UserCount);
    } catch (IOException e) {
        e.printStackTrace();
        // Every later step needs the prelogin values, so stop here.
        return loginParams;
    }
    if (pubkeyMap == null) {
        return loginParams;
    }

    String sp = WeiboEncoder.RSAEncrypt(password, pubkeyMap.get("pubkey"), "10001");
    String imgUrl = getPin(pubkeyMap);
    if (imgUrl != null) {
        fillCaptchaParams(loginParams, pubkeyMap, imgUrl, sp);
        return loginParams;
    }

    HashMap<String, String> ticketMap;
    try {
        ticketMap = getTicket(base64UserCount, sp, pubkeyMap);
    } catch (Exception e1) {
        e1.printStackTrace();
        // No ticket means there is nothing to authorize with.
        return loginParams;
    }
    if (ticketMap == null) {
        return loginParams;
    }

    // The final login step may still ask for a captcha
    // (when the account is a Sina registration e-mail).
    String vcUrl = isHasPinAgain(pubkeyMap, ticketMap);
    if (vcUrl != null) {
        // Hand back the URL just computed; imgUrl is always null on this path.
        fillCaptchaParams(loginParams, pubkeyMap, vcUrl, sp);
        return loginParams;
    }

    try {
        String code = authorize(ticketMap.get("ticket"), properties.getProperty("authorizeURL"),
                properties.getProperty("redirect_URI"), properties.getProperty("client_ID"),
                username, ticketMap.get("uid"));
        loginParams.setCode(code);
    } catch (KeyManagementException e) {
        e.printStackTrace();
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return loginParams;
}

/** Copies the values a captcha retry needs into {@code target}. */
private static void fillCaptchaParams(LoginParams target, HashMap<String, String> pubkeyMap,
        String captchaUrl, String sp) {
    target.setPcid(pubkeyMap.get("pcid"));
    target.setNonce(pubkeyMap.get("nonce"));
    target.setServertime(pubkeyMap.get("servertime"));
    target.setRsakv(pubkeyMap.get("rsakv"));
    target.setImgUrl(captchaUrl);
    target.setSp(sp);
}

/**
* Logs in when a captcha has to be supplied.
*
* @param username   account name
* @param sp         encrypted password
* @param pin        captcha text
* @param pcid       pcid value
* @param servertime servertime value
* @param nonce      nonce value
* @param rsakv      rsakv value
* @return parameters holding the code on success; {@code isLogin()} is
*         {@code false} when the server reports a wrong captcha
*/
public LoginParams doLoginByPin(String username, String sp, String pin, String pcid,
        String servertime, String nonce, String rsakv) {
    Properties config = initProperties();
    String encodedUser = WeiboEncoder.encodeAccount(username);
    LoginParams outcome = new LoginParams();

    // Escaped form of the server message "the captcha entered is incorrect".
    final String wrongPinReason = "\\u8f93\\u5165\\u7684\\u9a8c\\u8bc1\\u7801\\u4e0d\\u6b63\\u786e";

    try {
        HashMap<String, String> ticketData = getTicket(encodedUser, sp, pin, pcid,
                servertime, nonce, rsakv);

        boolean rejectedPin = ticketData.containsKey("reason")
                && ticketData.get("reason").equals(wrongPinReason);
        if (rejectedPin) {
            outcome.setLogin(false);
            return outcome;
        }

        outcome.setCode(authorize(
                ticketData.get("ticket"),
                config.getProperty("authorizeURL"),
                config.getProperty("redirect_URI"),
                config.getProperty("client_ID"),
                username,
                ticketData.get("uid")));
    } catch (Exception e) {
        e.printStackTrace();
    }
    return outcome;
}

/**
* Simulates the authorization request.
*
* @param ticket       ticket parameter
* @param authorizeURL URL the form is posted to
* @param redirectURI  callback address
* @param clientId     appKey
* @param username     user name
* @param uid          user id, passed on to the second authorization request
* @return on HTTP 200 the result of {@code authorizeAgain}; on HTTP 302 the
*         text after the first '=' of the Location header; otherwise {@code null}
* @throws IOException
* @throws KeyManagementException
* @throws NoSuchAlgorithmException
*/
private String authorize(String ticket, String authorizeURL, String redirectURI,
        String clientId, String username, String uid) throws IOException,
        KeyManagementException, NoSuchAlgorithmException {
    String url = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&forcelogin=true";
    String regCallback = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&display=default&from=&with_cookie=";

    PostMethod post = new PostMethod(authorizeURL);
    byte[] htmlDatas = null;
    String location = null;
    try {
        // Mimic the link the token was requested from; without it the
        // callback address comes back empty.
        post.setRequestHeader("Referer",url);
        // Form fields submitted by the simulated login.
        NameValuePair[] formpPairs=new NameValuePair[]{
                new NameValuePair("action", "login"),
                new NameValuePair("userId",username),
                new NameValuePair("ticket", ticket),
                new NameValuePair("response_type", "code"),
                new NameValuePair("redirect_uri", redirectURI),
                new NameValuePair("client_id", clientId),
                new NameValuePair("regCallback", URLEncoder.encode(regCallback, "UTF-8"))
        };
        post.setRequestBody(formpPairs);

        int status = httpClient.executeMethod(post);
        if (status == HttpStatus.SC_OK) {
            htmlDatas = post.getResponseBody();
        } else if (status == 302) {
            Header locationHeader = post.getResponseHeader("location");
            // A redirect without a Location header yields no code rather than an NPE.
            if (locationHeader != null) {
                location = locationHeader.getValue();
            }
        }
    } finally {
        // Hand the connection back to the pool on every path.
        post.releaseConnection();
    }

    if (htmlDatas != null) {
        return authorizeAgain(htmlDatas, ticket, authorizeURL,
                redirectURI, clientId, username, uid);
    }
    if (location != null) {
        return location.substring(location.indexOf("=")+1);
    }
    return null;
}

/**
* Submits the authorization request a second time, using the verifyToken
* found in the page returned by the first request.
*
* @param htmlDatas page data returned by the first authorization request
* @return the text after the first '=' of the Location header when the
*         server answers 302; otherwise {@code null}
* @throws IOException if the page has no verifyToken input or the request fails
* @throws HttpException
*/
private String authorizeAgain(byte[] htmlDatas, String ticket, String authorizeURL,
        String redirectURI,String clientId, String username,
        String uid) throws HttpException, IOException {
    String html = new String(htmlDatas, "utf-8");
    Document doc = Jsoup.parse(html);
    Element verifyTokeneElement = doc.select("input[name=verifyToken]").first();
    if (verifyTokeneElement == null) {
        // The page is not the expected confirmation form.
        throw new IOException("verifyToken input not found in the authorize page");
    }
    String verifyToken = verifyTokeneElement.attr("value");

    String url = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&forcelogin=true";
    String regCallback = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&display=default&from=&with_cookie=";

    PostMethod post = new PostMethod(authorizeURL);
    try {
        // Mimic the link the token was requested from; without it the
        // callback address comes back empty.
        post.setRequestHeader("Referer",authorizeURL);
        // Form fields submitted by the simulated authorization.
        NameValuePair[] formpPairs=new NameValuePair[]{
                new NameValuePair("action", "authorize"),
                new NameValuePair("uid",uid),
                new NameValuePair("url", url),
                new NameValuePair("response_type", "code"),
                new NameValuePair("redirect_uri", redirectURI),
                new NameValuePair("client_id", clientId),
                new NameValuePair("verifyToken", verifyToken),
                new NameValuePair("regCallback", URLEncoder.encode(regCallback, "UTF-8"))
        };
        post.setRequestBody(formpPairs);

        int status = httpClient.executeMethod(post);
        if (status != 302) {
            return null;
        }
        Header locationHeader = post.getResponseHeader("location");
        // Check the header itself before reading its value.
        String location = (locationHeader == null) ? null : locationHeader.getValue();
        if (location == null) {
            throw new NullPointerException("redirect_uri is null");
        }
        return location.substring(location.indexOf("=")+1);
    } finally {
        // Hand the connection back to the pool on every path.
        post.releaseConnection();
    }
}

/**
* Simulates the prelogin request and returns the parsed response.
*
* @param unameBase64 Base64-encoded user name
* @return key/value pairs parsed from the prelogin response
* @throws IOException
*/
private HashMap<String, String> pubKeyMap(String unameBase64)
        throws IOException {
    // Base64 may contain '+', '/' and '='; an unescaped '+' in a query
    // string is read as a space, so percent-encode the value.
    String url = "https://login.sina.com.cn/sso/prelogin.php?"
            + "entry=openapi&"
            + "callback=sinaSSOController.preloginCallBack&"
            + "su=" + URLEncoder.encode(unameBase64, "UTF-8") + "&"
            + "rsakt=mod&"
            + "checkpin=1&"
            + "client=ssologin.js(v1.4.5)"
            + "&_=" + new Date().getTime();
    return getParaFromResult(get(url));
}

/**
* Tells whether the prelogin response asks for a captcha.
*
* @param pubkeyMap parsed prelogin response; may be {@code null}
* @return the captcha image URL when {@code showpin} is 1, otherwise {@code null}
*/
private String getPin(HashMap<String, String> pubkeyMap) {
    if (pubkeyMap == null) {
        return null;
    }
    String showpin = pubkeyMap.get("showpin");
    if (showpin == null || Integer.parseInt(showpin) != 1) {
        return null;
    }
    // Cast to long: concatenating the double from Math.floor would print
    // "1.2345678E7" or "123.0" instead of an integer.
    return "https://login.sina.com.cn/cgi/pin.php?"
            + "r=" + (long) (Math.random() * 100000000)
            + "&s=0"
            + "&p=" + pubkeyMap.get("pcid");
}

/**
* Checks whether the login response asks for a captcha after all.
*
* @param pubkeyMap parsed prelogin response; supplies the pcid
* @param ticketMap parsed login response; its {@code reason} is inspected
* @return the captcha image URL when the reason is the "enter the captcha"
*         message, otherwise {@code null}
*/
private String isHasPinAgain(HashMap<String, String> pubkeyMap,
        HashMap<String, String> ticketMap) {
    if (pubkeyMap == null || ticketMap == null) {
        return null;
    }
    // Escaped form of "for the safety of your account, please enter the captcha".
    String str = "\\u4e3a\\u4e86\\u60a8\\u7684\\u5e10\\u53f7\\u5b89" +
            "\\u5168\\uff0c\\u8bf7\\u8f93\\u5165\\u9a8c\\u8bc1\\u7801";
    if (!ticketMap.containsKey("reason")) {
        return null;
    }
    String reasonStr = ticketMap.get("reason");
    if (!reasonStr.equals(str)) {
        return null;
    }
    // Cast to long: concatenating the double from Math.floor would print
    // "1.2345678E7" or "123.0" instead of an integer.
    return "https://login.sina.com.cn/cgi/pin.php?"
            + "r=" + (long) (Math.random() * 100000000)
            + "&s=0"
            + "&p=" + pubkeyMap.get("pcid");
}

/**
* 获取验证码
*/
public String getVCode(String pcid) { String imgUrl = null;
if (pcid != null) {
String url = "https://login.sina.com.cn/cgi/pin.php?"
+ "r=" + Math.floor(Math.random() * 100000000)
+ "&s=0"
+ "&p=" + pcid; imgUrl = url;
}
return imgUrl;
} /**
* Downloads the captcha image and writes it to {@code vc.jpg}. Failures
* are printed and otherwise ignored.
*
* @param url captcha image URL
*/
public void saveVCodeImg(String url) {
    GetMethod getImages = new GetMethod(url);
    try {
        int status = httpClient.executeMethod(getImages);
        if (status == HttpStatus.SC_OK) {
            byte[] image = getImages.getResponseBody();
            if (image != null) {
                FileOutputStream outputStream = new FileOutputStream("vc.jpg");
                try {
                    outputStream.write(image);
                } finally {
                    // Close the file even when the write fails.
                    outputStream.close();
                }
            }
        }
    } catch (HttpException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Hand the connection back to the pool on every path.
        getImages.releaseConnection();
    }
}

/**
* Simulates the login request when no captcha is needed and returns the
* parsed response, which is where callers read the ticket from.
*
* @param usernameBase64 Base64-encoded user name
* @param sp             encrypted password (sent with {@code pwencode=rsa})
* @param pubkeyMap      prelogin values; servertime, nonce and rsakv are read from it
* @return key/value pairs parsed from the login response
* @throws Exception if {@code pubkeyMap} is {@code null} or the request or parsing fails
*/
private HashMap<String, String> getTicket(String usernameBase64,
        String sp, HashMap<String, String> pubkeyMap) throws Exception {
    if (pubkeyMap == null) {
        // Without the prelogin values there is no URL to request.
        throw new IllegalArgumentException("pubkeyMap must not be null");
    }
    // Base64 may contain '+', '/' and '='; an unescaped '+' in a query
    // string is read as a space, so percent-encode the value.
    String url = "https://login.sina.com.cn/sso/login.php?"
            + "entry=openapi&"
            + "gateway=1&"
            + "from=&"
            + "savestate=0&"
            + "useticket=1&"
            + "pagerefer=&"
            + "ct=1800&"
            + "s=1&"
            + "vsnf=1&"
            + "vsnval=&"
            + "door=&"
            + "su=" + URLEncoder.encode(usernameBase64, "UTF-8")
            + "&"
            + "service=miniblog&"
            + "servertime=" + pubkeyMap.get("servertime")
            + "&"
            + "nonce=" + pubkeyMap.get("nonce")
            + "&"
            + "pwencode=rsa&"
            + "rsakv=" + pubkeyMap.get("rsakv")
            + "&"
            + "sp=" + sp
            + "&"
            + "encoding=UTF-8&"
            + "callback=sinaSSOController.loginCallBack&"
            + "cdult=2&"
            + "domain=weibo.com&"
            + "prelt=37&"
            + "returntype=TEXT&"
            + "client=ssologin.js(v1.4.5)&" + "_=" + new Date().getTime();
    return getParaFromResult(get(url));
}

/**
* Simulates the login request when a captcha is supplied and returns the
* parsed response, which is where callers read the ticket from.
*
* @param usernameBase64 Base64-encoded user name
* @param sp             encrypted password (sent with {@code pwencode=rsa})
* @param pin            captcha text, sent as {@code door}
* @param pcid           pcid value
* @param servertime     servertime value
* @param nonce          nonce value
* @param rsakv          rsakv value
* @return key/value pairs parsed from the login response
* @throws Exception if the request or parsing fails
*/
public HashMap<String, String> getTicket(String usernameBase64, String sp, String pin,
        String pcid, String servertime,String nonce,String rsakv) throws Exception {
    // The captcha is typed by the user and Base64 may contain '+', '/' and
    // '=', so both values are percent-encoded before going into the query.
    String url = "https://login.sina.com.cn/sso/login.php?"
            + "entry=openapi&"
            + "gateway=1&"
            + "from=&"
            + "savestate=0&"
            + "useticket=1&"
            + "pagerefer=&"
            + "pcid=" + pcid + "&"
            + "ct=1800&"
            + "s=1&"
            + "vsnf=1&"
            + "vsnval=&"
            + "door=" + URLEncoder.encode(pin, "UTF-8") + "&"
            + "su=" + URLEncoder.encode(usernameBase64, "UTF-8")
            + "&"
            + "service=miniblog&"
            + "servertime=" + servertime
            + "&"
            + "nonce=" + nonce
            + "&"
            + "pwencode=rsa&"
            + "rsakv=" + rsakv
            + "&"
            + "sp=" + sp
            + "&"
            + "encoding=UTF-8&"
            + "callback=sinaSSOController.loginCallBack&"
            + "cdult=2&"
            + "domain=weibo.com&"
            + "prelt=37&"
            + "returntype=TEXT&"
            + "client=ssologin.js(v1.4.5)&" + "_=" + new Date().getTime();
    return getParaFromResult(get(url));
}

/**
* Extracts the key/value pairs found between the first '{' and the first
* '}' of a response. Entries are separated by ',' and each entry is split
* at its first ':'; surrounding double quotes are removed.
*
* @param result page content
* @return the parsed pairs
* @throws IllegalArgumentException if {@code result} is {@code null} or
*         contains no "{...}" section
*/
private HashMap<String, String> getParaFromResult(String result) {
    if (result == null) {
        throw new IllegalArgumentException("response is null");
    }
    int open = result.indexOf("{");
    int close = result.indexOf("}");
    if (open < 0 || close <= open) {
        throw new IllegalArgumentException("response contains no {...} section");
    }

    HashMap<String, String> hm = new HashMap<String, String>();
    String[] entries = result.substring(open + 1, close).split(",");
    for (int i = 0; i < entries.length; i++) {
        // Limit 2 keeps values that contain ':' themselves (URLs, times) intact.
        String[] pair = entries[i].split(":", 2);
        if (pair.length < 2) {
            continue; // not a key:value entry
        }
        hm.put(stripQuotes(pair[0]), stripQuotes(pair[1]));
    }
    return hm;
}

/** Trims the token and removes one pair of enclosing double quotes, if present. */
private static String stripQuotes(String token) {
    String text = token.trim();
    if (text.length() >= 2 && text.startsWith("\"") && text.endsWith("\"")) {
        return text.substring(1, text.length() - 1);
    }
    return text;
}

/**
* Requests the given URL and returns the response body decoded as UTF-8.
*
* @param url address to request
* @return the response body
* @throws IOException if the request fails or the status is not 200
*/
private String get(String url) throws IOException {
    GetMethod getMethod = new GetMethod(url);
    try {
        int status = httpClient.executeMethod(getMethod);
        if (status != HttpStatus.SC_OK) {
            // The URL is left out of the message because it carries credentials.
            throw new IOException("Unexpected HTTP status " + status);
        }
        return new String(getMethod.getResponseBody(), "UTF-8");
    } finally {
        // Hand the connection back to the pool on every path.
        getMethod.releaseConnection();
    }
}

/**
* 配置信息初始化
* @return
*/
private Properties initProperties() { Properties prop = new Properties();
try {
prop.load(Thread.currentThread().getContextClassLoader().
getResourceAsStream("config.properties")); } catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return prop;
} /**
* Manual test entry point: logs in with empty credentials and, when no code
* comes back, reads the captcha from standard input (one retry).
*
* @param args unused
*/
public static void main(String[] args) {
    WeiboLoginer loginer = new WeiboLoginer();
    LoginParams loginParams = loginer.doLogin("","");

    if (loginParams.getCode() != null) {
        // No captcha was needed.
        System.out.println(SinaWeiboOAuth.getToken(loginParams.getCode()));
        return;
    }

    // A captcha is required.
    String pcid = loginParams.getPcid();
    String nonce = loginParams.getNonce();
    String rsakv = loginParams.getRsakv();
    String servertime = loginParams.getServertime();
    String sp = loginParams.getSp();

    System.err.println(loginParams.getImgUrl());
    // Fetch a fresh captcha URL as well.
    System.err.println(loginer.getVCode(pcid));

    // One Scanner for both reads; a second one on System.in could lose buffered input.
    Scanner input = new Scanner(System.in);
    String pin = input.nextLine();
    LoginParams loginResult = loginer.doLoginByPin("",sp, pin, pcid, servertime, nonce, rsakv);

    if (!loginResult.isLogin()) {
        System.err.println("验证码错误!重新录入");
        // Fetch and save the captcha image (test).
        String imgUrl = loginer.getVCode(pcid);
        loginer.saveVCodeImg(imgUrl);
        String pin1 = input.nextLine();
        loginResult = loginer.doLoginByPin("",sp, pin1, pcid, servertime, nonce, rsakv);
    }

    // Exchange the code for a token whether the first or the second attempt produced it.
    System.out.println(SinaWeiboOAuth.getToken(loginResult.getCode()));
}
}
上述代码完整模拟了微博登陆的全过程,并获得最终授权

新浪微博数据抓取(java实现)的更多相关文章

  1. 新浪微博模拟登陆+数据抓取(java实现)

    模拟登陆部分实现: package token.exe; import java.math.BigInteger; import java.util.Random; import org.apache ...

  2. 腾讯微博模拟登陆+数据抓取(java实现)

    不多说,贴出相关代码. 参数实体: package token.def; import java.io.Serializable; import java.util.Properties; publi ...

  3. 腾讯微博数据抓取(java实现)

    不多说,同样贴出相关代码 参数实体: package token.def; import java.io.Serializable; import java.util.Properties; publ ...

  4. Java模拟新浪微博登陆抓取数据

    前言:  兄弟们来了来了,最近有人在问如何模拟新浪微博登陆抓取数据,我听后默默地抽了一口老烟,暗暗的对自己说,老汉是时候该你出场了,所以今天有时间就整理整理,浅谈一二. 首先:  要想登陆新浪微博需要 ...

  5. Java实现多种方式的http数据抓取

    前言: 时下互联网第一波的浪潮已消逝,随着而来的基于万千数据的物联网时代,因而数据成为企业的重要战略资源之一.基于数据抓取技术,本文介绍了java相关抓取工具,并附上demo源码供感兴趣的朋友测试! ...

  6. 大数据抓取采集框架(摘抄至http://blog.jobbole.com/46673/)

    摘抄至http://blog.jobbole.com/46673/ 随着BIG DATA大数据概念逐渐升温,如何搭建一个能够采集海量数据的架构体系摆在大家眼前.如何能够做到所见即所得的无阻拦式采集.如 ...

  7. [nodejs,expressjs,angularjs2] LOL英雄列表数据抓取及查询显示应用

    新手练习,尝试使用angularjs2 [angularjs2 数据绑定,监听数据变化自动修改相应dom值,非常方便好用,但与传统js(jquery)的使用方法会很不同,Dom操作也不太习惯] 应用效 ...

  8. [原创.数据可视化系列之十二]使用 nodejs通过async await建立同步数据抓取

    做数据分析和可视化工作,最重要的一点就是数据抓取工作,之前使用Java和python都做过简单的数据抓取,感觉用的很不顺手. 后来用nodejs发现非常不错,通过js就可以进行数据抓取工作,类似jqu ...

  9. python爬虫(一)_爬虫原理和数据抓取

    本篇将开始介绍Python原理,更多内容请参考:Python学习指南 为什么要做爬虫 著名的革命家.思想家.政治家.战略家.社会改革的主要领导人物马云曾经在2015年提到由IT转到DT,何谓DT,DT ...

随机推荐

  1. 笔试——JAVA相关

    1. String 和 StringBuffer 的区别(转自http://pengcqu.iteye.com/blog/487538) Java笔试题经常考到“String和StringBuffer ...

  2. OpenGL的gluPerspective和gluLookAt的关系[转]

    函数原型void gluLookAt(GLdoble eyex, GLdouble eyey, GLdouble eyez,  GLdouble centerx, GLdouble centery, ...

  3. [Selenium]点击Calendar控件后,Calendar dialog很快消失

    有的日历控件使用了“opacity:0”透明度加上“display:none”来控制其是否显示,使用moveToElement方法不能移动到dialog上,可以把这两个属性的值进行修改,使其可见,使用 ...

  4. 转!Java关键字final、static使用总结

    Java关键字final.static使用总结   一.final 根据程序上下文环境,Java关键字final有“这是无法改变的”或者“终态的”含义,它可以修饰非抽象类.非抽象类成员方法和变量.你可 ...

  5. HBase之创建表

    import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; impo ...

  6. 程序猿必知会的JavaScript 的遍历方式

    不管是移动移动端开发还是web端开发,我们对JS的使用频率都在增加,今天小编将要和大家分享的就是JavaScript中,遍历方式的一些实现方法,个人感觉还是很有用的,有兴趣的童鞋可以一起来看看. 为了 ...

  7. LF CRLF

    在git提交的时候 有时候会提示这个 LF will be replaced by CRLF 这是因为window的结束符是:回车和换行 crlfmac和linux的结束符是 lf, 于是当代码在这两 ...

  8. 关于eclipse中maven项目的问题

    问题1: 严重: Error configuring application listener of class org.springframework.web.context.ContextLoad ...

  9. canvas调节视频颜色

    <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8&quo ...

  10. c语言详解sizeof

    一.sizeof的概念   sizeof是C语言的一种单目操作符,如C语言的其他操作符++.--等.     它并不是函数.     sizeof操作符以字节形式给出了其操作数的存储大小.     操 ...