先来看我们的web.xml文件,如下

 <!DOCTYPE web-app PUBLIC
"-//Sun Microsystems, Inc.//DTD Web Application 2.3//EN"
"http://java.sun.com/dtd/web-app_2_3.dtd" > <web-app>
<!-- Display name of this web application. -->
<display-name>MySinaSpider</display-name>
<!-- Registers StartSpiderLisenter as a listener of this web application. -->
<listener>
<listener-class>main.java.sina.spider.StartSpiderLisenter</listener-class>
</listener>
</web-app>

这样的配置当启动tomcat的时候,就会运行爬虫,然后再看我们的StartSpiderLisenter类,如下

 package main.java.sina.spider;

 import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import main.java.sina.bean.info.LoginInfo;
import main.java.sina.utils.Constant;

/**
 * Servlet context listener that fills in the crawler settings and starts the
 * spider from {@link #contextInitialized(ServletContextEvent)}.
 */
public class StartSpiderLisenter implements ServletContextListener {

    /**
     * Applies the account and proxy settings, then starts the spider.
     *
     * @param arg0 the context event supplied by the container (not used)
     */
    public void contextInitialized(ServletContextEvent arg0) {
        applySettings();
        Spider.start();
    }

    /**
     * Intentionally empty: no action is taken when the context is destroyed.
     *
     * @param arg0 the context event supplied by the container (not used)
     */
    public void contextDestroyed(ServletContextEvent arg0) {
    }

    /** Writes the hard-coded settings into the shared static configuration holders. */
    private static void applySettings() {
        Constant.enableProxy = false; // whether to use a proxy
        Constant.personalHomePage = "http://weibo.com/zhaoyao2012/home"; // your own Sina Weibo home page
        LoginInfo.username = "***"; // your Sina Weibo user name
        LoginInfo.password = "***"; // your Sina Weibo password
    }
}

很明显我们看到StartSpiderLisenter 类实现了ServletContextListener这个接口,因此必须实现它的两个方法:contextInitialized和contextDestroyed。它们分别在Web应用上下文初始化和销毁的时候被容器调用。我们看到在contextInitialized初始化上下文的方法中调用了Spider.start()方法。那么我们来看看Spider这个类,如下:

 package main.java.sina.spider;

 import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.quartz.JobBuilder;
import org.quartz.JobDetail;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
import org.quartz.SchedulerFactory;
import org.quartz.SimpleScheduleBuilder;
import org.quartz.SimpleTrigger;
import org.quartz.TriggerBuilder;
import org.quartz.impl.StdSchedulerFactory;
import main.java.sina.bean.info.LoginInfo;
import main.java.sina.httpclient.LoginSina;
import main.java.sina.httpclient.SpiderSina;
import main.java.sina.job.KeywordSearchJob;
import main.java.sina.utils.Constant;
import main.java.sina.utils.HttpHelper;
import main.java.test.SpiderTest;

/**
 * Crawler entry point: schedules the hourly keyword-search job and provides the
 * login and result-page scraping helpers that the job calls.
 */
public class Spider {

    /** Captures the numeric id of every {@code <div mid="...">} element; compiled once, not per page. */
    private static final Pattern MID_PATTERN = Pattern.compile("<div mid=\"([0-9]*)\"");

    /** Text that marks where the search-result section starts inside a page. */
    private static final String RESULT_MARKER = "\"pid\":\"pl_weibo_direct\"";

    /** Highest result page that is requested for one search. */
    private static final int MAX_PAGES = 50;

    /** After this page the crawl pauses for {@link #PAUSE_MILLIS}. */
    private static final int PAUSE_AFTER_PAGE = 37;

    /** Length of the pause: 30 minutes. */
    private static final long PAUSE_MILLIS = 1000L * 60 * 30;

    /**
     * Stand-alone launcher: fills in the settings and starts the scheduler.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Constant.personalHomePage = "****";
        LoginInfo.username = "****";
        LoginInfo.password = "****";
        Constant.enableProxy = false;
        Constant.hourbefore = 0; // this parameter sets the time offset, in hours
        start();
    }

    /**
     * Schedules {@link KeywordSearchJob} with a trigger that repeats hourly forever
     * and starts the Quartz scheduler. Scheduler errors are printed, not rethrown.
     */
    public static void start() {
        final SchedulerFactory factory = new StdSchedulerFactory();
        try {
            Scheduler scheduler = factory.getScheduler();
            JobDetail jobDetail = JobBuilder.newJob(KeywordSearchJob.class)
                    .withIdentity("keywordSearch", "weibo").build();
            SimpleTrigger trigger = TriggerBuilder.newTrigger()
                    .withIdentity("keywordSearch", "weibo")
                    .withSchedule(SimpleScheduleBuilder.repeatHourlyForever())
                    .build();
            scheduler.scheduleJob(jobDetail, trigger);
            scheduler.start();
        } catch (SchedulerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Logs in with the credentials held in {@code LoginInfo} and wraps the
     * resulting login session in a {@code SpiderSina}.
     *
     * @return a spider bound to the freshly created login session
     */
    public static SpiderSina createSpider() {
        LoginSina ls = new LoginSina(LoginInfo.username, LoginInfo.password);
        ls.dologinSina();
        ls.redirect();
        return new SpiderSina(ls);
    }

    /**
     * Pages through the search results for {@code keyword}, collects every message
     * id found and posts the comma-terminated id list to {@code Constant.CommentUrl}.
     * Paging stops at the first page containing {@code noresult_support} or after
     * {@value #MAX_PAGES} pages. I/O errors are printed, not rethrown.
     *
     * @param spider   logged-in spider used to run the searches
     * @param keyword  search keyword, passed through to the spider unchanged
     * @param fromdate start of the time range, passed through unchanged
     * @param todate   end of the time range, passed through unchanged
     */
    public static void sendMidsofDays(SpiderSina spider, String keyword, String fromdate,
            String todate) {
        try {
            StringBuilder mids = new StringBuilder();
            for (int i = 1; i <= MAX_PAGES; i++) {
                String htmlContent = spider.search(keyword, i, fromdate, todate);
                // NOTE(review): whether search() can return null is not visible here; treated as "no more results".
                if (htmlContent == null || htmlContent.contains("noresult_support")) {
                    break;
                }
                System.out.println(i);

                // Cut the page down to the result section. When the marker is absent the
                // first character is dropped, which is what the previous exception-driven
                // fallback did; an empty page is now left alone instead of throwing.
                int markerAt = htmlContent.indexOf(RESULT_MARKER);
                if (markerAt >= 0) {
                    htmlContent = htmlContent.substring(markerAt);
                } else if (htmlContent.length() > 0) {
                    htmlContent = htmlContent.substring(1);
                }

                // Undo the escaping of quotes and slashes so the pattern can match.
                htmlContent = htmlContent.replace("\\\"", "\"");
                htmlContent = htmlContent.replace("\\/", "/");

                Matcher matcher = MID_PATTERN.matcher(htmlContent);
                while (matcher.find()) {
                    System.out.println(matcher.group(1));
                    mids.append(matcher.group(1)).append(",");
                }

                if (i == PAUSE_AFTER_PAGE) {
                    try {
                        Thread.sleep(PAUSE_MILLIS);
                    } catch (InterruptedException e) {
                        // Keep the interrupt visible to whoever owns this thread.
                        Thread.currentThread().interrupt();
                        e.printStackTrace();
                    }
                }
            }
            String midsString = mids.toString();
            System.out.println(midsString);
            HttpHelper.getLiveData(midsString, Constant.CommentUrl);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

我们在Spider.start()方法中,看到了作业KeywordSearchJob.class,那么我们来看看这个KeywordSearchJob类的实现,如下:

 package main.java.sina.job;

 import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import main.java.sina.httpclient.SpiderSina;
import main.java.sina.spider.Spider;
import main.java.sina.utils.Constant;
import main.java.sina.utils.Utils;

/**
 * Quartz job that logs in, searches for one fixed keyword within a single hour
 * bucket and forwards the ids found via {@code Spider.sendMidsofDays}.
 */
public class KeywordSearchJob implements Job {

    /**
     * Runs one crawl pass.
     *
     * @param arg0 Quartz execution context (not used)
     * @throws JobExecutionException declared by the {@code Job} interface
     */
    public void execute(JobExecutionContext arg0) throws JobExecutionException {
        // This crawler does not use a proxy, so the flag is forced off.
        Constant.enableProxy = false;

        // The keyword in its already-encoded form.
        final String encodedKeyword = "%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6";
        // Hour stamp shifted by Constant.hourbefore; used as both ends of the search range.
        final String hourStamp = Utils.getDateOfSpecifiedPreHour(Constant.hourbefore);

        final SpiderSina session = Spider.createSpider();
        session.forwardToWeiboPage();
        Spider.sendMidsofDays(session, encodedKeyword, hourStamp, hourStamp);
    }
}

接下来,我们看几个工具类的实现:首先来看下Utils.java这个类,如下:它实现了日期的格式的一些转换

 package main.java.sina.utils;

 import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties; import org.htmlparser.Parser;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.DefaultParserFeedback;
/**
 * I/O and date helpers: parsing of relative/absolute date captions, copying
 * streams and strings to files, reading streams as text, loading the DB
 * configuration and creating an HTML parser.
 */
public class Utils {

    /**
     * Converts a date caption into a {@link Date}, relative to {@code fileCreateDate}.
     * Supported forms: "N秒前", "N分钟前", "今天 HH:mm", "MM月dd日 HH:mm" (year taken
     * from {@code fileCreateDate}) and "yyyy-MM-dd HH:mm".
     *
     * @param dtext          caption to parse
     * @param fileCreateDate reference instant for relative captions
     * @return the parsed date, or {@code null} when an argument is null or the
     *         caption cannot be parsed
     */
    public static Date getDateFromString(String dtext, Date fileCreateDate) {
        if (dtext == null || fileCreateDate == null) {
            return null;
        }
        Date date = null;
        Calendar c = Calendar.getInstance();
        c.setTime(fileCreateDate);
        if (dtext.contains("秒前")) {
            int second = leadingNumber(dtext);
            if (second >= 0) {
                c.add(Calendar.SECOND, -second);
                date = c.getTime();
            }
        } else if (dtext.contains("分钟前")) {
            int minute = leadingNumber(dtext);
            if (minute >= 0) {
                c.add(Calendar.MINUTE, -minute);
                date = c.getTime();
            }
        } else if (dtext.contains("今天")) {
            String[] ss = dtext.replace("今天 ", "").trim().split(":");
            if (ss.length == 2) {
                try {
                    int hour = Integer.parseInt(ss[0]);
                    int minute = Integer.parseInt(ss[1]);
                    c.set(Calendar.HOUR_OF_DAY, hour);
                    c.set(Calendar.MINUTE, minute);
                    date = c.getTime();
                } catch (NumberFormatException e) {
                    // Same outcome as the other unparseable forms: report and return null.
                    e.printStackTrace();
                }
            }
        } else if (dtext.contains("月")) {
            date = parseOrNull("yyyy年MM月dd日 HH:mm", c.get(Calendar.YEAR) + "年" + dtext);
        } else if (dtext.contains("-")) {
            date = parseOrNull("yyyy-MM-dd HH:mm", dtext);
        }
        return date;
    }

    /** Parses the run of ASCII digits at the start of {@code text}; returns -1 when there is none or it overflows. */
    private static int leadingNumber(String text) {
        int end = 0;
        while (end < text.length() && text.charAt(end) >= '0' && text.charAt(end) <= '9') {
            end++;
        }
        if (end == 0) {
            return -1;
        }
        try {
            return Integer.parseInt(text.substring(0, end));
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /** Parses {@code text} with a fresh formatter for {@code pattern}; prints the error and returns null on failure. */
    private static Date parseOrNull(String pattern, String text) {
        try {
            return new SimpleDateFormat(pattern).parse(text);
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Closes {@code resource} if it is non-null, printing (not rethrowing) any failure. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies {@code in} into the file {@code filename}, replacing its content, and
     * closes {@code in} afterwards. Does nothing when {@code filename} is blank or
     * {@code in} is null. I/O errors are printed, not rethrown.
     *
     * @param filename destination path
     * @param in       source stream
     */
    public static void writeFileFromStream(String filename, InputStream in) {
        if (filename == null || filename.trim().length() == 0 || in == null) {
            return;
        }
        FileOutputStream fou = null;
        try {
            // FileOutputStream creates the file when it is missing.
            fou = new FileOutputStream(new File(filename));
            byte[] buffer = new byte[1024 * 4];
            int len;
            while ((len = in.read(buffer)) != -1) {
                fou.write(buffer, 0, len);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(in);
            closeAndReport(fou);
        }
    }

    /**
     * Writes {@code str} to {@code filename} line by line, ending every line with
     * {@code "\n"}. A blank {@code filename} falls back to {@code tmp.txt}; a null
     * {@code str} is written as empty content. I/O errors are printed, not rethrown.
     *
     * @param filename destination path
     * @param str      text to write
     */
    public static void writeFileFromString(String filename, String str) {
        if (filename == null || filename.trim().length() == 0) {
            filename = "tmp.txt";
        }
        BufferedReader reader = new BufferedReader(new StringReader(str == null ? "" : str));
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(new File(filename)));
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.write("\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Null-safe: the writer is still null when the file could not be opened.
            closeAndReport(reader);
            closeAndReport(writer);
        }
    }

    /**
     * Reads {@code in} as UTF-8 text, line by line, ending every line with
     * {@code "\n"}, and closes the stream. Decoding happens directly in UTF-8
     * rather than through the platform charset, so the result no longer depends
     * on the JVM default encoding.
     *
     * @param in stream to read; null yields an empty string
     * @return the text read so far (read errors are printed and end the read), or
     *         {@code "error:" + message} if UTF-8 were unsupported
     */
    public static String getStringFromStream(InputStream in) {
        if (in == null) {
            return "";
        }
        StringBuilder buffer = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String str;
            while ((str = reader.readLine()) != null) {
                buffer.append(str).append("\n");
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return "error:" + e.getMessage();
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            if (reader != null) {
                closeAndReport(reader);
            } else {
                closeAndReport(in);
            }
        }
        return buffer.toString();
    }

    /**
     * Loads the database configuration from {@code config/dbconfig.ini}.
     *
     * @return the loaded properties; empty when the file is missing or unreadable
     */
    public static Properties getDBconfig() {
        Properties properties = new Properties();
        InputStream in = null;
        try {
            in = new FileInputStream(new File("config/dbconfig.ini"));
            properties.load(in);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(in);
        }
        return properties;
    }

    /**
     * Creates an HTML parser over {@code inputHTML} with quiet parser feedback.
     *
     * @param inputHTML markup to parse
     * @return a parser positioned at the start of the markup
     */
    public static Parser createParser(String inputHTML) {
        Lexer mLexer = new Lexer(new Page(inputHTML));
        return new Parser(mLexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
    }

    /**
     * Formats the instant {@code hourNum} hours before now as {@code yyyy-MM-dd-HH}.
     * Also prints the current time and the result to standard output.
     *
     * @param hourNum number of hours to go back
     * @return the formatted hour stamp
     */
    public static String getDateOfSpecifiedPreHour(int hourNum) {
        SimpleDateFormat sdFormat = new SimpleDateFormat("yyyy-MM-dd-HH");
        Date date = new Date();
        System.out.println("date -" + date + " " + hourNum);
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        calendar.add(Calendar.HOUR_OF_DAY, -1 * hourNum);
        String formatted = sdFormat.format(calendar.getTime());
        System.out.println("date2 -" + formatted);
        return formatted;
    }
}

再来看一下ThreadPool.java这个类,如下:这是一个线程工具类,定义了线程的一些动作

 package main.java.sina.utils;

 import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Thread-pool helper: hands a given list of threads, as runnables, to a
 * fixed-size executor.
 */
public class ThreadPool {
    private ExecutorService service;
    private List<Thread> threadList;

    /**
     * @param limite     number of worker threads in the fixed pool
     * @param threadList tasks to run; may be null or empty
     */
    public ThreadPool(int limite, List<Thread> threadList) {
        this.threadList = threadList;
        this.service = Executors.newFixedThreadPool(limite);
    }

    /** Submits every task of the list to the executor; does nothing for a null or empty list. */
    public void execute() {
        if (threadList != null && !threadList.isEmpty()) {
            for (Thread task : threadList) {
                service.execute(task);
            }
        }
    }

    /** @return whether the underlying executor reports itself as terminated */
    public boolean isTerminated() {
        return service.isTerminated();
    }

    /** Requests shutdown of the underlying executor. */
    public void shutDown() {
        service.shutdown();
    }
}

然后再看一下Constant.java这个常量类,如下:常量类把系统中用到的一些常量集中写在这里,以后项目维护需要更改的时候,方便统一修改

package main.java.sina.utils;

/**
 * Holder for the crawler's shared settings. All fields are public, static and
 * mutable; other classes assign them directly at start-up.
 */
public class Constant {
    /** Hour offset handed to the date helper when the search window is computed. */
    public static int hourbefore = 0;

    /** When true, newly created HTTP clients are configured with a proxy. */
    public static boolean enableProxy = false;

    /** Personal Weibo home page; placeholder value, set at start-up. */
    public static String personalHomePage = "******";

    /** Weibo account name placeholder. */
    public static String weiboUsername = "*********";

    /** Weibo account password placeholder. */
    public static String weiboPassword = "*********";

    /** Endpoint that receives the collected message ids. */
    public static String CommentUrl = "http://localhost:8080/social-hub-connector/loadingData";

    /** Endpoint for live data. */
    public static String liveCommentUrl = "http://localhost:8080/social-hub-connector/loadingLiveData";
}

再来看一下Base64Encoder.java类,它是对一些字段进行Base64编码的类,如下:

 package main.java.sina.utils;

 /**
* */
public class Base64Encoder {
private static final char last2byte = (char) Integer.parseInt("00000011", 2);
private static final char last4byte = (char) Integer.parseInt("00001111", 2);
private static final char last6byte = (char) Integer.parseInt("00111111", 2);
private static final char lead6byte = (char) Integer.parseInt("11111100", 2);
private static final char lead4byte = (char) Integer.parseInt("11110000", 2);
private static final char lead2byte = (char) Integer.parseInt("11000000", 2);
private static final char[] encodeTable = new char[]{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; public Base64Encoder() {
}
public static String encode(byte[] from) {
StringBuffer to = new StringBuffer((int) (from.length * 1.34) + 3);
int num = 0;
char currentByte = 0;
for (int i = 0; i < from.length; i++) {
num = num % 8;
while (num < 8) {
switch (num) {
case 0:
currentByte = (char) (from[i] & lead6byte);
currentByte = (char) (currentByte >>> 2);
break;
case 2:
currentByte = (char) (from[i] & last6byte);
break;
case 4:
currentByte = (char) (from[i] & last4byte);
currentByte = (char) (currentByte << 2);
if ((i + 1) < from.length) {
currentByte |= (from[i + 1] & lead2byte) >>> 6;
}
break;
case 6:
currentByte = (char) (from[i] & last2byte);
currentByte = (char) (currentByte << 4);
if ((i + 1) < from.length) {
currentByte |= (from[i + 1] & lead4byte) >>> 4;
}
break;
}
to.append(encodeTable[currentByte]);
num += 6;
}
}
if (to.length() % 4 != 0) {
for (int i = 4 - to.length() % 4; i > 0; i--) {
to.append("=");
}
}
return to.toString();
}
}

EncodeSuAndSp这个类中的方法是针对新浪的一些特殊加密规则编写的,在拼接最终的URL的时候会用到,例如根据servertime和nonce两个参数,按照加密规则生成一串加密后的字符串:

 package main.java.sina.utils;
import java.io.File;
import java.io.FileReader; import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

/**
 * Computes the encoded user name and the hashed password by calling functions
 * defined in the script file {@code js/encrypt.js}.
 */
public class EncodeSuAndSp {
    static ScriptEngineManager mgr = new ScriptEngineManager();
    static ScriptEngine engine = mgr.getEngineByExtension("js");
    static Invocable inv = (Invocable) engine;

    /**
     * Hashes the password: {@code hex_sha1(hex_sha1(hex_sha1(password)) + servertime + nonce)}.
     *
     * @param password   clear-text password
     * @param servertime server time value appended before the last hash
     * @param nonce      one-time string appended before the last hash
     * @return the final hash, or an empty string when the script cannot be loaded
     *         or any hashing step fails (the error is printed)
     */
    public static String getEncryptedP(String password, String servertime, String nonce) {
        try {
            loadScript();
            String once = String.valueOf(inv.invokeFunction("hex_sha1", password));
            String twice = String.valueOf(inv.invokeFunction("hex_sha1", once));
            // Only a complete three-step result is returned; a half-finished hash is never exposed.
            return String.valueOf(inv.invokeFunction("hex_sha1", twice + servertime + nonce));
        } catch (Exception e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Encodes the user name with the script's {@code encode} function and prints the result.
     *
     * @param username user name to encode
     * @return the encoded name, or an empty string on failure (the error is printed)
     */
    public static String getEncodedUsername(String username) {
        String value1 = "";
        try {
            loadScript();
            value1 = String.valueOf(inv.invokeFunction("encode", username));
            System.out.println(value1);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return value1;
    }

    /** Evaluates {@code js/encrypt.js} in the shared engine and always closes the file reader. */
    private static void loadScript() throws Exception {
        if (engine == null) {
            throw new IllegalStateException("no script engine registered for extension \"js\"");
        }
        FileReader reader = new FileReader(new File("js/encrypt.js"));
        try {
            engine.eval(reader);
        } finally {
            reader.close();
        }
    }
}
package main.java.sina.utils;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
/**
 * URL encoding helpers and decoders that turn backslash-u escape sequences
 * (backslash, 'u', four hex digits) into characters and strip leftover markup.
 */
public class EncodeUtils {

    /**
     * URL-encodes {@code str} using the charset {@code enc}.
     *
     * @throws RuntimeException wrapping the cause when {@code enc} is unsupported
     */
    public static final String encodeURL(String str, String enc) {
        try {
            return URLEncoder.encode(str, enc);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * URL-decodes {@code str} using the charset {@code enc}.
     *
     * @throws RuntimeException wrapping the cause when {@code enc} is unsupported
     */
    public static final String decodeURL(String str, String enc) {
        try {
            return URLDecoder.decode(str, enc);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Decodes the escape sequences in {@code str}, removes literal backslash-r,
     * backslash-n and backslash-t pairs plus {@code &nbsp;} and {@code &gt}, and
     * turns square brackets into double quotes. A six-character escape whose four
     * digits are not hexadecimal becomes a NUL character (unchanged legacy
     * behaviour); an escape cut short by the end of the input is copied as is.
     *
     * @param str text to decode; null yields an empty string
     * @return the decoded and cleaned text, never null
     */
    public static String unicdoeToGB2312(String str) {
        if (str == null) {
            return "";
        }
        return decodeEscapes(str, true)
                .replace("\\r", "")
                .replace("\\n", "")
                .replace("\\t", "")
                .replace("&nbsp;", "")
                .replace("&gt", "")
                .replace("[", "\"")
                .replace("]", "\"");
    }

    /**
     * Decodes the escape sequences in {@code str} and removes literal backslash-r,
     * backslash-t and backslash-n pairs plus {@code &nbsp;} and {@code &gt}.
     * Malformed or truncated escapes are copied unchanged instead of failing.
     *
     * @param str text to decode; null yields an empty string
     * @return the decoded and cleaned text, never null
     */
    public static String unicodeTogb2312(String str) {
        if (str == null) {
            return "";
        }
        return decodeEscapes(str, false)
                .replace("\\r", "")
                .replace("\\t", "")
                .replace("&nbsp;", "")
                .replace("&gt", "")
                .replace("\\n", "");
    }

    /**
     * Single left-to-right pass replacing each well-formed escape by its character.
     *
     * @param nulOnBadHex when true, a full-length escape with non-hex digits is
     *                    replaced by NUL; when false it is copied literally
     */
    private static String decodeEscapes(String str, boolean nulOnBadHex) {
        StringBuilder sb = new StringBuilder(str.length());
        int length = str.length();
        int i = 0;
        while (i < length) {
            if (i + 6 <= length && str.startsWith("\\u", i)) {
                try {
                    sb.append((char) Integer.parseInt(str.substring(i + 2, i + 6), 16));
                    i += 6;
                    continue;
                } catch (NumberFormatException ex) {
                    if (nulOnBadHex) {
                        sb.append((char) 0);
                        i += 6;
                        continue;
                    }
                    // Otherwise fall through and copy the backslash literally.
                }
            }
            sb.append(str.charAt(i));
            i++;
        }
        return sb.toString();
    }
}

HttpUtils.java这个类很关键,这个类中封装了doPost()和doGet()方法,如下:

package main.java.sina.utils;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.params.ConnRouteParams;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext;

/**
 * HTTP helpers built on Apache HttpClient: GET/POST requests, header and
 * cookie extraction, and reading a response body.
 */
public class HttpUtils {

    /**
     * Sends a GET request and prints the URI of the request that was executed.
     *
     * @param url     target address
     * @param headers request headers; may be null or empty
     * @return the response, or null when the request failed (the failure is printed)
     */
    public static HttpResponse doGet(String url, Map<String, String> headers) {
        HttpClient client = createHttpClient();
        HttpGet getMethod = new HttpGet(url);
        HttpResponse response = null;
        HttpContext httpContext = new BasicHttpContext();
        try {
            setHeaders(headers, getMethod);
            // The context has to be handed to execute(); otherwise the HTTP_REQUEST
            // attribute read below is never populated.
            response = client.execute(getMethod, httpContext);
            Object realRequest = httpContext.getAttribute(ExecutionContext.HTTP_REQUEST);
            if (realRequest instanceof HttpUriRequest) {
                System.out.println(((HttpUriRequest) realRequest).getURI());
            }
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            String msg = e.getMessage();
            if (msg != null && msg.contains("Truncated chunk")) {
                System.out.println(msg + " 数据获取不完整,需要重新获取。");
            } else {
                System.out.println(msg + " 连接被拒绝,需要降低爬取频率。");
            }
        } catch (RuntimeException e) {
            // Stay best-effort like before, but no longer hide the failure.
            e.printStackTrace();
        }
        System.out.println(response);
        return response;
    }

    /**
     * Sends a form-encoded (UTF-8) POST request.
     *
     * @param url     target address
     * @param headers request headers; may be null or empty
     * @param params  form fields; may be null or empty, in which case no body is sent
     * @return the response, or null when the request failed (the failure is printed)
     */
    public static HttpResponse doPost(String url, Map<String, String> headers, Map<String, String> params) {
        HttpClient client = createHttpClient();
        HttpPost postMethod = new HttpPost(url);
        HttpResponse response = null;
        try {
            setHeaders(headers, postMethod);
            if (params != null && !params.isEmpty()) {
                List<NameValuePair> pairs = new ArrayList<NameValuePair>();
                for (Map.Entry<String, String> field : params.entrySet()) {
                    pairs.add(new BasicNameValuePair(field.getKey(), field.getValue()));
                }
                postMethod.setEntity(new UrlEncodedFormEntity(pairs, HTTP.UTF_8));
            }
            response = client.execute(postMethod);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return response;
    }

    /**
     * Uploads one file: the body is the boundary taken from the Content-Type
     * header, a part header, the Base64 text of the file and a closing boundary.
     * The file is read as raw bytes, so binary content reaches the encoder intact.
     *
     * @param url      target address
     * @param headers  request headers; the text after '=' in Content-Type is used as boundary
     * @param fileName path of the file to upload
     * @return the response, or null when reading or sending failed (the failure is printed)
     */
    public static HttpResponse doPost(String url, Map<String, String> headers, String fileName) {
        HttpClient client = createHttpClient();
        HttpPost postMethod = new HttpPost(url);
        String boundary = "";
        HttpResponse response = null;
        try {
            if (headers != null) {
                for (Map.Entry<String, String> header : headers.entrySet()) {
                    postMethod.addHeader(header.getKey(), header.getValue());
                    if ("Content-Type".equals(header.getKey()) && header.getValue() != null) {
                        String tmp = header.getValue();
                        boundary = tmp.substring(tmp.indexOf("=") + 1);
                    }
                }
            }
            File file = new File(fileName);
            byte[] fileBytes = readFileBytes(file);

            StringBuffer buffer = new StringBuffer();
            buffer.append(boundary).append("\n")
                    .append("Content-Disposition: form-data; name=\"pic1\"; filename=\"" + file.getName()).append("\"\n")
                    .append("Content-Type: image/pjpeg").append("\n")
                    .append("\n");
            System.out.println(buffer.toString());

            buffer.append(Base64Encoder.encode(fileBytes)).append("\n");
            buffer.append(boundary + "--").append("\n");
            System.out.println(buffer.toString());

            byte[] body = buffer.toString().getBytes();
            postMethod.setEntity(new InputStreamEntity(new ByteArrayInputStream(body), body.length));
            response = client.execute(postMethod);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return response;
    }

    /** Reads the whole file into memory and always closes the stream. */
    private static byte[] readFileBytes(File file) throws IOException {
        InputStream in = new FileInputStream(file);
        try {
            java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int len;
            while ((len = in.read(chunk)) != -1) {
                out.write(chunk, 0, len);
            }
            return out.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Lists the headers of a response.
     *
     * @param response the response; may be null
     * @return the headers, or null when the response is null or has no headers
     */
    public static List<Header> getReponseHeaders(HttpResponse response) {
        if (response == null) {
            return null;
        }
        Header[] hds = response.getAllHeaders();
        if (hds == null || hds.length == 0) {
            return null;
        }
        List<Header> headers = new ArrayList<Header>(hds.length);
        for (Header hd : hds) {
            headers.add(hd);
        }
        return headers;
    }

    /**
     * Adds every entry of {@code headers} to {@code request}.
     *
     * @param headers header names and values; may be null or empty
     * @param request request that receives the headers
     */
    public static void setHeaders(Map<String, String> headers, HttpUriRequest request) {
        if (headers == null) {
            return;
        }
        for (Map.Entry<String, String> header : headers.entrySet()) {
            request.addHeader(header.getKey(), header.getValue());
        }
    }

    /**
     * Extracts the name/value pair of every Set-Cookie header; cookie attributes
     * after the first ';' are ignored. Headers without a "name=value" part are skipped.
     *
     * @param response the response; may be null
     * @return the cookies, or null when the response is null or carries no Set-Cookie header
     */
    public static List<Cookie> getResponseCookies(HttpResponse response) {
        if (response == null) {
            return null;
        }
        Header[] hds = response.getAllHeaders();
        if (hds == null) {
            return null;
        }
        List<Cookie> cookies = null;
        for (Header hd : hds) {
            if (!hd.getName().equalsIgnoreCase("Set-Cookie")) {
                continue;
            }
            if (cookies == null) {
                cookies = new ArrayList<Cookie>();
            }
            String value = hd.getValue();
            if (value == null) {
                continue;
            }
            int semicolon = value.indexOf(';');
            String pair = semicolon >= 0 ? value.substring(0, semicolon) : value;
            int equalsAt = pair.indexOf('=');
            if (equalsAt < 0) {
                continue;
            }
            cookies.add(new BasicClientCookie(pair.substring(0, equalsAt), pair.substring(equalsAt + 1)));
        }
        return cookies;
    }

    /**
     * Joins cookies into a Cookie header value: {@code name=value} pairs separated by "; ".
     *
     * @param cookies cookies to join; may be null or empty
     * @return the joined text, or null when there are no cookies
     */
    public static String setCookie2String(List<Cookie> cookies) {
        if (cookies == null || cookies.isEmpty()) {
            return null;
        }
        StringBuilder builder = new StringBuilder();
        for (int j = 0; j < cookies.size(); j++) {
            Cookie c = cookies.get(j);
            if (j > 0) {
                builder.append("; ");
            }
            builder.append(c.getName() + "=" + c.getValue());
        }
        return builder.toString();
    }

    /**
     * Opens the body stream of a response.
     *
     * @param response the response; may be null
     * @return the body stream, or null when there is no response, no body or opening failed
     */
    public static InputStream getInputStreamFromResponse(HttpResponse response) {
        if (response == null) {
            return null;
        }
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return null;
        }
        InputStream in = null;
        try {
            in = entity.getContent();
        } catch (IllegalStateException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return in;
    }

    /**
     * Reads the body of a response as text.
     *
     * @param response the response; may be null
     * @return null for a null response, an empty string when there is no body,
     *         otherwise the body text
     */
    public static String getStringFromResponse(HttpResponse response) {
        if (response == null) {
            return null;
        }
        InputStream in = getInputStreamFromResponse(response);
        String responseText = "";
        if (in != null) {
            responseText = Utils.getStringFromStream(in);
        }
        return responseText;
    }

    /**
     * Creates an HTTP client backed by a thread-safe connection manager.
     * NOTE(review): a new client and connection manager are built on every call
     * and never shut down here; confirm whether callers are expected to release them.
     */
    private final static HttpClient createHttpClient() {
        String proxyHost = "web-proxy-sha.chn.hp.com";
        int proxyPort = 8080;
        HttpHost proxy = new HttpHost(proxyHost, proxyPort);
        HttpParams params = new BasicHttpParams();
        if (Constant.enableProxy) {
            params.setParameter(ConnRouteParams.DEFAULT_PROXY, proxy);
        }
        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setContentCharset(params, "UTF-8");

        ThreadSafeClientConnManager clientmanager = new ThreadSafeClientConnManager();
        clientmanager.setMaxTotal(20);
        HttpClient client = new DefaultHttpClient(clientmanager, params);
        // Whether circular redirects (back to the same location) are allowed.
        client.getParams().setParameter("http.protocol.allow-circular-redirects", true);
        // Maximum number of redirects.
        client.getParams().setParameter("http.protocol.max-redirects", 50);
        // Whether redirects are followed automatically.
        client.getParams().setParameter("http.protocol.handle-redirects", false);
        return client;
    }

    /**
     * Creates a client that routes through a proxy picked at random from
     * {@code proxy.properties} (one {@code host=port} entry per line).
     *
     * @return the configured HttpClient
     * @throws RuntimeException when the proxy file yields no entries
     */
    public static HttpClient getDefaultHttpClientByProxy() {
        HttpClient httpclient = createHttpClient();
        String filePath = "proxy.properties";
        Map<String, String> map = ReadIni.getDbini(filePath);
        if (map.size() == 0) {
            throw new RuntimeException("无可用代理");
        }
        Set<String> set = map.keySet();
        String[] array = set.toArray(new String[set.size()]);
        String ip = array[new Random().nextInt(array.length)];
        String port = map.get(ip);
        HttpHost proxy = new HttpHost(ip, Integer.parseInt(port));
        httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        httpclient.getParams().setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
        return httpclient;
    }
}

接下来看一个HttpHelper的辅助类,如下:

/**
*
*/
package main.java.sina.utils; import java.io.IOException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.PostMethod;

/**
 * Sends collected data to the connector service as form POST requests and
 * returns the response body.
 */
public class HttpHelper {

    /**
     * Posts {@code requestData} as the {@code mids} field to {@code url}.
     *
     * @param requestData value of the {@code mids} field
     * @param url         target address
     * @return the response body (also printed to standard output)
     * @throws HttpException on a protocol error
     * @throws IOException   on a transport error
     */
    public static String getLiveData(String requestData, String url)
            throws HttpException, IOException {
        PostMethod postMethod = new PostMethod(url);
        postMethod.setParameter("mids", requestData);
        return executeAndRelease(postMethod);
    }

    /**
     * Posts a user's hobbies to the hobby-loading endpoint.
     *
     * @param userid value of the {@code userid} field
     * @param hobbys value of the {@code hobbys} field
     * @return the response body (also printed to standard output)
     * @throws HttpException on a protocol error
     * @throws IOException   on a transport error
     */
    public static String getHobbyData(String userid, String hobbys)
            throws HttpException, IOException {
        PostMethod postMethod = new PostMethod("http://c0048925.itcs.hp.com:8080/connector/loadingHobby");
        postMethod.setParameter("userid", userid);
        postMethod.setParameter("hobbys", hobbys);
        return executeAndRelease(postMethod);
    }

    /** Executes the request, reads and prints the body, and releases the connection even on failure. */
    private static String executeAndRelease(PostMethod postMethod)
            throws HttpException, IOException {
        try {
            new HttpClient().executeMethod(postMethod);
            String response = postMethod.getResponseBodyAsString();
            System.out.println(response);
            return response;
        } finally {
            postMethod.releaseConnection();
        }
    }
}

ReadIni.java类,在读文本文件中使用,如下:

package main.java.sina.utils;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Reads simple {@code key=value} text files into a map.
 */
public class ReadIni {

    /**
     * Loads every {@code key=value} line of {@code file}. Blank lines and lines
     * without both a key and a value are skipped. Errors are printed, not rethrown.
     *
     * @param file path of the file to read
     * @return the entries read; empty when the file is missing or unreadable
     */
    public static Map<String, String> getDbini(String file) {
        Map<String, String> map = new HashMap<String, String>();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            String s;
            // The next line is fetched on every iteration, so a blank line can no longer stall the loop.
            while ((s = br.readLine()) != null) {
                if (s.trim().length() == 0) {
                    continue;
                }
                String[] s1 = getIni(s);
                if (s1.length < 2) {
                    continue;
                }
                map.put(s1[0], s1[1]);
            }
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return map;
    }

    /**
     * Splits one line at every '='.
     *
     * @param str line to split
     * @return the pieces in order
     */
    public static String[] getIni(String str) {
        return str.split("=");
    }
}

然后,我们跳转到登录sina,来看一下loginSina这个类的实现:

package main.java.sina.httpclient;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.InvalidKeyException;
import java.security.KeyFactory;
import java.security.NoSuchAlgorithmException;
import java.security.interfaces.RSAPublicKey;
import java.security.spec.InvalidKeySpecException;
import java.security.spec.RSAPublicKeySpec;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner; import javax.crypto.BadPaddingException;
import javax.crypto.Cipher;
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.NoSuchPaddingException; import org.apache.commons.codec.binary.Hex;
import org.apache.commons.httpclient.params.HttpParams;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.cookie.Cookie;
import org.springframework.core.io.ClassPathResource; import main.java.sina.json.msg.PreLoginResponseMessage;
import main.java.sina.utils.Base64Encoder;
import main.java.sina.utils.EncodeUtils;
import main.java.sina.utils.HttpUtils;
import main.java.sina.utils.JsonUtils;
import main.java.sina.utils.Utils; public class LoginSina {
private String username;
private String password;
private String rsakv;
private String pubkey; //servertime和nonce都是在登录时需要使用的,用于post信息的加密
private String servertime;//服务器的时间
private String nonce;//一次性字符串
private String userid;//用户微博ID
private String pcid;//若需要输入验证码时用到
private String userdomainname;//用于域名
private String door;//验证码 private Map<String,String> headers=null; private List<Cookie> cookies=null; public LoginSina(String username,String password){
this.username=username;
this.password=password;
init();
} public Map<String,String> getHeaders(){
Map<String,String> hds=null;
if(headers!=null && headers.keySet().size()>0){
hds=new HashMap<String,String>();
for(String key:headers.keySet()){
hds.put(key,headers.get(key));
}
}
return hds;
} public List<Cookie> getCookies(){
List<Cookie> cc=null;
if(cookies!=null && cookies.size()>0){
cc=new ArrayList<Cookie>();
for(int i=0;i<cookies.size();i++){
cc.add(cookies.get(i));
}
}
return cc;
}
/**
 * Logs in to Weibo by posting the SSO login form. The response cookies and the
 * request headers are stored on this instance. When the response does not
 * contain "retcode=0", the captcha image is downloaded, a new nonce is
 * generated, a captcha is read from standard input and the login is retried.
 *
 * @return the response text of the attempt that finally succeeded
 * @throws IllegalStateException when the login request produced no response
 */
public String dologinSina(){
System.out.println("---do login, please hold on...---");
String url="http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)";//v1.3.17
Map<String,String> headers=new HashMap<String,String>();
Map<String,String> params=new HashMap<String,String>();
// Background on HTTP headers: http://www.cnblogs.com/yuzhongwusan/archive/2011/10/20/2218954.html
headers.put("Accept", "text/html, application/xhtml+xml, */*");
headers.put("Referer", "http://login.sina.com.cn/member/my.php?entry=sso");
headers.put("Accept-Language", "zh-cn");
headers.put("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9;ZHCN");
headers.put("Host", "login.sina.com.cn");
headers.put("Connection", "Keep-Alive");
headers.put("Content-Type", "application/x-www-form-urlencoded");
headers.put("Cache-Control", "no-cache");
params.put("encoding", "UTF-8");
params.put("entry", "weibo");
params.put("from", "");
params.put("prelt", "112");
params.put("gateway", "1");
params.put("nonce", nonce);
params.put("pwencode", "rsa2");//wsse
params.put("returntype", "META");
params.put("pagerefer", "");
params.put("savestate", "7");
params.put("servertime", servertime);
params.put("rsakv", rsakv);
params.put("service", "miniblog");
params.put("sp", getEncryptedP());
params.put("ssosimplelogin", "1");
params.put("su", getEncodedU());
params.put("url", "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack");
params.put("useticket", "1");
params.put("vsnf", "1");
HttpResponse response=HttpUtils.doPost(url, headers, params);
if(response==null){
// doPost returns null when the request failed; fail with a clear message.
throw new IllegalStateException("login request failed: no response from " + url);
}
this.cookies=HttpUtils.getResponseCookies(response);
this.headers=headers;
String responseText=HttpUtils.getStringFromResponse(response);
try {
responseText=new String(responseText.getBytes(),"GBK");
if(!responseText.contains("retcode=0")){
downloadCheckImage();
this.nonce=getnonce();
Scanner s=new Scanner(System.in);
if(responseText.contains("retcode=4049"))
System.out.println("请输入验证码:");
else if(responseText.contains("retcode=2070")){
System.out.println("验证码不正确,请再次输入验证码:");
}
// NOTE(review): the captcha is stored in "door" but no visible parameter sends it - confirm.
this.door=s.next();
// Hand back the retry's outcome; the failed response of this attempt is stale,
// and the success message must be printed only once.
return dologinSina();
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
System.out.println("Congratulations, you have login success!");
return responseText;
}
/**
 * Performs the post-login redirect: rebuilds the stored request headers
 * (including the login cookies, when there are any) and requests the
 * ajaxlogin callback page.
 *
 * @return the response text of the second request, or null when it failed
 */
public String redirect(){
String cookieValue=HttpUtils.setCookie2String(this.cookies);
// The header map may still be unset when no login attempt stored one.
if(this.headers==null){
this.headers=new HashMap<String,String>();
}else{
this.headers.clear();
}
this.headers.put("Accept", "image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
this.headers.put("Accept-Language", "zh-cn");
this.headers.put("Connection", "Keep-Alive");
this.headers.put("Host", "sina.com.cn");
this.headers.put("Referer", "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)");
// NOTE(review): header name is "User", not "User-Agent" - kept as is, confirm intent.
this.headers.put("User", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 691)");
if(cookieValue!=null){
// Without login cookies there is nothing to send.
this.headers.put("Cookie", cookieValue);
}
String url="http://weibo.com/ajaxlogin.php?" +
"framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack&" +
"sudaref=weibo.com";
// NOTE(review): the same GET is issued twice and only the second response is used;
// kept unchanged because the reason is not visible here.
HttpResponse response=HttpUtils.doGet(url, this.headers);
response=HttpUtils.doGet(url, this.headers);
String responseText=HttpUtils.getStringFromResponse(response);
return responseText;
}
/**
 * Generates a one-time, 6-character string of upper-case letters and digits.
 *
 * @return a freshly generated 6-character string
 */
private String getnonce() {
    final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    StringBuilder nonce = new StringBuilder(6);
    for (int i = 0; i < 6; i++) {
        // Math.random() is in [0, 1), so the scaled value is always a valid index.
        int index = (int) (Math.random() * alphabet.length());
        nonce.append(alphabet.charAt(index));
    }
    return nonce.toString();
}
/**
 * Performs the prelogin request and stores the values it returns.
 *
 * <p>The text between the first '{' and the last '}' of the response is parsed into a
 * {@code PreLoginResponseMessage}; its nonce, servertime, pubkey, rsakv and pcid are
 * copied into the fields of this instance.
 *
 * @throws IllegalStateException if the response has no body or contains no
 *         brace-delimited object
 */
private void init(){
    String url = compositeUrl();

    Map<String,String> headers = new HashMap<String,String>();
    headers.put("Accept", "*/*");
    headers.put("Referer", "http://weibo.com/");
    headers.put("Accept-Language", "zh-cn");
    headers.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 691)");
    headers.put("Host", "login.sina.com.cn");
    headers.put("Connection", "Keep-Alive");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    if (responseText == null) {
        throw new IllegalStateException("prelogin request returned no response body");
    }

    // The payload is wrapped in a callback call; keep only the object literal.
    int begin = responseText.indexOf("{");
    int end = responseText.lastIndexOf("}");
    if (begin < 0 || end < begin) {
        throw new IllegalStateException("prelogin response contains no JSON object: " + responseText);
    }
    String json = responseText.substring(begin, end + 1);

    PreLoginResponseMessage plrmsg = JsonUtils.jsontoPreLoginResponseMessage(json);
    this.nonce = plrmsg.getNonce();
    this.servertime = plrmsg.getServertime() + "";
    this.pubkey = plrmsg.getPubkey();
    this.rsakv = plrmsg.getRsakv();
    this.pcid = plrmsg.getPcid();
}
/**
 * Downloads the captcha image for the current {@code pcid} and writes it to the
 * classpath resource {@code checkImage.jpeg}.
 *
 * <p>Does nothing when {@code pcid} is null. The stored cookies are cleared first, so
 * the {@code Cookie} header is built from the emptied list.
 */
private void downloadCheckImage() {
    if (pcid == null) {
        return;
    }
    this.headers.remove("Content-Type");
    try {
        if (this.cookies != null) {
            this.cookies.clear();
        }
    } catch (Exception e) {
        // Best effort: a list that cannot be cleared must not abort the download.
        e.printStackTrace();
    }
    String cookieValue = HttpUtils.setCookie2String(this.cookies);
    this.headers.put("Cookie", cookieValue);

    String url = "http://login.sina.com.cn/cgi/pin.php?r=" + (long)(Math.random()*100000000) + "&s=0&p=" + this.pcid;
    HttpResponse response = HttpUtils.doGet(url, headers);
    InputStream in = HttpUtils.getInputStreamFromResponse(response);
    try {
        Utils.writeFileFromStream(new ClassPathResource("checkImage.jpeg").getFile().getPath(), in);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the response stream, including when resolving the target file
        // fails before the stream is ever read.
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
}
/**
 * Assembles the prelogin URL, including the encoded user name and the current time
 * in milliseconds as the trailing {@code _} parameter.
 *
 * @return the complete prelogin URL
 */
private String compositeUrl(){
    String encodedUser = getEncodedU();
    long timestamp = System.currentTimeMillis();
    return "http://login.sina.com.cn/sso/prelogin.php?"
            + "entry=weibo&callback=sinaSSOController.preloginCallBack&"
            + "su=" + encodedUser
            + "&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.5)&_=" + timestamp;
}
/**
 * Encodes the user name: URL-encodes it as utf-8, then Base64-encodes the bytes
 * of that result.
 *
 * @return the encoded user name, or an empty string when no user name is set
 */
private String getEncodedU() {
    if (username == null || username.length() == 0) {
        return "";
    }
    String urlEncoded = EncodeUtils.encodeURL(username, "utf-8");
    return Base64Encoder.encode(urlEncoded.getBytes());
}
/**
 * Encrypts the password with the stored public key, server time and nonce, using the
 * hexadecimal exponent {@code 10001}.
 *
 * @return the hex-encoded ciphertext, or an empty string if encryption fails
 */
private String getEncryptedP(){
    return rsaCrypt(pubkey, "10001", password, servertime, nonce);
}

/**
 * Encrypts {@code servertime + "\t" + nonce + "\n" + pwd} with RSA.
 *
 * @param pubkey      RSA modulus as a hexadecimal string
 * @param exponentHex RSA public exponent as a hexadecimal string
 * @param pwd         the password
 * @param servertime  the server time value
 * @param nonce       the one-time string
 * @return the hex-encoded ciphertext, or an empty string on failure
 */
public static String rsaCrypt(String pubkey, String exponentHex, String pwd,String servertime,String nonce) {
    String data = servertime + "\t" + nonce + "\n" + pwd;
    return rsaCrypt(pubkey, exponentHex, data);
}

/**
 * Encrypts a message with the RSA public key described by a hexadecimal modulus and
 * exponent, and returns the ciphertext as a hex string.
 *
 * @param pubkey      RSA modulus as a hexadecimal string
 * @param exponentHex RSA public exponent as a hexadecimal string
 * @param messageg    the plaintext; encoded as UTF-8 before encryption
 * @return the hex-encoded ciphertext, or an empty string when an argument is null,
 *         is not valid hexadecimal, or the cryptographic operation fails
 */
public static String rsaCrypt(String pubkey, String exponentHex, String messageg) {
    if (pubkey == null || exponentHex == null || messageg == null) {
        return "";
    }
    try {
        // RSAPublicKeySpec takes (modulus, publicExponent): pubkey is the modulus.
        BigInteger modulus = new BigInteger(pubkey, 16);
        BigInteger publicExponent = new BigInteger(exponentHex, 16);
        RSAPublicKeySpec spec = new RSAPublicKeySpec(modulus, publicExponent);

        KeyFactory factory = KeyFactory.getInstance("RSA");
        RSAPublicKey pub = (RSAPublicKey) factory.generatePublic(spec);

        Cipher enc = Cipher.getInstance("RSA");
        enc.init(Cipher.ENCRYPT_MODE, pub);
        // Fixed charset so the ciphertext does not depend on the platform default.
        byte[] plain = messageg.getBytes(java.nio.charset.Charset.forName("UTF-8"));
        byte[] encrypted = enc.doFinal(plain);
        return new String(Hex.encodeHex(encrypted));
    } catch (NumberFormatException e) {
        // Modulus or exponent was not valid hexadecimal.
        System.out.println(e.getMessage());
        return "";
    } catch (java.security.GeneralSecurityException e) {
        // Covers missing algorithm/padding, invalid key or key spec, and block/padding errors.
        System.out.println(e.getMessage());
        return "";
    }
}
/** Stores the given user id on this instance. */
public void setUserid(String userid) {
this.userid = userid;
}
/** Returns the stored user id. */
public String getUserid() {
return userid;
}
/** Stores the given user domain name on this instance. */
public void setUserdomainname(String userdomainname) {
this.userdomainname = userdomainname;
}
/** Returns the stored user domain name. */
public String getUserdomainname() {
return userdomainname;
}
// Closes the enclosing class, whose header lies outside this excerpt.
}

SpiderSina类如下:

 package main.java.sina.httpclient;
import java.util.HashMap;
import java.util.List;
import java.util.Map; import org.apache.http.HttpResponse;
import org.apache.http.cookie.Cookie; import main.java.sina.utils.Constant;
import main.java.sina.utils.EncodeUtils;
import main.java.sina.utils.HttpUtils;
import main.java.sina.utils.Utils; public class SpiderSina {
private LoginSina ls;
private Map<String,String> headers;
private final int ADDFOLLOWING =1;
private final int CANCELFOLLOWING =2;
/**
 * Creates a spider that uses the given login object.
 *
 * <p>Initialises the shared request headers with browser-like defaults and a
 * {@code Cookie} header built from {@code ls.getCookies()}.
 *
 * @param ls the login object whose cookies are sent with every request
 */
public SpiderSina(LoginSina ls){
    this.ls = ls;

    Map<String,String> defaults = new HashMap<String,String>();
    defaults.put("Accept", "text/html, application/xhtml+xml, */*");
    defaults.put("Accept-Language", "zh-cn");
    defaults.put("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9;ZHCN");
    defaults.put("Connection", "Keep-Alive");
    defaults.put("Cache-Control", "no-cache");
    defaults.put("Cookie", HttpUtils.setCookie2String(ls.getCookies()));
    this.headers = defaults;
}
/**
 * Fetches the q.weibo.com front page.
 *
 * @return the response body after {@code EncodeUtils.unicdoeToGB2312}
 *         (presumably decoding unicode escapes; confirm against that helper)
 */
public String getGroupCategory(){
    this.headers.put("Host", "q.weibo.com");
    HttpResponse response = HttpUtils.doGet("http://q.weibo.com/", headers);
    String body = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(body);
}
/**
 * Requests one page of s.weibo.com search results for a keyword.
 *
 * <p>The keyword is inserted into the URL unchanged, as in the four-argument
 * {@code search} overload, so callers must pass it already encoded for the URL path.
 * The request uses the {@code Cookie} header already present in the shared headers;
 * earlier revisions overwrote it with a cookie string copied from a browser session,
 * which also replaced the session cookie for every later request on this instance.
 *
 * @param keyword the search term, already encoded for the URL path
 * @param pageNo  the result page number
 * @return the response body after {@code EncodeUtils.unicdoeToGB2312}
 */
public String search(String keyword, int pageNo){
    String url = "http://s.weibo.com/weibo/" + keyword + "&page=" + pageNo;

    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    headers.put("Referer", "http://login.sina.com.cn/sso/login.php?url=http%3A%2F%2Fs.weibo.com%2Fweibo%2F%2525E6%252583%2525A0%2525E6%252599%2525AE%26page%3D2&_rand=1419173756.6387&gateway=1&service=weibo&entry=miniblog&useticket=1&returntype=META");
    this.headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(responseText);
}

/**
 * Requests the profile page {@code http://www.weibo.com/u/<uid>}.
 *
 * <p>Uses the {@code Cookie} header already present in the shared headers instead of
 * overwriting it with a cookie string copied from a browser session.
 *
 * @param uid the numeric user id appended to the URL
 * @return the response body after {@code EncodeUtils.unicdoeToGB2312}
 */
public String searchCommentsByUid(String uid){
    String url = "http://www.weibo.com/u/" + uid;

    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    this.headers.put("Host", "www.weibo.com");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String responseText = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(responseText);
}
/**
 * Requests one page of s.weibo.com search results for a keyword within a custom
 * time range, prints the page to standard output and returns it.
 *
 * @param keyword  the search term, inserted into the URL unchanged
 * @param pageNo   the result page number
 * @param fromdate start of the time range, inserted into the URL unchanged
 * @param todate   end of the time range, inserted into the URL unchanged
 * @return the response body after {@code EncodeUtils.unicdoeToGB2312}
 */
public String search(String keyword, int pageNo, String fromdate,String todate){
    String url = "http://s.weibo.com/weibo/" + keyword + "&page="
            + pageNo
            + "&typeall=1&suball=1&timescope=custom:"
            + fromdate
            + ":"
            + todate
            + "&Refer=g";

    // Re-store the current cookie value, as the original did.
    String currentCookie = headers.get("Cookie");
    headers.put("Cookie", currentCookie);

    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    headers.put("Cache-Control", "max-age=0");
    headers.put("Referer", "http://s.weibo.com/weibo/%25E5%25AE%2581%25E6%25B3%25A2%25E5%25A4%25A7%25E5%25AD%25A6&typeall=1&suball=1&timescope=custom:"+fromdate+":"+todate+"&Refer=g");
    this.headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(url, headers);
    String html = EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(response));

    System.out.println("************htmlContent start***********");
    System.out.println(html);
    System.out.println("************htmlContent end***********");
    return html;
}
/**
 * Requests {@code Constant.personalHomePage} and replaces the shared {@code Cookie}
 * header with the cookies returned by that response.
 *
 * <p>NOTE(review): the {@code Host} header is set to {@code s.weibo.com} whatever the
 * home page URL is; confirm that this is intended.
 */
public void forwardToWeiboPage(){
    headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    headers.put("Accept-Language", "zh-CN");
    headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
    headers.put("Connection", "Keep-Alive");
    this.headers.put("Host", "s.weibo.com");

    HttpResponse response = HttpUtils.doGet(Constant.personalHomePage, headers);

    // The body is read and converted, but only the response cookies are used.
    EncodeUtils.unicdoeToGB2312(HttpUtils.getStringFromResponse(response));

    List<Cookie> returned = HttpUtils.getResponseCookies(response);
    headers.put("Cookie", HttpUtils.setCookie2String(returned));
}
/**
 * Fetches the q.weibo.com category page for the given category id.
 *
 * @param id the category id appended to the URL
 * @return the response body after {@code EncodeUtils.unicdoeToGB2312}
 */
public String getGroupCategory(int id){
    this.headers.put("Host", "q.weibo.com");
    HttpResponse response = HttpUtils.doGet("http://q.weibo.com/class/category/?id=" + id, headers);
    String body = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(body);
}
/**
 * Fetches the first page of a micro-group's member list; per the original note this
 * HTML page is where the administrator ids are taken from.
 *
 * @param groupid the micro-group id
 * @return the response body
 */
public String getGroupAdministrator(String groupid) {
    // Plain page request: drop the headers left behind by earlier AJAX calls.
    this.headers.remove("x-requested-with");
    this.headers.remove("Content-Type");
    this.headers.remove("Referer");
    this.headers.put("Host", "q.weibo.com");

    String url = "http://q.weibo.com/" + groupid + "/members/all";
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
/**
 * Posts to the member-list AJAX endpoint for one page of a micro-group's members
 * (JSON, per the original note).
 *
 * @param groupid    the micro-group id
 * @param pagenumber the page number
 * @return the response body
 */
public String getGroupMembers(String groupid,int pagenumber){
    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("page", String.valueOf(pagenumber));
    form.put("gid", groupid);
    form.put("query", "");
    form.put("tab", "all");
    form.put("vip", "1");

    this.headers.put("Referer", "http://q.weibo.com/"+groupid+"/members/all");
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    HttpResponse response = HttpUtils.doPost("http://q.weibo.com/ajax/members/page", headers, form);
    return HttpUtils.getStringFromResponse(response);
}
/**
 * Posts to the group-feed AJAX endpoint to fetch the posts of a micro-group.
 *
 * <p>Original author's notes: this worked after many attempts; each call returns 50
 * posts; {@code count} may be between 1 and 75, but each page still starts at a
 * multiple of 50.
 *
 * @param page  the page number
 * @param count the number of posts requested
 * @param gid   the micro-group id
 * @return the response body
 */
public String getGroupTopic(int page,int count,String gid){
    this.headers.put("Referer", "http://q.weibo.com/"+gid);
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    // Page 1 reports 2 as its previous page; every other page reports page - 1.
    int previousPage = (page == 1) ? 2 : page - 1;

    Map<String,String> form = new HashMap<String,String>();
    form.put("_k", String.valueOf(System.currentTimeMillis()));
    form.put("_t", "0");
    form.put("count", String.valueOf(count));
    form.put("gid", gid);
    form.put("is_search", "");
    form.put("key_word", "");
    form.put("me", "0");
    form.put("mids", "");
    form.put("new", "0");
    form.put("page", String.valueOf(page));
    form.put("pagebar", "0");
    form.put("pre_page", String.valueOf(previousPage));
    form.put("since_id", "0");
    form.put("uid", "0");

    HttpResponse response = HttpUtils.doPost("http://q.weibo.com/ajax/mblog/groupfeed", headers, form);
    return HttpUtils.getStringFromResponse(response);
}
/**
 * Fetches the group-profile AJAX resource of a micro-group. Per the original note,
 * the JSON contains the number of posts along with the group's basic information.
 *
 * @param gid the micro-group id
 * @return the response body
 */
public String getGroupMessageNumber(String gid){
    this.headers.put("Referer", "http://q.weibo.com/"+gid);
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    long now = System.currentTimeMillis();
    String url = "http://q.weibo.com/ajax/rightnav/groupprofile?gid=" + gid + "&_t=0&__rnd=" + now;
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
/**
 * Fetches the main page of a micro-group. Per the original note, the HTML is used
 * mainly to obtain the MID of the first post.
 *
 * @param groupid the micro-group id
 * @return the response body
 */
public String getgroupMainPage(String groupid) {
    // Plain page request: drop the headers left behind by earlier AJAX calls.
    this.headers.remove("x-requested-with");
    this.headers.remove("Content-Type");
    this.headers.remove("Referer");
    this.headers.put("Host", "q.weibo.com");

    String url = "http://q.weibo.com/" + groupid + "?topnav=1";
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
/**
 * Posts to the category AJAX endpoint to list the micro-groups in a category.
 *
 * @param categroyID the category id
 * @param pagenumber the page number
 * @param sort       sort order; per the original note 1 = by member count,
 *                   2 = by post count, 3 = by creation time
 * @param count      the number of records per page
 * @return the response body after {@code EncodeUtils.unicdoeToGB2312}
 */
public String getGroupByCategroy(int categroyID,int pagenumber,int sort,int count){
    this.headers.put("Referer", "http://q.weibo.com/class/category/?id="+categroyID);
    this.headers.put("Host", "q.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("page", String.valueOf(pagenumber));
    form.put("id", String.valueOf(categroyID));
    form.put("sort", String.valueOf(sort));
    form.put("count", String.valueOf(count));

    HttpResponse response = HttpUtils.doPost("http://q.weibo.com/ajax/class/category", headers, form);
    String body = HttpUtils.getStringFromResponse(response);
    return EncodeUtils.unicdoeToGB2312(body);
}
/**
 * Fetches the emoticon ("face") list, prints it to standard output and writes it to
 * {@code tmpFile/faceList.txt}.
 *
 * @return the response body
 */
public String getFaceList(){
    long now = System.currentTimeMillis();
    String url = "http://weibo.com/aj/mblog/face?type=face&_t=0&__rnd=" + now;

    this.headers.put("Referer", "http://weibo.com/");
    this.headers.put("Host", "weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    String faces = HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
    System.out.println(faces);
    Utils.writeFileFromString("tmpFile/faceList.txt", faces);
    return faces;
}
/**
 * Fetches a user's {@code /info} page. Per the original note, the encoded content
 * in the lower part of that page is what gets parsed afterwards.
 *
 * @param memberID the user id
 * @return the response body
 */
public String getMemberInfo(String memberID){
    this.headers.put("Referer", "http://weibo.com/u/"+memberID);
    this.headers.put("Host", "weibo.com");

    String url = "http://weibo.com/" + memberID + "/info";
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
/**
 * Fetches one page of a user's followers (HTML, 20 per page, per the original note).
 *
 * <p>NOTE(review): the query string carries a fixed {@code uid=1689219395} whatever
 * {@code memberID} is; confirm whether it should follow {@code memberID}.
 *
 * @param memberID the user id
 * @param page     the page number
 * @return the response body
 */
public String getMemberFans(String memberID,int page){
    this.headers.put("Referer", "http://weibo.com/"+memberID+"/fans");
    this.headers.put("Host", "weibo.com");

    String url = "http://weibo.com/" + memberID + "/fans?&uid=1689219395&tag=&page=" + page;
    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
/**
 * Fetches one page of the accounts a user follows (HTML, per the original note).
 *
 * @param memberID the user id
 * @param page     the page number
 * @return the response body
 */
public String getMemberFollowing(String memberID,int page){
    String url = "http://weibo.com/" + memberID + "/follow?page=" + page;
    this.headers.put("Host", "weibo.com");
    this.headers.put("Referer", "http://weibo.com/"+memberID+"/follow");
    HttpResponse response = HttpUtils.doGet(url, headers);
    return HttpUtils.getStringFromResponse(response);
}

/*
 * Original author's notes on the mbloglist AJAX endpoint (not requested by the
 * method below; kept for reference):
 *   memberID: the user id
 *   max_id:   the last id of the previous AJAX batch
 *   end_id:   the id of the user's newest post
 *   k:        a random number
 *   page:     the page number
 *   pre_page: the previous page
 *   count:    records per response; 50 when max_id is null, otherwise 15
 *   pagebar:  0 for the first AJAX request, 1 for the second
 * Notes:
 *   1. Every response has the same format, so one parser handles them all.
 *   2. A page holds 45 records in total, fetched in three requests of 15 each.
 *   3. max_id is not really needed; it can simply equal end_id.
 *   4. The first request may pass end_id as null (the page-turn request); the
 *      following scroll requests must pass end_id, which is the id of the first
 *      record on the first page.
 */

/**
 * Requests the profile page {@code http://weibo.com/u/<memberID>}.
 *
 * <p>Earlier revisions built an mbloglist AJAX URL from {@code end_id}, {@code page}
 * and {@code pagebar} and then overwrote it with the profile URL before sending, so
 * those parameters never influenced the request. That dead code, which also unboxed a
 * possibly-null {@code page}, has been removed. The request uses the {@code Cookie}
 * header already present in the shared headers instead of overwriting it with a
 * cookie string copied from a browser session.
 *
 * @param memberID the user id
 * @param end_id   currently unused
 * @param page     currently unused
 * @param pagebar  currently unused
 * @return the response body, or an empty string when no response was obtained
 */
public String getMemberReleaseTopic(String memberID,String end_id,Integer page,Integer pagebar){
    this.headers.put("Referer", "http://weibo.com/u/"+memberID);
    this.headers.put("Host", "www.weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    String url = "http://weibo.com/u/" + memberID;
    HttpResponse response = HttpUtils.doGet(url, headers);
    if (response == null) {
        return "";
    }
    return HttpUtils.getStringFromResponse(response);
}
/*
* ~~~~~~~~~~~~~~~~~~~~~获取用户的一些信息~~~end~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/ //********************************************************************************** /*
* 名人堂与达人信息
*/
public String getVerified(String url){
this.headers.put("Host", "verified.weibo.com");
this.headers.put("Referer", "http://plaza.weibo.com/?topnav=1&wvr=4");
HttpResponse response=HttpUtils.doGet(url, headers);
String responseText=HttpUtils.getStringFromResponse(response);
return responseText;
} public String getVerifiedMember(String path,Integer g_index){
String url="http://verified.weibo.com/aj/getgrouplist?g_index="+g_index+
"&path="+path+"&_t=0&__rnd="+System.currentTimeMillis();
this.headers.put("Host", "verified.weibo.com");
this.headers.put("Referer", path);
this.headers.put("Content-Type", "application/x-www-form-urlencoded");
this.headers.put("x-requested-with", "XMLHttpRequest");
HttpResponse response=HttpUtils.doGet(url, headers);
String responseText=HttpUtils.getStringFromResponse(response); return responseText;
} public String setArea(Integer provinceID){
this.headers.put("Referer", "http://club.weibo.com/list");
this.headers.put("Host", "club.weibo.com");
this.headers.put("Content-Type", "application/x-www-form-urlencoded");
this.headers.put("x-requested-with", "XMLHttpRequest"); Map<String,String> params=new HashMap<String,String>(); params.put("_t", "0");
params.put("city", "1000");
params.put("prov", provinceID+""); String url="http://club.weibo.com/ajax_setArea.php";
HttpResponse response=HttpUtils.doPost(url, headers, params); List<Cookie> cks=HttpUtils.getResponseCookies(response);
List<Cookie> cookies=ls.getCookies();
cookies.addAll(cks);
String cookieValue=HttpUtils.setCookie2String(cookies);
this.headers.put("Cookie", cookieValue); return HttpUtils.getStringFromResponse(response);
} public String getDaRen(Integer page){
String op="ltime";
String url="http://club.weibo.com/list?sex=3&op="+op+"&page="+page+"&";
Integer pre_page=(page<=1? 2:page-1);
this.headers.put("Host", "club.weibo.com");
this.headers.put("Referer", "http://club.weibo.com/list?sex=3&op=ltime&page="+pre_page+"&");
this.headers.remove("Content-Type");
this.headers.remove("x-requested-with"); HttpResponse response=HttpUtils.doGet(url, headers);
if(response!= null){
return HttpUtils.getStringFromResponse(response);
}
return ""; }
/**
 * Publishes a text-only post through the {@code aj/mblog/add} endpoint.
 *
 * @param content the text of the post
 * @return the response body
 */
public String releaseTopic(String content){
    Map<String,String> form = new HashMap<String,String>();
    form.put("_t", "0");
    form.put("location", "home");
    form.put("module", "stissue");
    form.put("pic_id", "");
    form.put("text", content);

    this.headers.put("Referer", "http://weibo.com/");
    this.headers.put("Host", "weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    String url = "http://weibo.com/aj/mblog/add?__rnd=" + System.currentTimeMillis();
    return HttpUtils.getStringFromResponse(HttpUtils.doPost(url, headers, form));
}

/**
 * Intended to return the accounts the logged-in user follows; not implemented.
 *
 * @return always an empty string
 */
public String getSelfFollowIngs(){
    return "";
}
// Intended to return the logged-in user's followers; not implemented, always returns "".
public String getSelfFollowers(){
return "";
}
// Intended to return the micro-groups the logged-in user has joined; not implemented, always returns "".
public String getSelfJoinedGroups(){
return "";
}
// Intended to return the logged-in user's tags; not implemented, always returns "".
public String getSelfTags(){
return "";
}
// Intended to return the posts published by the logged-in user; not implemented, always returns "".
public String getSelfReleaseTopics(){
return "";
}
// Intended to return the posts on the logged-in user's home page; not implemented, always returns "".
public String getSelfPageTopics(){
return "";
}
/**
 * Follows a user.
 *
 * @param memberid the id of the user to follow
 * @return the response body of the follow request
 */
public String addFollowing(String memberid){
    return addorcancleFollowing(memberid, this.ADDFOLLOWING);
}

/**
 * Stops following a user.
 *
 * @param memberid the id of the user to unfollow
 * @return the response body of the unfollow request
 */
public String cancelFollowing(String memberid){
    return addorcancleFollowing(memberid, this.CANCELFOLLOWING);
}

/**
 * Posts a follow or unfollow request for the given user.
 *
 * @param memberid the id of the target user, sent as {@code uid}
 * @param option   {@code ADDFOLLOWING} or {@code CANCELFOLLOWING}
 * @return the response body
 * @throws IllegalArgumentException if {@code option} is neither of the two constants
 */
private String addorcancleFollowing(String memberid,int option){
    String url;
    switch(option){
    case ADDFOLLOWING:
        url = "http://weibo.com/aj/f/followed?__rnd=" + System.currentTimeMillis();
        break;
    case CANCELFOLLOWING:
        url = "http://weibo.com/aj/f/unfollow?__rnd=" + System.currentTimeMillis();
        break;
    default:
        // Without this, an unknown option would be posted to an empty URL.
        throw new IllegalArgumentException("unknown follow option: " + option);
    }

    this.headers.put("Referer", "http://weibo.com/");
    this.headers.put("Host", "weibo.com");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("x-requested-with", "XMLHttpRequest");

    Map<String,String> params = new HashMap<String,String>();
    params.put("_t", "0");
    params.put("f", "1");
    params.put("location", "profile");
    params.put("refer_flag", "");
    params.put("refer_sort", "profile");
    params.put("uid", memberid);

    HttpResponse response = HttpUtils.doPost(url, headers, params);
    return HttpUtils.getStringFromResponse(response);
}
/**
 * Fetches tag suggestions; per the original note each call yields 10 tags.
 *
 * @return the response body
 */
public String getTags(){
    long now = System.currentTimeMillis();
    String url = "http://account.weibo.com/set/aj/tagsuggest?__rnd=" + now;

    this.headers.put("Host", "account.weibo.com");
    this.headers.put("Referer", "http://account.weibo.com/set/tag#");

    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}

/**
 * Fetches the hot-keyword page for a category.
 *
 * <p>When {@code k} parses as an integer it is sent as the {@code k} query
 * parameter; otherwise it is sent as {@code t}.
 *
 * @param k the hot-word category
 * @return the response body
 */
public String getHotWords(String k){
    boolean numeric;
    try {
        Integer.parseInt(k);
        numeric = true;
    } catch (Exception ex) {
        numeric = false;
    }

    String url;
    if (numeric) {
        url = "http://data.weibo.com/top/keyword?k=" + k;
    } else {
        url = "http://data.weibo.com/top/keyword?t=" + k;
    }

    this.headers.put("Host", "data.weibo.com");
    this.headers.put("Referer", "http://data.weibo.com/top/keyword");

    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}

/**
 * Fetches one page of hot posts for a category.
 *
 * @param cat  the hot-post category
 * @param page the page number
 * @return the response body
 */
public String getHotWeibo(String cat,int page){
    long now = System.currentTimeMillis();
    String url = "http://data.weibo.com/hot/ajax/catfeed?page=" + page + "&cat=" + cat + "&_t=0&__rnd=" + now;

    this.headers.put("Host", "data.weibo.com");
    this.headers.put("Referer", "http://data.weibo.com/hot/minibloghot");

    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}

/**
 * Step one of the weiba crawl: lists the weiba (forum) names of a category,
 * sorted by {@code post}.
 *
 * @param ctgid the category id
 * @param p     the page number
 * @return the response body
 */
public String getWeiBar(String ctgid,int p){
    String sort = "post";
    long now = System.currentTimeMillis();
    String url = "http://weiba.weibo.com/aj_f/CategoryList?sort=" + sort + "&p=" + p + "&ctgid=" + ctgid + "&_t=0&__rnd=" + now;

    this.headers.put("Accept", "*/*");
    this.headers.put("Host", "weiba.weibo.com");
    this.headers.put("Referer", "http://weiba.weibo.com/ct/"+ctgid);
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("X-Requested-With", "XMLHttpRequest");

    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
/**
 * Step two of the weiba crawl: lists the post titles of one weiba (forum).
 *
 * @param bid the weiba identifier sent as {@code bid}
 * @param p   the page number
 * @return the response body
 */
public String getWeiBarByWeibarName(String bid,int p){
    long now = System.currentTimeMillis();
    String url = "http://weiba.weibo.com/aj_t/postlist?bid=" + bid + "&p=" + p + "&_t=all&__rnd=" + now;

    this.headers.put("Accept", "*/*");
    this.headers.put("Host", "weiba.weibo.com");
    this.headers.put("Referer", "http://weiba.weibo.com/");
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("X-Requested-With", "XMLHttpRequest");

    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}

/**
 * Fetches one page of the participant list of a Sina micro-charity project.
 *
 * @param page      the page number
 * @param projectID the project id
 * @param type      {@code "donate"} or {@code "discuss"}, per the original note
 * @return the response body
 */
public String getWeiGongYiMember(int page,int projectID,String type){
    long now = System.currentTimeMillis();
    String url = "http://gongyi.weibo.com/aj_personal_helpdata?page=" + page + "&type=" + type + "&project_id=" + projectID + "&_t=0&__rnd=" + now;

    this.headers.put("Accept", "*/*");
    this.headers.put("Host", "gongyi.weibo.com");
    this.headers.put("Referer", "http://gongyi.weibo.com/"+projectID);
    this.headers.put("Content-Type", "application/x-www-form-urlencoded");
    this.headers.put("X-Requested-With", "XMLHttpRequest");

    return HttpUtils.getStringFromResponse(HttpUtils.doGet(url, headers));
}
}

用java实现新浪爬虫,代码完整剖析(仅针对当前SinaSignOn有效)的更多相关文章

  1. Java 模拟新浪登录 2016

    想学习一下网络爬虫.涉及到模拟登录,查阅了一番资料以后发现大部分都有点过时了,就使用前辈们给的经验,Firefox抓包调试,采用httpclient模拟了一下新浪登录. 不正确之处多多包涵.需要的可以 ...

  2. 【python网络编程】新浪爬虫:关键词搜索爬取微博数据

    上学期参加了一个大数据比赛,需要抓取大量数据,于是我从新浪微博下手,本来准备使用新浪的API的,无奈新浪并没有开放关键字搜索的API,所以只能用爬虫来获取了.幸运的是,新浪提供了一个高级搜索功能,为我 ...

  3. java调用新浪接口根据Ip查询所属地区

    import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import ...

  4. 新浪天气api

    package com.smartdot.dcu; /** * java获取新浪天气预报代码 */ import java.io.FileNotFoundException; import java. ...

  5. android新浪分享实例

    新浪分享比较简单,新浪有提供完整的demo. android实现新浪的分享功能,分3种分享情况: 纯文本的,带图片的,图片为本地图片(传入的是图片在手机的地址),第2种带图片的是,网络图片,图片地址为 ...

  6. Java 8新特性-4 方法引用

    对于引用来说我们一般都是用在对象,而对象引用的特点是:不同的引用对象可以操作同一块内容! Java 8的方法引用定义了四种格式: 引用静态方法     ClassName :: staticMetho ...

  7. Thinkcmf 在新浪云上的部署问题

    最近要开发一个社团主页,于是想到了CMF内容管理系统的,但是直接在自己的服务器测试成本太高,于是选择了在新浪云上进行部署测试. 但是在安装Thinkcmf的过程中产生了一些技术性的问题.但最后终于在自 ...

  8. Java编程的逻辑 (90) - 正则表达式 (下 - 剖析常见表达式)

    本系列文章经补充和完善,已修订整理成书《Java编程的逻辑》,由机械工业出版社华章分社出版,于2018年1月上市热销,读者好评如潮!各大网店和书店有售,欢迎购买,京东自营链接:http:/ ...

  9. python3.4学习笔记(十四) 网络爬虫实例代码,抓取新浪爱彩双色球开奖数据实例

    python3.4学习笔记(十四) 网络爬虫实例代码,抓取新浪爱彩双色球开奖数据实例 新浪爱彩双色球开奖数据URL:http://zst.aicai.com/ssq/openInfo/ 最终输出结果格 ...

随机推荐

  1. Atitit.软件中见算法 程序设计五大种类算法

    Atitit.软件中见算法 程序设计五大种类算法 1. 算法的定义1 2. 算法的复杂度1 2.1. Algo cate2 3. 分治法2 4. 动态规划法2 5. 贪心算法3 6. 回溯法3 7. ...

  2. Atitit  图像处理底色变红的解决

    Atitit  图像处理底色变红的解决 1.1. 原因  ImageIO  bug ,alpha通道应该在保存jpg的时候排除1 1.2. 解决,自己移除alpha通道即可1 2. Image sav ...

  3. Atitit 图像处理--图像分类 模式识别 肤色检测识别原理 与attilax的实践总结

    Atitit 图像处理--图像分类 模式识别 肤色检测识别原理 与attilax的实践总结 1.1. 五中滤镜的分别效果..1 1.2. 基于肤色的图片分类1 1.3. 性能提升2 1.4. --co ...

  4. fir.im Weekly - 做一款 App 需要考虑什么

    开发 App 是一个痛并快乐的旅程.工具越来越多,成本也越来越低,那么在开发之前需要考虑些什么?limboy 总结了一些经验和反思--做一个 App 前需要考虑的几件事,参考一下为你的 App 多留点 ...

  5. 练习2 练习目标-使用引用类型的成员变量:在本练习中,将扩展银行项目,添加一个(客户类)Customer类。Customer类将包含一个Account对象。

    package banking; public class Customer { private String firstName; private String lastName; private ...

  6. 快速入门系列--Log4net日志组件

    Log4net是阿帕奇基金会的非常流行的开源日志组件,是log4j的.NET移植版本,至今已经有11年的历史,使用方便并且非常稳定,此外很重要的一点是其和很多开源组件能很好的组合在一起工作,例如NHi ...

  7. Mina、Netty、Twisted一起学(七):发布/订阅(Publish/Subscribe)

    消息传递有很多种方式,请求/响应(Request/Reply)是最常用的.在前面的博文的例子中,很多都是采用请求/响应的方式,当服务器接收到消息后,会立即write回写一条消息到客户端.HTTP协议也 ...

  8. Zepto中文API

    原文地址:http://zeptojs.com/ 译文地址:http://www.html-5.cn/Manual/Zepto/ Zepto是一个轻量级的针对现代高级浏览器的JavaScript库,  ...

  9. HTML&CSS学习总结(一)

    上周用了一周的时间,周一到周五平均每天2-3小时,周六.周日每天各8小时,看网易云课堂燕十八的HTML+div+CSS视频,感觉还不错,按照视频的讲课思路大概做个总结吧. 基本思路:从大的方面(整体结 ...

  10. SQLServer学习笔记系列10

    一.写在前面的话 生活的路很长,还是要坚持走下去,自己选择的生活,就该让这样的生活放射精彩!我不奢求现在的积累,在将来能够收获多少,至少在以后的日子里回忆起来,我不曾放弃过,我坚持过,我不后悔!最近跟 ...