1. package step3;
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.BufferedWriter;
  5. import java.io.File;
  6. import java.io.FileReader;
  7. import java.io.FileWriter;
  8. import java.io.IOException;
  9. import java.io.InputStream;
  10. import java.io.InputStreamReader;
  11. import java.io.PrintWriter;
  12. import java.sql.ResultSet;
  13. import java.sql.SQLException;
  14. import java.sql.Statement;
  15. import java.util.ArrayList;
  16. import java.util.Calendar;
  17. import java.util.List;
  18.  
  19. import org.apache.commons.httpclient.HttpClient;
  20. import org.apache.commons.httpclient.methods.GetMethod;
  21. import org.apache.commons.httpclient.methods.PostMethod;
  22. import org.json.JSONObject;
  23. import org.jsoup.Jsoup;
  24. import org.jsoup.nodes.Document;
  25. import org.jsoup.select.Elements;
  26.  
  27. import bean.Porn;
  28. import util.DBConnection;
  29.  
  30. /**
  31. *
  32. * @ClassName: quhao
  33. * @Description: 91porn地址解析
  34. * @author zeze
  35. * @date 2016年06月30日 下午7:55:31
  36. *
  37. */
  38. public class porn91 {
  39.  
  40. private static String cookie = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMQ.INJmWYMZ8T220CgsSTcfpHhTxXI";
  41. private static String cookie2 = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMw.6MGC1jX7mgjsChpGFBd-xHTv9ZU";
  42.  
  43. private static String Token = "1467296187##60ecf40d9328862cc6cd6a478adfc72ee0554050";
  44.  
  45. private static String Url = "http://freeget.co/video/extraction";
  46. private static String url001 = null;
  47. private static String dirfile = "F:/91porn/91url.csv";
  48. private static String destfile = "F:/91porn/data.txt";
  49.  
  50. private static int cnt0 = 0;
  51.  
  52. private static String num = null;
  53. private static String title = null;
  54. private static String time = null;
  55. private static String longtime = null;
  56. private static String viewnum = null;
  57. private static String Parurl = null;// "http://www.91porn.com/view_video.php?viewkey=c5ec60d0da8c8fbdb180&page=4&viewtype=basic&category=mr";
  58.  
  59. public static void main(String[] args) throws InterruptedException {
  60.  
  61. File file = new File(dirfile);
  62. FileReader reader = null;
  63. BufferedReader br = null;
  64. try {
  65. reader = new FileReader(file);
  66. br = new BufferedReader(reader);
  67. String str = null;
  68. String[] strArr = null;
  69. int cnt = 0;
  70. while ((str = br.readLine()) != null) {
  71. // System.out.println(str);
  72. strArr = str.split(",");
  73. if (strArr.length != 7)
  74. continue;
  75. num = strArr[0];
  76. title = strArr[1];
  77. time = strArr[2];
  78. longtime = strArr[4];
  79. viewnum = strArr[5];
  80. Parurl = strArr[6];
  81. cnt++;
  82. System.out.println(num + "," + title + "," + time);
  83. func_step1();
  84. }
  85. System.out.println("采集结束,总共:" + cnt + "条,成功写入" + cnt0 + "条");
  86.  
  87. } catch (Exception e) {
  88. // TODO: handle exception
  89. e.printStackTrace();
  90. } finally {
  91. if (br != null) {
  92. try {
  93. br.close();
  94. } catch (Exception e2) {
  95. // TODO: handle exception
  96. e2.printStackTrace();
  97. }
  98. }
  99. if (reader != null) {
  100. try {
  101. reader.close();
  102. } catch (Exception e2) {
  103. // TODO: handle exception
  104. e2.printStackTrace();
  105. }
  106. }
  107. }
  108.  
  109. }
  110.  
  111. private static void func_step1() {
  112. HttpClient httpClient = new HttpClient();
  113. try {
  114. PostMethod postMethod = new PostMethod(Url);
  115. postMethod.getParams().setContentCharset("utf-8");
  116. // 每次访问需授权的网址时需 cookie 作为通行证
  117. postMethod.setRequestHeader("cookie", cookie);
  118. postMethod.setRequestHeader("X-CSRFToken", Token);
  119. postMethod.setRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
  120. postMethod.setRequestHeader("Host", "freeget.co");
  121. postMethod.setRequestHeader("Referer", "http://freeget.co/");
  122. postMethod.setRequestHeader("User-Agent",
  123. "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
  124. postMethod.setParameter("url", Parurl);
  125. int statusCode = httpClient.executeMethod(postMethod);// 返回状态码200为成功,500为服务器端发生运行错误
  126. System.out.println("返回状态码:" + statusCode);
  127. // 打印出返回数据,检验一下是否成功
  128. String result = postMethod.getResponseBodyAsString();
  129. if (statusCode == 200) {
  130. // 解析成功,取得token和view_key
  131. JSONObject a = new JSONObject(result);
  132. url001 = "http://freeget.co/video/" + a.get("view_key") + "/" + a.get("token");
  133. System.out.println("视频解析地址:" + url001);
  134. func_step2(url001);
  135. }
  136. } catch (Exception e) {
  137. e.printStackTrace();
  138. }
  139. }
  140.  
  141. private static void func_step2(String url) {
  142. HttpClient httpClient = new HttpClient();
  143. try {
  144. GetMethod getMethod = new GetMethod(url);
  145. getMethod.getParams().setContentCharset("utf-8");
  146. getMethod.setRequestHeader("cookie", cookie2);
  147. getMethod.setRequestHeader("Accept-Language", "zh-cn");
  148. getMethod.setRequestHeader("User-Agent",
  149. "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
  150. int statusCode = httpClient.executeMethod(getMethod);// 返回状态码200为成功,500为服务器端发生运行错误
  151. // System.out.println("返回状态码:" + statusCode);
  152. // 打印出返回数据,检验一下是否成功
  153. InputStream inputStream = getMethod.getResponseBodyAsStream();
  154. BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
  155. StringBuffer stringBuffer = new StringBuffer();
  156. String str = "";
  157. while ((str = br.readLine()) != null) {
  158. stringBuffer.append(str);
  159. }
  160. if (statusCode == 200) {
  161. Document doc = Jsoup.parse(stringBuffer.toString());
  162. Elements name = doc.select("a");
  163. String playurl = name.get(4).text();
  164. System.out.println("在线播放地址:" + playurl);
  165. writefile(playurl);
  166. cnt0++;
  167. }
  168. } catch (Exception e) {
  169. e.printStackTrace();
  170. }
  171. }
  172.  
  173. private static void writefile(String url) {
  174. FileWriter fw = null;
  175. BufferedWriter bw = null;
  176. PrintWriter pw = null;
  177. try {
  178. fw = new FileWriter(new File(destfile), true);
  179. bw = new BufferedWriter(fw);
  180. pw = new PrintWriter(bw);
  181. pw.write(num + ',' + title + ',' + time + ',' + longtime + ',' + viewnum + ',' + url + "\r\n");
  182. } catch (IOException e) {
  183. // TODO Auto-generated catch block
  184. e.printStackTrace();
  185. } finally {
  186. if (pw != null) {
  187. pw.close();
  188. }
  189. if (bw != null) {
  190. try {
  191. bw.close();
  192. } catch (IOException e) {
  193. // TODO Auto-generated catch block
  194. e.printStackTrace();
  195. }
  196. }
  197. if (fw != null) {
  198. try {
  199. fw.close();
  200. } catch (IOException e) {
  201. // TODO Auto-generated catch block
  202. e.printStackTrace();
  203. }
  204. }
  205. }
  206. }
  207.  
  208. public List<Porn> QueryAllBook() {
  209. java.sql.Connection connection = DBConnection.getConnection();
  210. String sql = "select * from porn where status=0";
  211. java.sql.PreparedStatement pstmt = DBConnection.getPreparedStatement(connection, sql);
  212. List<Porn> pornlist = new ArrayList<Porn>();
  213. System.out.println(sql);
  214. try {
  215. Statement stmt = connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY);
  216. java.sql.ResultSet rs = stmt.executeQuery(sql);
  217. while (rs.next()) {
  218. Porn porn = new Porn();
  219. porn.setNum(rs.getString(1));
  220. porn.setTitle(rs.getString(2));
  221. porn.setTime(rs.getString(3));
  222. porn.setViewkey(rs.getString(4));
  223. porn.setLongtime(rs.getString(5));
  224. porn.setViewnum(rs.getString(6));
  225. porn.setParurl(rs.getString(7));
  226. pornlist.add(porn);
  227. }
  228. rs.last();
  229. } catch (SQLException e) {
  230. e.printStackTrace();
  231. } finally {
  232. DBConnection.close(connection, pstmt, null);
  233. }
  234. return pornlist;
  235. }
  236. }

Java解析采集模块的更多相关文章

  1. java解析xml的三种方法

    java解析XML的三种方法 1.SAX事件解析 package com.wzh.sax; import org.xml.sax.Attributes; import org.xml.sax.SAXE ...

  2. atitit.java解析sql语言解析器解释器的实现

    atitit.java解析sql语言解析器解释器的实现 1. 解析sql的本质:实现一个4gl dsl编程语言的编译器 1 2. 解析sql的主要的流程,词法分析,而后进行语法分析,语义分析,构建sq ...

  3. java 解析XML文档

    Java 解析XML文档 一.解析XML文档方式: 1.DOM方式:将整个XML文档读取到内存中,按照XML文件的树状结构图进行解析. 2.SAX方式:基于事件的解析,只需要加载XML中的部分数据,优 ...

  4. Java 解析 XML

    Java 解析 XML 标签: Java基础 XML解析技术有两种 DOM SAX DOM方式 根据XML的层级结构在内存中分配一个树形结构,把XML的标签,属性和文本等元素都封装成树的节点对象 优点 ...

  5. JAVA解析XML的四种方式

    java解析xml文件四种方式 1.介绍 1)DOM(JAXP Crimson解析器) DOM是用与平台和语言无关的方式表示XML文档的官方W3C标准.DOM是以层次结构组织的节点或信息片断的集合.这 ...

  6. JSON 之JAVA 解析

    一.   JSON (JavaScript Object Notation)一种简单的数据格式,比xml更轻巧. Json建构于两种结构:     1.“名称/值”对的集合(A collection ...

  7. Java解析XML汇总(DOM/SAX/JDOM/DOM4j/XPath)

    [目录] 一.[基础知识——扫盲] 二.[DOM.SAX.JDOM.DOM4j简单使用介绍] 三.[性能测试] 四.[对比] 五.[小插曲XPath] 六.[补充] 关键字:Java解析xml.解析x ...

  8. Java 解析epub格式电子书,helloWorld程序,附带源程序和相关jar包

    秀才坤坤出品 一.epub格式电子书 相关材料和源码均在链接中可以下载:http://pan.baidu.com/s/1bnm8YXT 包括 1.JAVA项目工程test_epub,里面包括了jar包 ...

  9. Java解析XML文档(简单实例)——dom解析xml

      一.前言 用Java解析XML文档,最常用的有两种方法:使用基于事件的XML简单API(Simple API for XML)称为SAX和基于树和节点的文档对象模型(Document Object ...

随机推荐

  1. codeforces 711B - Chris and Magic Square(矩阵0位置填数)

    题目链接:http://codeforces.com/problemset/problem/711/B 题目大意: 输入 n ,输入 n*n 的矩阵,有一个占位 0 , 求得将 0 位置换成其他的整数 ...

  2. thinkphp删除

    $result = M('content')->where('id>0')->delete $result =M('content')->where(array('id'=> ...

  3. Index/Common目录下文件

    1.在Common目录下创建Common.php(系统会自动加载Common.php) 代码: function say(){ echo '; } 在IndecAction.php输出 public ...

  4. memcache相同主域名下的session共享

    本配置适合具有相同主域名的多台服务器进行session共享. 例如:www.lee.com , bbs.lee.com(多个子域名). 配置session保存在memcache: ini_set(" ...

  5. editplus的配置和使用

    editplus以及其他所有软件的 "页" 是一个什么概念? 所谓 页 : 是指 当前 你看到的 "客户区" client 的区域大小. 如果窗口越小, 那么你 ...

  6. CF449B Jzzhu and Cities (最短路)

    CF449B CF450D http://codeforces.com/contest/450/problem/D http://codeforces.com/contest/449/problem/ ...

  7. hdu4951 Multiplication table (乘法表的奥秘)

    http://acm.hdu.edu.cn/showproblem.php?pid=4951 2014多校 第八题 1008 2014 Multi-University Training Contes ...

  8. UvaLive6661 Equal Sum Sets dfs或dp

    UvaLive6661 PDF题目 题意:让你用1~n中k个不同的数组成s,求有多少种组法. 题解: DFS或者DP或打表. 1.DFS 由于数据范围很小,直接dfs每种组法统计个数即可. //#pr ...

  9. HTML5游戏设计与开发 小白7-9月的动态

    好久没有更新博客了,最近在努力修炼提升逼格,当然了还有个恶心的毕业论文... 当然啦...在写这个论文的时候也就是为了提升下自身的技术,毕竟我的公司也不是游戏公司,SO 我决定开发个手机游戏.然后考虑 ...

  10. iOS原生的搜索:UISearchController

    iOS8之前我们使用UISearchDisplayController做TableView的本地搜索,查看UIKit库,苹果已经使用新控件取代它. NS_CLASS_DEPRECATED_IOS(3_ ...