验证码识别,爬虫永远的话题~

用打码兔总体的体验就是单线程速度太慢~

  1. import java.io.IOException;
  2. import java.net.MalformedURLException;
  3. import java.util.Date;
  4.  
  5. import org.apache.log4j.Logger;
  6. import org.jsoup.Jsoup;
  7. import org.jsoup.nodes.Document;
  8. import org.jsoup.select.Elements;
  9.  
  10. import com.gargoylesoftware.htmlunit.BrowserVersion;
  11. import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
  12. import com.gargoylesoftware.htmlunit.WebClient;
  13. import com.gargoylesoftware.htmlunit.html.HtmlButton;
  14. import com.gargoylesoftware.htmlunit.html.HtmlForm;
  15. import com.gargoylesoftware.htmlunit.html.HtmlPage;
  16. import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
  17.  
  18. import cn.smy.dama2.Dama2Web;
  19. import cn.smy.dama2.Dama2Web.DecodeResult;
  20. import cn.smy.dama2.Dama2Web.ReadBalanceResult;
  21.  
  22. /**
  23. * @Title: main.java
  24. * @Package
  25. * @Description: Single-threaded check of whether e-mail addresses are eBay accounts (HtmlUnit + Dama2 captcha decoding)
  26. * @author A18ccms A18ccms_gmail_com
  27. * @date 2017年2月15日 下午3:42:00
  28. * @version V1.0
  29. */
  30.  
  /**
   * Single-threaded demo that checks whether e-mail addresses belong to eBay accounts.
   *
   * <p>Each address is typed into the second form of {@code http://fyp.ebay.com/} through HtmlUnit.
   * When the site answers with its "Security Measure" page, the {@code src} of the first
   * {@code iframe} on that page is sent to the Dama2 decoding service, the returned text is
   * submitted, and the address is submitted again.
   *
   * <p>Status codes returned by {@link #checkEbayAccount(String)}:
   * <ul>
   *   <li>{@code -1} - the page could not be loaded or a form submit failed (I/O or HTTP error)</li>
   *   <li>{@code 0} - the site answered that the address is not a match</li>
   *   <li>{@code 1} - the site offered the password-reset options, i.e. the account exists</li>
   *   <li>{@code 2} - after the captcha an unrecognised page was returned (its text is printed)</li>
   *   <li>{@code 3} - the first answer was neither a result page nor the captcha page</li>
   *   <li>{@code 101} - the captcha could not be decoded or the site rejected the decoded text</li>
   * </ul>
   *
   * @author zeze
   */
  public class main {
      private static final Logger logger = Logger.getLogger(main.class);
      // NOTE: the application id, key and login below are redacted placeholders in this listing.
      private static final Dama2Web dama2 = new Dama2Web(46****, "41c5a58de6********d23b67f61645e3a7", "***", "****");
      /** {@code ret} of the most recent successful decode; passed to {@code reportError} on a wrong answer. */
      private static int id;

      private static final int STATUS_FAILED = -1;
      private static final int STATUS_NOT_ACCOUNT = 0;
      private static final int STATUS_IS_ACCOUNT = 1;
      private static final int STATUS_UNKNOWN_PAGE = 2;
      private static final int STATUS_NO_RESULT = 3;
      private static final int STATUS_CAPTCHA_ERROR = 101;

      /** Upper bound on re-checks after a captcha error, so a failing decoder cannot loop forever. */
      private static final int MAX_CAPTCHA_RETRIES = 10;

      private static final String START_URL = "http://fyp.ebay.com/";
      private static final String RESET_PROMPT = "Select how you want to reset your password";
      private static final String NOT_A_MATCH = "Oops, that's not a match. Try again?";
      private static final String CAPTCHA_PAGE = "Security Measure";
      private static final String CAPTCHA_REJECTED =
              "the verification code you entered doesn't match against the image";

      private static final long MILLIS_PER_HOUR = 1000 * 60 * 60;
      private static final long MILLIS_PER_MINUTE = 1000 * 60;
      private static final long MILLIS_PER_SECOND = 1000;

      /**
       * Checks ten sample addresses one after another and prints the result and the elapsed time
       * of each check.
       *
       * @param agrs unused
       */
      public static void main(String[] agrs) {
          for (int i = 0; i < 10; i++) {
              long startedAt = System.currentTimeMillis();
              String emailAccount = (i == 0) ? "asd@qq.com" : "asd" + i + "@qq.com";

              int statusCode = checkEbayAccount(emailAccount);
              int retries = 0;
              while (statusCode == STATUS_CAPTCHA_ERROR && retries < MAX_CAPTCHA_RETRIES) {
                  System.out.println("打码错误!");
                  retries++;
                  statusCode = checkEbayAccount(emailAccount);
              }
              // Report after the retries so the final outcome of a retried check is not lost.
              report(emailAccount, statusCode);

              System.out.println(formatElapsed(System.currentTimeMillis() - startedAt));
          }
      }

      /** Prints the human-readable outcome for one address; unknown codes are printed as numbers. */
      private static void report(String emailAccount, int statusCode) {
          if (statusCode == STATUS_NOT_ACCOUNT) {
              System.out.println(emailAccount + " 该邮箱号不是ebay账号");
          } else if (statusCode == STATUS_IS_ACCOUNT) {
              System.out.println(emailAccount + " 该账号是eBay账号!");
          } else {
              System.out.println(statusCode);
          }
      }

      /** Formats a duration as minutes-within-the-hour, seconds and milliseconds. */
      private static String formatElapsed(long millis) {
          long min = millis % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
          long sec = millis % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
          long ms = millis % MILLIS_PER_SECOND;
          return min + "分钟" + sec + "秒" + ms + "毫秒";
      }

      /**
       * Checks whether an e-mail address is registered as an eBay account.
       *
       * @param emailAccount the address to submit
       * @return one of the status codes listed in the class documentation
       */
      public static int checkEbayAccount(String emailAccount) {
          System.out.println("开始验证账号:" + emailAccount);
          WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
          try {
              return runCheck(webClient, emailAccount);
          } catch (FailingHttpStatusCodeException e) {
              logger.error("HTTP error while checking " + emailAccount, e);
              return STATUS_FAILED;
          } catch (IOException e) {
              // Also covers MalformedURLException, which is a subclass of IOException.
              logger.error("I/O error while checking " + emailAccount, e);
              return STATUS_FAILED;
          } finally {
              // Release the windows and threads held by this client. closeAllWindows() is the
              // API of the HtmlUnit versions that still ship BrowserVersion.FIREFOX_17.
              webClient.closeAllWindows();
          }
      }

      /** Runs the submit / captcha / resubmit dialogue on an already created client. */
      private static int runCheck(WebClient webClient, String emailAccount) throws IOException {
          HtmlPage page = webClient.getPage(START_URL);
          page = submitEmail(page, emailAccount);

          String text = page.asText();
          if (text.contains(RESET_PROMPT)) {
              return STATUS_IS_ACCOUNT;
          }
          if (text.contains(NOT_A_MATCH)) {
              return STATUS_NOT_ACCOUNT;
          }

          while (page.asText().contains(CAPTCHA_PAGE)) {
              Document doc = Jsoup.parse(page.asXml());
              Elements frames = doc.getElementsByTag("iframe");
              String imgUrl = frames.attr("src");
              System.out.println("验证码图片链接:" + imgUrl);

              String code = getCode(imgUrl);
              if (code == null) {
                  // Nothing was decoded, so there is no answer to submit and none to report.
                  return STATUS_CAPTCHA_ERROR;
              }

              page = submitCaptcha(page, code);
              if (page.asText().contains(CAPTCHA_REJECTED)) {
                  dama2.reportError(id);
                  return STATUS_CAPTCHA_ERROR;
              }

              page = submitEmail(page, emailAccount);
              text = page.asText();
              if (text.contains(CAPTCHA_PAGE)) {
                  System.out.println("提交还是验证码页面!");
                  continue;
              }

              if (text.contains(NOT_A_MATCH)) {
                  return STATUS_NOT_ACCOUNT;
              } else if (text.contains(RESET_PROMPT)) {
                  return STATUS_IS_ACCOUNT;
              } else {
                  System.out.println(text);
                  return STATUS_UNKNOWN_PAGE;
              }
          }
          return STATUS_NO_RESULT;
      }

      /** Types the address into the second form of the page and clicks its first button. */
      private static HtmlPage submitEmail(HtmlPage page, String emailAccount) throws IOException {
          HtmlForm form = page.getForms().get(1);
          form.getInputByName("input").setValueAttribute(emailAccount);
          HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
          HtmlPage next = button.click();
          return next;
      }

      /** Types the decoded text into the first form of the captcha page and submits it. */
      private static HtmlPage submitCaptcha(HtmlPage page, String code) throws IOException {
          HtmlForm form = page.getForms().get(0);
          form.getInputByName("tokenText").setValueAttribute(code);
          // The submit control is the sixth <input> of the form on the captcha page.
          HtmlSubmitInput submit = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
          HtmlPage next = submit.click();
          return next;
      }

      /**
       * Asks the Dama2 service to decode the captcha at the given URL.
       *
       * <p>On success the decode id is remembered so that a wrong answer can be reported later.
       *
       * @param imgUrl URL handed to the decoding service
       * @return the decoded text, or {@code null} when the service reported a failure
       */
      public static String getCode(String imgUrl) {
          int type = 6;     // captcha type code expected by the service - TODO confirm its meaning
          int timeout = 30; // passed through to the service; presumably seconds - TODO confirm
          // NOTE(review): the balance is not used here; the call is kept from the original in
          // case the client relies on it - confirm and drop it to save a round trip.
          dama2.getBalance();
          DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
          if (res.ret >= 0) {
              id = res.ret;
              System.out.println("success: result=" + res.result + "; id=" + res.ret);
              return res.result;
          }
          System.err.println("failed: ret = " + res.ret + "; desc=" + res.desc);
          return null;
      }

  }

测试结果如下:

用多线程测试,明显快多了

  1. package test;
  2.  
  3. import java.io.IOException;
  4. import java.net.MalformedURLException;
  5. import java.util.ArrayList;
  6. import java.util.Date;
  7. import java.util.concurrent.Callable;
  8. import java.util.concurrent.ExecutorService;
  9. import java.util.concurrent.Executors;
  10. import java.util.concurrent.Future;
  11.  
  12. import org.apache.log4j.Logger;
  13. import org.jsoup.Jsoup;
  14. import org.jsoup.nodes.Document;
  15. import org.jsoup.select.Elements;
  16.  
  17. import com.gargoylesoftware.htmlunit.BrowserVersion;
  18. import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
  19. import com.gargoylesoftware.htmlunit.WebClient;
  20. import com.gargoylesoftware.htmlunit.html.HtmlButton;
  21. import com.gargoylesoftware.htmlunit.html.HtmlForm;
  22. import com.gargoylesoftware.htmlunit.html.HtmlPage;
  23. import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
  24.  
  25. import cn.smy.dama2.Dama2Web;
  26. import cn.smy.dama2.Dama2Web.DecodeResult;
  27. import cn.smy.dama2.Dama2Web.ReadBalanceResult;
  28.  
  29. /***
  30. *
  31. * @ClassName: EbayMultiplyThreadCheck
  32. * @Description: TODO
  33. * @author zeze
  34. * @date 2017年2月16日 上午8:49:46
  35. *
  36. */
  37. public class EbayMultiplyThreadCheck {
  38.  
  39. private static int threadNum = 30;
  40.  
  41. private static long nd = 1000 * 24 * 60 * 60;
  42. private static long nh = 1000 * 60 * 60;
  43. private static long nm = 1000 * 60;
  44. private static long ns = 1000;
  45. private static Date nowDate;
  46. private static Date endDate;
  47. private static long diff;
  48. private static long min;
  49. private static long sec;
  50. private static long ms;
  51.  
  52. public static void main(String[] args) {
  53. nowDate = new Date();
  54.  
  55. ExecutorService exec = Executors.newFixedThreadPool(threadNum);
  56. ArrayList<Future<Integer>> results = new ArrayList<Future<Integer>>();
  57.  
  58. for (int i = 0; i < threadNum; i++) {
  59. String email = "asd" + i + "@qq.com";
  60. if (i == 0)
  61. email = "asd@qq.com";
  62. results.add(exec.submit(new CheckEbayAccount(email)));
  63. }
  64.  
  65. boolean isDone = false;
  66. while (!isDone) {
  67. isDone = true;
  68. for (Future<Integer> future : results) {
  69. if (!future.isDone()) {
  70. isDone = false;
  71. try {
  72. Thread.sleep(1000);
  73. } catch (InterruptedException e) {
  74. }
  75. break;
  76. }
  77. }
  78. }
  79. exec.shutdown();
  80.  
  81. endDate = new Date();
  82. diff = endDate.getTime() - nowDate.getTime();
  83. min = diff % nd % nh / nm;
  84. sec = diff % nd % nh % nm / ns;
  85. ms = diff % nd % nh % nm % ns;
  86. System.out.println(min + "分钟" + sec + "秒" + ms + "毫秒");
  87.  
  88. }
  89. }
  90.  
  /**
   * Task that checks whether one e-mail address is registered as an eBay account.
   *
   * <p>The address is typed into the second form of {@code http://fyp.ebay.com/}. When the site
   * answers with its "Security Measure" page, the {@code src} of the first {@code iframe} is sent
   * to the Dama2 decoding service, the returned text is submitted, and the address is submitted
   * again. Progress is printed to standard output prefixed with the worker thread's name.
   *
   * <p>Result of {@link #call()}:
   * <ul>
   *   <li>{@code -1} - the page could not be loaded or a form submit failed (I/O or HTTP error)</li>
   *   <li>{@code 0} - the site answered that the address is not a match</li>
   *   <li>{@code 1} - the site offered the password-reset options, i.e. the account exists</li>
   *   <li>{@code 2} - after the captcha an unrecognised page was returned (its text is printed)</li>
   *   <li>{@code 3} - the first answer was neither a result page nor the captcha page</li>
   *   <li>{@code 101} - the captcha could not be solved within the attempt limits</li>
   * </ul>
   */
  class CheckEbayAccount implements Callable<Integer> {

      private static final int STATUS_FAILED = -1;
      private static final int STATUS_NOT_ACCOUNT = 0;
      private static final int STATUS_IS_ACCOUNT = 1;
      private static final int STATUS_UNKNOWN_PAGE = 2;
      private static final int STATUS_NO_RESULT = 3;
      private static final int STATUS_CAPTCHA_ERROR = 101;

      /** Decode requests sent for one captcha image before giving up. */
      private static final int MAX_DECODE_ATTEMPTS = 5;
      /** Captcha answers submitted for one address before giving up. */
      private static final int MAX_CAPTCHA_ATTEMPTS = 10;

      private static final String START_URL = "http://fyp.ebay.com/";
      private static final String RESET_PROMPT = "Select how you want to reset your password";
      private static final String NOT_A_MATCH = "Oops, that's not a match. Try again?";
      private static final String CAPTCHA_PAGE = "Security Measure";
      private static final String CAPTCHA_REJECTED = "Sorry";

      private final String email;
      private static final Logger logger = Logger.getLogger(CheckEbayAccount.class);
      // NOTE: the application id, key and login below are redacted placeholders in this listing.
      // NOTE(review): this client is shared by all worker threads, as in the original; confirm
      // that Dama2Web is safe for concurrent use.
      private static final Dama2Web dama2 = new Dama2Web(****, "41c5a58de68ebe2*******", "***", "****");

      public CheckEbayAccount(String email) {
          this.email = email;
      }

      /**
       * Runs the check for this task's address.
       *
       * @return one of the status codes listed in the class documentation
       */
      @Override
      public Integer call() {
          log("开始验证账号:" + email);
          WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
          try {
              return runCheck(webClient);
          } catch (FailingHttpStatusCodeException e) {
              logger.error("HTTP error while checking " + email, e);
              return STATUS_FAILED;
          } catch (IOException e) {
              // Also covers MalformedURLException, which is a subclass of IOException.
              logger.error("I/O error while checking " + email, e);
              return STATUS_FAILED;
          } finally {
              // Each task owns its client, so release its windows and threads when done.
              // closeAllWindows() is the API of the HtmlUnit versions that still ship
              // BrowserVersion.FIREFOX_17.
              webClient.closeAllWindows();
          }
      }

      /** Runs the submit / captcha / resubmit dialogue on an already created client. */
      private Integer runCheck(WebClient webClient) throws IOException {
          HtmlPage page = webClient.getPage(START_URL);
          page = submitEmail(page);

          String text = page.asText();
          if (text.contains(RESET_PROMPT)) {
              log(email + " 该账号是eBay账号!");
              return STATUS_IS_ACCOUNT;
          } else if (text.contains(NOT_A_MATCH)) {
              log(email + " 该邮箱号不是ebay账号");
              return STATUS_NOT_ACCOUNT;
          }

          int captchaAttempts = 0;
          while (page.asText().contains(CAPTCHA_PAGE)) {
              // Keep answering until the site stops rejecting the submitted text.
              boolean accepted = false;
              while (!accepted) {
                  if (captchaAttempts >= MAX_CAPTCHA_ATTEMPTS) {
                      return STATUS_CAPTCHA_ERROR;
                  }
                  captchaAttempts++;

                  String imgUrl = Jsoup.parse(page.asXml()).getElementsByTag("iframe").attr("src");
                  log("验证码图片链接:" + imgUrl);
                  DecodeResult decoded = decode(imgUrl);
                  if (!isSolved(decoded)) {
                      return STATUS_CAPTCHA_ERROR;
                  }

                  page = submitCaptcha(page, decoded.result);
                  accepted = !page.asText().contains(CAPTCHA_REJECTED);
                  if (!accepted) {
                      log("打码错误!重试");
                      // Report exactly the decode that was just rejected. The id is local to
                      // this thread, so another worker's decode can never be reported.
                      dama2.reportError(decoded.ret);
                  }
              }

              page = submitEmail(page);
              text = page.asText();
              if (text.contains(CAPTCHA_PAGE)) {
                  log("提交还是验证码页面!");
                  continue;
              }

              if (text.contains(NOT_A_MATCH)) {
                  log(email + " 该邮箱号不是ebay账号");
                  return STATUS_NOT_ACCOUNT;
              } else if (text.contains(RESET_PROMPT)) {
                  log(email + " 该账号是eBay账号!");
                  return STATUS_IS_ACCOUNT;
              } else {
                  log(text);
                  return STATUS_UNKNOWN_PAGE;
              }
          }
          log(page.asText());
          return STATUS_NO_RESULT;
      }

      /** Types the address into the second form of the page and clicks its first button. */
      private HtmlPage submitEmail(HtmlPage page) throws IOException {
          HtmlForm form = page.getForms().get(1);
          form.getInputByName("input").setValueAttribute(email);
          HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
          HtmlPage next = button.click();
          return next;
      }

      /** Types the decoded text into the first form of the captcha page and submits it. */
      private static HtmlPage submitCaptcha(HtmlPage page, String code) throws IOException {
          HtmlForm form = page.getForms().get(0);
          form.getInputByName("tokenText").setValueAttribute(code);
          // The submit control is the sixth <input> of the form on the captcha page.
          HtmlSubmitInput submit = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
          HtmlPage next = submit.click();
          return next;
      }

      /** Prints a message prefixed with the current thread's name. */
      private static void log(String message) {
          System.out.println(Thread.currentThread().getName() + " " + message);
      }

      /** A decode counts as solved when the service returned a non-negative id and a text. */
      private static boolean isSolved(DecodeResult res) {
          return res.ret >= 0 && res.result != null;
      }

      /**
       * Requests a decode of the captcha at the given URL, retrying a bounded number of times
       * while the service reports a failure.
       *
       * <p>A failed decode is not reported as an error: there is no decode id to report in
       * that case.
       *
       * @return the last result received; check it with {@link #isSolved(DecodeResult)}
       */
      private static DecodeResult decode(String imgUrl) {
          int type = 6;     // captcha type code expected by the service - TODO confirm its meaning
          int timeout = 30; // passed through to the service; presumably seconds - TODO confirm
          ReadBalanceResult balanceResult = dama2.getBalance();
          DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
          int attempt = 1;
          while (!isSolved(res) && attempt < MAX_DECODE_ATTEMPTS) {
              log("打码失败,重试: ret = " + res.ret + "; desc=" + res.desc);
              res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
              attempt++;
          }
          if (isSolved(res)) {
              log("[打码结果=" + res.result + "] [id=" + res.ret + "] " + balanceResult);
          } else {
              log("打码失败: ret = " + res.ret + "; desc=" + res.desc);
          }
          return res;
      }

      /**
       * Asks the Dama2 service to decode the captcha at the given URL.
       *
       * @param imgUrl URL handed to the decoding service
       * @return the decoded text, or {@code null} when no decode succeeded within
       *         {@code MAX_DECODE_ATTEMPTS} requests
       */
      public static String getCode(String imgUrl) {
          DecodeResult res = decode(imgUrl);
          return isSolved(res) ? res.result : null;
      }

  }

测试30个账号,平均每个3秒

Java 验证码识别之多线程打码兔的更多相关文章

  1. Java验证码识别解决方案

    建库,去重,切割,识别. package edu.fzu.ir.test; import java.awt.Color; import java.awt.image.BufferedImage; im ...

  2. Java 验证码识别库 Tess4j 学习

    Java 验证码识别库 Tess4j 学习 [在用java的Jsoup做爬虫爬取数据时遇到了验证码识别的问题(基于maven),找了网上挺多的资料,发现Tess4j可以自动识别验证码,在这里简单记录下 ...

  3. JAVA验证码识别:基于jTessBoxEditorFX和Tesseract-OCR训练样本

    JAVA验证识别:基于jTessBoxEditorFX和Tesseract-OCR训练样本 工具准备: jTessBoxEditorFX下载:https://github.com/nguyenq/jT ...

  4. java验证码识别

    首先参考了csdn大佬的文章,但是写的不全ImgUtils类没有给出代码,无法进行了 写不完整就是制造垃圾 不过这个大佬又说这个大佬的文章值得参考于是又查看这篇文章 有案例https://blog.c ...

  5. 【Java】验证码识别解决方案

    对于类似以下简单的验证码的识别方案: 1. 2 3 4. 1.建库:切割验证码为单个字符,人工标记,比如:A. 2.识别:给一个验证码:切割为单个字符,在库中查询识别. /*** * author:c ...

  6. Java 验证码、二维码

    Java 验证码.二维码 资源 需要:   jelly-core-1.7.0.GA.jar网站:   http://lychie.github.io/products.html将下载下来的 jelly ...

  7. uu云验证码识别平台,验证码,验证码识别,全自动验证码识别技术,优优云全自动打码,代答题系统,优优云远程打码平台,uu云打码

    uu云验证码识别平台,验证码,验证码识别,全自动验证码识别技术,优优云全自动打码,代答题系统,优优云远程打码平台,uu云打码 优优云验证码识别答题平台介绍 优优云|UU云(中国公司)是全球唯一领先的智 ...

  8. 基于tensorflow的‘端到端’的字符型验证码识别源码整理(github源码分享)

    基于tensorflow的‘端到端’的字符型验证码识别 1   Abstract 验证码(CAPTCHA)的诞生本身是为了自动区分 自然人 和 机器人 的一套公开方法, 但是近几年的人工智能技术的发展 ...

  9. 基于python语言的tensorflow的‘端到端’的字符型验证码识别源码整理(github源码分享)

    基于python语言的tensorflow的‘端到端’的字符型验证码识别 1   Abstract 验证码(CAPTCHA)的诞生本身是为了自动区分 自然人 和 机器人 的一套公开方法, 但是近几年的 ...

随机推荐

  1. 全国出现大面积DNS服务器故障 域名被劫持

    1月21日消息,继今日上午腾讯16项服务出现故障后,大量网站出现了无法访问的情况,据了解,该故障是由于国内DNS根服务器故障所致. 据了解,此次攻击式由于国内所有通用顶级域的根服务器出现异常,导致大量 ...

  2. tensorflow serving 打印调试log

    启动时添加环境变量 export TF_CPP_MIN_VLOG_LEVEL=1 ,这样可以打印VLOG(1)的log

  3. spring mvc上传、下载的实现

    下载 //下载 @RequestMapping(value="/download") public ResponseEntity<byte[]> download() ...

  4. listView滚动事件

    listView滚动事件 实现接口: android.widget.AbsListView.OnScrollListener Interface definition for a callback t ...

  5. Java虚拟机学习 - 对象引用强度 ( 8 )

    无论是通过计数算法判断对象的引用数量,还是通过根搜索算法判断对象引用链是否可达,判定对象是否存活都与“引用”相关. 引用主要分为 :强引用(Strong Reference).软引用(Soft Ref ...

  6. ASP.NET中UrlEncode应该用Uri.EscapeDataString()

    今天,茄子_2008反馈他博客中的“C++”标签失效.检查了一下代码,生成链接时用的是HttpUtility.UrlEncode(url),从链接地址获取标签时用的是HttpUtility.UrlDe ...

  7. mysql数据库以加索引方式提高性能

    数据库查询速率慢的情况下可以给对应的表加上对应的索引,能够有效的提高查询效率,mysql数据库添加索引的SQL入下: ALTER TABLE `table_name` ADD INDEX index_ ...

  8. 【转】python2与python3的主要区别

    摘自:http://www.cnblogs.com/codingmylife/archive/2010/06/06/1752807.html 1.性能 Py3.0运行 pystone benchmar ...

  9. CSS选择器的优化

    前面花了几个篇幅着重介绍了CSS的选择器的使用,我将其分成三个部分:CSS基本选择器.CSS属性选择器以及CSS伪类选择器.那么今天我主要想和大家一起来学习——CSS选择器方面的性能优化.因为对性能这 ...

  10. cocos2dx 3.x draw debug

    有时候需要用ccDrawXXX绘制debug线框来调试图形程序. 在cocos2dx 2.x中,由于是立即模式,所以如果在draw函数中用ccDrawXXX画线框,要用节点的局部坐标. 在cocos2 ...