Java 验证码识别之多线程打码兔
验证码识别,爬虫永远的话题~
用打码兔总体的体验就是单线程速度太慢~
- import java.io.IOException;
- import java.net.MalformedURLException;
- import java.util.Date;
- import org.apache.log4j.Logger;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.select.Elements;
- import com.gargoylesoftware.htmlunit.BrowserVersion;
- import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
- import com.gargoylesoftware.htmlunit.WebClient;
- import com.gargoylesoftware.htmlunit.html.HtmlButton;
- import com.gargoylesoftware.htmlunit.html.HtmlForm;
- import com.gargoylesoftware.htmlunit.html.HtmlPage;
- import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
- import cn.smy.dama2.Dama2Web;
- import cn.smy.dama2.Dama2Web.DecodeResult;
- import cn.smy.dama2.Dama2Web.ReadBalanceResult;
- /**
- * @Title: main.java
- * @Package
- * @Description: TODO(用一句话描述该文件做什么)
- * @author A18ccms A18ccms_gmail_com
- * @date 2017年2月15日 下午3:42:00
- * @version V1.0
- */
/**
 * Single-threaded demo: checks whether e-mail addresses belong to eBay accounts by driving the
 * page at http://fyp.ebay.com/ with HtmlUnit and solving captchas through the Dama2 (打码兔)
 * client.
 *
 * <p>Status codes returned by {@link #checkEbayAccount(String)}:
 * <ul>
 * <li>{@code -1} – the start page could not be loaded</li>
 * <li>{@code 0} – the address is not an eBay account</li>
 * <li>{@code 1} – the address is an eBay account</li>
 * <li>{@code 2} – an unrecognised page was returned after the captcha step</li>
 * <li>{@code 3} – no captcha was shown and no known result text was found</li>
 * <li>{@code 101} – the captcha could not be solved or was rejected</li>
 * </ul>
 *
 * @author zeze
 */
public class main {
    private static final Logger logger = Logger.getLogger(main.class);

    // NOTE: the application id, key and account credentials on the next line are redacted
    // placeholders in the published source; real values must be supplied before compiling.
    private static Dama2Web dama2 = new Dama2Web(46****, "41c5a58de6********d23b67f61645e3a7", "***", "****");

    /** Id of the most recent successful decode, or -1 when the last decode failed. */
    private static int id = -1;

    private static final int STATUS_LOAD_FAILED = -1;
    private static final int STATUS_NOT_ACCOUNT = 0;
    private static final int STATUS_IS_ACCOUNT = 1;
    private static final int STATUS_UNEXPECTED_PAGE = 2;
    private static final int STATUS_NO_RESULT = 3;
    private static final int STATUS_CAPTCHA_ERROR = 101;

    // Page texts used to recognise which step of the flow a response belongs to.
    private static final String TEXT_RESET_OPTIONS = "Select how you want to reset your password";
    private static final String TEXT_NO_MATCH = "Oops, that's not a match. Try again?";
    private static final String TEXT_CAPTCHA = "Security Measure";
    private static final String TEXT_CAPTCHA_WRONG =
            "the verification code you entered doesn't match against the image";

    private static final long MILLIS_PER_HOUR = 1000L * 60 * 60;
    private static final long MILLIS_PER_MINUTE = 1000L * 60;
    private static final long MILLIS_PER_SECOND = 1000L;

    /**
     * Checks ten sample addresses one after another and prints the verdict and the time each
     * check took.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        for (int i = 0; i < 10; i++) {
            long startMillis = System.currentTimeMillis();
            String emailAccount = (i == 0) ? "asd@qq.com" : "asd" + i + "@qq.com";

            int statusCode = checkEbayAccount(emailAccount);
            // Retry for as long as the captcha step fails, then report the final verdict
            // (the original retried but never printed the result of the retry).
            while (statusCode == STATUS_CAPTCHA_ERROR) {
                System.out.println("打码错误!");
                statusCode = checkEbayAccount(emailAccount);
            }

            if (statusCode == STATUS_NOT_ACCOUNT) {
                System.out.println(emailAccount + " 该邮箱号不是ebay账号");
            } else if (statusCode == STATUS_IS_ACCOUNT) {
                System.out.println(emailAccount + " 该账号是eBay账号!");
            } else {
                System.out.println(statusCode);
            }

            System.out.println(formatElapsed(System.currentTimeMillis() - startMillis));
        }
    }

    /**
     * Checks whether the given e-mail address is an eBay account.
     *
     * @param emailAccount the address to check
     * @return one of the status codes listed in the class documentation
     */
    public static int checkEbayAccount(String emailAccount) {
        System.out.println("开始验证账号:" + emailAccount);
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            return runCheck(webClient, emailAccount);
        } finally {
            // Release the windows and background resources held by this client.
            webClient.closeAllWindows();
        }
    }

    /** Runs the whole check flow on an already created client. */
    private static int runCheck(WebClient webClient, String emailAccount) {
        HtmlPage page = null;
        try {
            page = webClient.getPage("http://fyp.ebay.com/");
        } catch (FailingHttpStatusCodeException e) {
            logger.error(e);
        } catch (IOException e) { // also covers MalformedURLException
            logger.error(e);
        }
        if (page == null) {
            // Without a start page there is no form to fill in.
            return STATUS_LOAD_FAILED;
        }

        page = submitEmail(page, emailAccount);
        if (contains(page, TEXT_RESET_OPTIONS)) {
            return STATUS_IS_ACCOUNT;
        }
        if (contains(page, TEXT_NO_MATCH)) {
            return STATUS_NOT_ACCOUNT;
        }

        while (contains(page, TEXT_CAPTCHA)) {
            Document doc = Jsoup.parse(page.asXml());
            Elements frames = doc.getElementsByTag("iframe");
            String imgUrl = frames.attr("src");
            System.out.println("验证码图片链接:" + imgUrl);

            String code = getCode(imgUrl);
            if (code == null) {
                // Decoding failed: nothing to submit and no answer to report as wrong.
                return STATUS_CAPTCHA_ERROR;
            }

            page = submitCaptcha(page, code);
            if (contains(page, TEXT_CAPTCHA_WRONG)) {
                if (id >= 0) {
                    // presumably tells the service that this answer was wrong - TODO confirm
                    dama2.reportError(id);
                }
                return STATUS_CAPTCHA_ERROR;
            }

            // Submit the e-mail address again now that the captcha was accepted.
            page = submitEmail(page, emailAccount);
            if (contains(page, TEXT_CAPTCHA)) {
                System.out.println("提交还是验证码页面!");
                continue;
            }
            if (contains(page, TEXT_NO_MATCH)) {
                return STATUS_NOT_ACCOUNT;
            } else if (contains(page, TEXT_RESET_OPTIONS)) {
                // The original compared indexOf(...) with 1 instead of -1, which made this
                // branch fire for practically every page.
                return STATUS_IS_ACCOUNT;
            } else {
                System.out.println(page.asText());
                return STATUS_UNEXPECTED_PAGE;
            }
        }
        return STATUS_NO_RESULT;
    }

    /** Returns true when the rendered text of the page contains the given fragment. */
    private static boolean contains(HtmlPage page, String fragment) {
        return page.asText().indexOf(fragment) != -1;
    }

    /**
     * Fills the e-mail field of the second form and clicks its first button. Returns the
     * resulting page, or the unchanged page when the click fails with an I/O error.
     */
    private static HtmlPage submitEmail(HtmlPage page, String emailAccount) {
        HtmlForm form = page.getForms().get(1);
        form.getInputByName("input").setValueAttribute(emailAccount);
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
        HtmlPage result = page;
        try {
            result = button.click();
        } catch (IOException e) {
            logger.error(e);
        }
        return result;
    }

    /**
     * Types the captcha answer into the first form and clicks its sixth input element, which
     * the original code treats as the submit control. Returns the resulting page, or the
     * unchanged page when the click fails with an I/O error.
     */
    private static HtmlPage submitCaptcha(HtmlPage page, String code) {
        HtmlForm form = page.getForms().get(0);
        form.getInputByName("tokenText").setValueAttribute(code);
        HtmlSubmitInput submit = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
        HtmlPage result = page;
        try {
            result = submit.click();
        } catch (IOException e) {
            logger.error(e);
        }
        return result;
    }

    /**
     * Formats a duration as minutes, seconds and milliseconds. Whole hours are dropped, as in
     * the original calculation.
     */
    private static String formatElapsed(long diff) {
        long min = diff % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
        long sec = diff % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
        long ms = diff % MILLIS_PER_SECOND;
        return min + "分钟" + sec + "秒" + ms + "毫秒";
    }

    /**
     * Asks the Dama2 service to decode the captcha behind the given URL.
     *
     * @param imgUrl URL of the captcha image
     * @return the decoded text as delivered by the client; on failure this is whatever the
     *         client put into {@code result} (callers treat {@code null} as "not decoded")
     */
    public static String getCode(String imgUrl) {
        int type = 6;
        int timeout = 30;
        // The balance is not used here; the call is kept because the original made it before
        // every decode - TODO confirm whether the client depends on it.
        dama2.getBalance();
        DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
        if (res.ret >= 0) {
            id = res.ret;
            System.out.println("success: result=" + res.result + "; id=" + res.ret);
        } else {
            // Forget the previous id so that a stale answer is never reported as wrong.
            id = -1;
            System.err.println("failed: ret = " + res.ret + "; desc=" + res.desc);
        }
        return res.result;
    }
}
测试结果如下:
用多线程测试,明显快多了
- package test;
- import java.io.IOException;
- import java.net.MalformedURLException;
- import java.util.ArrayList;
- import java.util.Date;
- import java.util.concurrent.Callable;
- import java.util.concurrent.ExecutorService;
- import java.util.concurrent.Executors;
- import java.util.concurrent.Future;
- import org.apache.log4j.Logger;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.select.Elements;
- import com.gargoylesoftware.htmlunit.BrowserVersion;
- import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
- import com.gargoylesoftware.htmlunit.WebClient;
- import com.gargoylesoftware.htmlunit.html.HtmlButton;
- import com.gargoylesoftware.htmlunit.html.HtmlForm;
- import com.gargoylesoftware.htmlunit.html.HtmlPage;
- import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
- import cn.smy.dama2.Dama2Web;
- import cn.smy.dama2.Dama2Web.DecodeResult;
- import cn.smy.dama2.Dama2Web.ReadBalanceResult;
- /***
- *
- * @ClassName: EbayMultiplyThreadCheck
- * @Description: TODO
- * @author zeze
- * @date 2017年2月16日 上午8:49:46
- *
- */
- public class EbayMultiplyThreadCheck {
- private static int threadNum = 30;
- private static long nd = 1000 * 24 * 60 * 60;
- private static long nh = 1000 * 60 * 60;
- private static long nm = 1000 * 60;
- private static long ns = 1000;
- private static Date nowDate;
- private static Date endDate;
- private static long diff;
- private static long min;
- private static long sec;
- private static long ms;
- public static void main(String[] args) {
- nowDate = new Date();
- ExecutorService exec = Executors.newFixedThreadPool(threadNum);
- ArrayList<Future<Integer>> results = new ArrayList<Future<Integer>>();
- for (int i = 0; i < threadNum; i++) {
- String email = "asd" + i + "@qq.com";
- if (i == 0)
- email = "asd@qq.com";
- results.add(exec.submit(new CheckEbayAccount(email)));
- }
- boolean isDone = false;
- while (!isDone) {
- isDone = true;
- for (Future<Integer> future : results) {
- if (!future.isDone()) {
- isDone = false;
- try {
- Thread.sleep(1000);
- } catch (InterruptedException e) {
- }
- break;
- }
- }
- }
- exec.shutdown();
- endDate = new Date();
- diff = endDate.getTime() - nowDate.getTime();
- min = diff % nd % nh / nm;
- sec = diff % nd % nh % nm / ns;
- ms = diff % nd % nh % nm % ns;
- System.out.println(min + "分钟" + sec + "秒" + ms + "毫秒");
- }
- }
/**
 * Task that checks whether one e-mail address belongs to an eBay account by driving the page at
 * http://fyp.ebay.com/ with HtmlUnit and solving captchas through the Dama2 (打码兔) client.
 *
 * <p>Result of {@link #call()}:
 * <ul>
 * <li>{@code -1} – the start page could not be loaded</li>
 * <li>{@code 0} – the address is not an eBay account</li>
 * <li>{@code 1} – the address is an eBay account</li>
 * <li>{@code 2} – an unrecognised page was returned after the captcha step</li>
 * <li>{@code 3} – no captcha was shown and no known result text was found</li>
 * <li>{@code 101} – the captcha could not be decoded after several attempts</li>
 * </ul>
 */
class CheckEbayAccount implements Callable<Integer> {
    private static final int STATUS_LOAD_FAILED = -1;
    private static final int STATUS_NOT_ACCOUNT = 0;
    private static final int STATUS_IS_ACCOUNT = 1;
    private static final int STATUS_UNEXPECTED_PAGE = 2;
    private static final int STATUS_NO_RESULT = 3;
    private static final int STATUS_CAPTCHA_ERROR = 101;

    // Page texts used to recognise which step of the flow a response belongs to.
    private static final String TEXT_RESET_OPTIONS = "Select how you want to reset your password";
    private static final String TEXT_NO_MATCH = "Oops, that's not a match. Try again?";
    private static final String TEXT_CAPTCHA = "Security Measure";
    private static final String TEXT_CAPTCHA_WRONG = "Sorry";

    /** Upper bound on decode attempts per captcha so a failing service cannot loop forever. */
    private static final int MAX_DECODE_ATTEMPTS = 5;

    private final String email;
    private static final Logger logger = Logger.getLogger(CheckEbayAccount.class);

    // NOTE: the application id, key and account credentials on the next line are redacted
    // placeholders in the published source; real values must be supplied before compiling.
    private static Dama2Web dama2 = new Dama2Web(****, "41c5a58de68ebe2*******", "***", "****");

    /**
     * Id of the last successful decode made by the current thread. The original kept this in
     * one static int shared by all pool threads, so a thread could report another thread's
     * captcha as wrong.
     */
    private static final ThreadLocal<Integer> lastCaptchaId = new ThreadLocal<Integer>();

    /**
     * @param email the address to check
     */
    public CheckEbayAccount(String email) {
        this.email = email;
    }

    /**
     * Runs the check for this task's address.
     *
     * @return one of the status codes listed in the class documentation
     */
    @Override
    public Integer call() {
        say("开始验证账号:" + email);
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
        try {
            return Integer.valueOf(runCheck(webClient));
        } finally {
            // Release the windows and background resources held by this client.
            webClient.closeAllWindows();
            lastCaptchaId.remove();
        }
    }

    /** Runs the whole check flow on an already created client. */
    private int runCheck(WebClient webClient) {
        HtmlPage page = null;
        try {
            page = webClient.getPage("http://fyp.ebay.com/");
        } catch (FailingHttpStatusCodeException e) {
            logger.error(e);
        } catch (IOException e) { // also covers MalformedURLException
            logger.error(e);
        }
        if (page == null) {
            // Without a start page there is no form to fill in.
            return STATUS_LOAD_FAILED;
        }

        page = submitEmail(page);
        if (contains(page, TEXT_RESET_OPTIONS)) {
            say(email + " 该账号是eBay账号!");
            return STATUS_IS_ACCOUNT;
        } else if (contains(page, TEXT_NO_MATCH)) {
            say(email + " 该邮箱号不是ebay账号");
            return STATUS_NOT_ACCOUNT;
        }

        while (contains(page, TEXT_CAPTCHA)) {
            page = solveCaptcha(page);
            if (page == null) {
                return STATUS_CAPTCHA_ERROR;
            }
            // Keep decoding while the site rejects the submitted answer.
            while (contains(page, TEXT_CAPTCHA_WRONG)) {
                say("打码错误!重试");
                reportLastCaptchaWrong();
                page = solveCaptcha(page);
                if (page == null) {
                    return STATUS_CAPTCHA_ERROR;
                }
            }

            // Submit the e-mail address again now that the captcha was accepted.
            page = submitEmail(page);
            if (contains(page, TEXT_CAPTCHA)) {
                say("提交还是验证码页面!");
                continue;
            }
            if (contains(page, TEXT_NO_MATCH)) {
                say(email + " 该邮箱号不是ebay账号");
                return STATUS_NOT_ACCOUNT;
            } else if (contains(page, TEXT_RESET_OPTIONS)) {
                // The original compared indexOf(...) with 1 instead of -1, which made this
                // branch fire for practically every page.
                say(email + " 该账号是eBay账号!");
                return STATUS_IS_ACCOUNT;
            } else {
                say(page.asText());
                return STATUS_UNEXPECTED_PAGE;
            }
        }
        say(page.asText());
        return STATUS_NO_RESULT;
    }

    /** Prints a message prefixed with the name of the current thread. */
    private static void say(String message) {
        System.out.println(Thread.currentThread().getName() + " " + message);
    }

    /** Returns true when the rendered text of the page contains the given fragment. */
    private static boolean contains(HtmlPage page, String fragment) {
        return page.asText().indexOf(fragment) != -1;
    }

    /** Reports this thread's last decoded captcha as wrong, if there is one. */
    private static void reportLastCaptchaWrong() {
        Integer captchaId = lastCaptchaId.get();
        if (captchaId != null) {
            // presumably tells the service that this answer was wrong - TODO confirm
            dama2.reportError(captchaId.intValue());
        }
    }

    /**
     * Fills the e-mail field of the second form and clicks its first button. Returns the
     * resulting page, or the unchanged page when the click fails with an I/O error.
     */
    private HtmlPage submitEmail(HtmlPage page) {
        HtmlForm form = page.getForms().get(1);
        form.getInputByName("input").setValueAttribute(email);
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
        HtmlPage result = page;
        try {
            result = button.click();
        } catch (IOException e) {
            logger.error(e);
        }
        return result;
    }

    /**
     * Decodes the captcha shown on the page and submits the answer through the first form,
     * clicking its sixth input element, which the original code treats as the submit control.
     *
     * @return the page returned by the submission (the unchanged page when the click fails
     *         with an I/O error), or {@code null} when the captcha could not be decoded
     */
    private static HtmlPage solveCaptcha(HtmlPage page) {
        Document doc = Jsoup.parse(page.asXml());
        Elements frames = doc.getElementsByTag("iframe");
        String imgUrl = frames.attr("src");
        say("验证码图片链接:" + imgUrl);

        String code = getCode(imgUrl);
        if (code == null) {
            return null;
        }

        HtmlForm form = page.getForms().get(0);
        form.getInputByName("tokenText").setValueAttribute(code);
        HtmlSubmitInput submit = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
        HtmlPage result = page;
        try {
            result = submit.click();
        } catch (IOException e) {
            logger.error(e);
        }
        return result;
    }

    /**
     * Asks the Dama2 service to decode the captcha behind the given URL, retrying a bounded
     * number of times while the service returns an error and no result.
     *
     * @param imgUrl URL of the captcha image
     * @return the decoded text, or whatever the client left in {@code result} (normally
     *         {@code null}) when every attempt failed
     */
    public static String getCode(String imgUrl) {
        int type = 6;
        int timeout = 30;
        ReadBalanceResult balanceResult = dama2.getBalance();
        DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);

        int attempts = 1;
        while (res.ret < 0 && res.result == null && attempts < MAX_DECODE_ATTEMPTS) {
            // A failed decode has no id of its own, so nothing is reported as wrong here; the
            // original reported the stale id of an earlier, unrelated decode.
            say("打码失败,重试: ret = " + res.ret + "; desc=" + res.desc);
            res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
            attempts++;
        }

        if (res.ret >= 0) {
            lastCaptchaId.set(Integer.valueOf(res.ret));
            say("[打码结果=" + res.result + "] [id=" + res.ret + "] " + balanceResult);
        } else {
            lastCaptchaId.remove();
            say("打码失败,放弃: ret = " + res.ret + "; desc=" + res.desc);
        }
        return res.result;
    }
}
测试30个账号,平均每个3秒
Java 验证码识别之多线程打码兔的更多相关文章
- Java验证码识别解决方案
建库,去重,切割,识别. package edu.fzu.ir.test; import java.awt.Color; import java.awt.image.BufferedImage; im ...
- Java 验证码识别库 Tess4j 学习
Java 验证码识别库 Tess4j 学习 [在用java的Jsoup做爬虫爬取数据时遇到了验证码识别的问题(基于maven),找了网上挺多的资料,发现Tess4j可以自动识别验证码,在这里简单记录下 ...
- JAVA验证码识别:基于jTessBoxEditorFX和Tesseract-OCR训练样本
JAVA验证识别:基于jTessBoxEditorFX和Tesseract-OCR训练样本 工具准备: jTessBoxEditorFX下载:https://github.com/nguyenq/jT ...
- java验证码识别
首先参考了csdn大佬的文章,但是写的不全ImgUtils类没有给出代码,无法进行了 写不完整就是制造垃圾 不过这个大佬又说这个大佬的文章值得参考于是又查看这篇文章 有案例https://blog.c ...
- 【Java】验证码识别解决方案
对于类似以下简单的验证码的识别方案: 1. 2 3 4. 1.建库:切割验证码为单个字符,人工标记,比如:A. 2.识别:给一个验证码:切割为单个字符,在库中查询识别. /*** * author:c ...
- Java 验证码、二维码
Java 验证码.二维码 资源 需要: jelly-core-1.7.0.GA.jar网站: http://lychie.github.io/products.html将下载下来的 jelly ...
- uu云验证码识别平台,验证码,验证码识别,全自动验证码识别技术,优优云全自动打码,代答题系统,优优云远程打码平台,uu云打码
uu云验证码识别平台,验证码,验证码识别,全自动验证码识别技术,优优云全自动打码,代答题系统,优优云远程打码平台,uu云打码 优优云验证码识别答题平台介绍 优优云|UU云(中国公司)是全球唯一领先的智 ...
- 基于tensorflow的‘端到端’的字符型验证码识别源码整理(github源码分享)
基于tensorflow的‘端到端’的字符型验证码识别 1 Abstract 验证码(CAPTCHA)的诞生本身是为了自动区分 自然人 和 机器人 的一套公开方法, 但是近几年的人工智能技术的发展 ...
- 基于python语言的tensorflow的‘端到端’的字符型验证码识别源码整理(github源码分享)
基于python语言的tensorflow的‘端到端’的字符型验证码识别 1 Abstract 验证码(CAPTCHA)的诞生本身是为了自动区分 自然人 和 机器人 的一套公开方法, 但是近几年的 ...
随机推荐
- 全国出现大面积DNS服务器故障 域名被劫持
1月21日消息,继今日上午腾讯16项服务出现故障后,大量网站出现了无法访问的情况,据了解,该故障是由于国内DNS根服务器故障所致. 据了解,此次攻击是由于国内所有通用顶级域的根服务器出现异常,导致大量 ...
- tensorflow serving 打印调试log
启动时添加环境变量 export TF_CPP_MIN_VLOG_LEVEL=1 ,这样可以打印VLOG(1)的log
- spring mvc上传、下载的实现
下载 //下载 @RequestMapping(value="/download") public ResponseEntity<byte[]> download() ...
- listView滚动事件
listView滚动事件 实现接口: android.widget.AbsListView.OnScrollListener Interface definition for a callback t ...
- Java虚拟机学习 - 对象引用强度 ( 8 )
无论是通过计数算法判断对象的引用数量,还是通过根搜索算法判断对象引用链是否可达,判定对象是否存活都与“引用”相关. 引用主要分为 :强引用(Strong Reference).软引用(Soft Ref ...
- ASP.NET中UrlEncode应该用Uri.EscapeDataString()
今天,茄子_2008反馈他博客中的“C++”标签失效.检查了一下代码,生成链接时用的是HttpUtility.UrlEncode(url),从链接地址获取标签时用的是HttpUtility.UrlDe ...
- mysql数据库以加索引方式提高性能
数据库查询速率慢的情况下可以给对应的表加上对应的索引,能够有效地提高查询效率,mysql数据库添加索引的SQL如下: ALTER TABLE `table_name` ADD INDEX index_ ...
- 【转】python2与python3的主要区别
摘自:http://www.cnblogs.com/codingmylife/archive/2010/06/06/1752807.html 1.性能 Py3.0运行 pystone benchmar ...
- CSS选择器的优化
前面花了几个篇幅着重介绍了CSS的选择器的使用,我将其分成三个部分:CSS基本选择器.CSS属性选择器以及CSS伪类选择器.那么今天我主要想和大家一起来学习——CSS选择器方面的性能优化.因为对性能这 ...
- cocos2dx 3.x draw debug
有时候需要用ccDrawXXX绘制debug线框来调试图形程序. 在cocos2dx 2.x中,由于是立即模式,所以如果在draw函数中用ccDrawXXX画线框,要用节点的局部坐标. 在cocos2 ...