java版模拟浏览器下载百度动漫图片到本地。

 package javaNet.Instance.ImageDownload;

 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  * Fetches an HTML page, extracts image URLs from it with a caller-supplied
  * regular expression, and saves each image to a local directory as
  * {@code <index>.jpg}.
  *
  * <p>Method names keep the original (non-standard) capitalisation so that
  * existing callers continue to compile.
  */
 public class DownloadImgs {

     /** Address of the page whose HTML is scanned for image URLs. */
     private final String url;

     /**
      * @param url address of the page to fetch; it is parsed only when
      *            {@link #GetBaiduImgHtml_Stream()} is called
      */
     public DownloadImgs(String url) {
         this.url = url;
     }

     //----------------------------------gethtml start-----------------------------

     /**
      * Opens a stream on the configured page URL.
      *
      * <p>The caller owns the returned stream and must close it
      * ({@link #InputStreamToString(InputStream)} does so).
      *
      * @return the raw byte stream of the page
      * @throws MalformedURLException if the configured URL cannot be parsed
      * @throws IOException if the connection cannot be opened
      */
     public InputStream GetBaiduImgHtml_Stream() throws IOException, MalformedURLException {
         URL pageUrl = new URL(url);
         return pageUrl.openStream();
     }

     /**
      * Reads the whole stream as UTF-8 text and closes it.
      *
      * <p>Every line read is terminated with {@code '\n'} in the result,
      * including the last one. This is best-effort: if reading fails, the
      * stack trace is printed and the text read so far is returned.
      *
      * @param inStrm stream to drain; always closed before returning
      * @return the decoded text, possibly partial if an I/O error occurred
      */
     public String InputStreamToString(InputStream inStrm) {
         StringBuilder sb = new StringBuilder();
         // Decode explicitly as UTF-8 instead of the platform default charset, so the
         // result does not depend on the machine the program runs on.
         // Closing the reader also closes the wrapped stream.
         try (BufferedReader reader =
                  new BufferedReader(new InputStreamReader(inStrm, StandardCharsets.UTF_8))) {
             String line;
             while ((line = reader.readLine()) != null) {
                 sb.append(line).append('\n');
             }
         } catch (IOException e) {
             // Deliberately non-fatal: callers receive whatever was read before the error.
             e.printStackTrace();
         }
         return sb.toString();
     }

     /**
      * Fetches the configured page and returns its HTML as a string.
      *
      * @return the page text as produced by {@link #InputStreamToString(InputStream)}
      * @throws MalformedURLException if the configured URL cannot be parsed
      * @throws IOException if the connection cannot be opened
      */
     public String GetBaiduImgHtml_Page() throws MalformedURLException, IOException {
         return this.InputStreamToString(this.GetBaiduImgHtml_Stream());
     }

     /**
      * Debug helper: prints the given page text to standard output.
      *
      * @param page text to print
      */
     public void Display_HtmlPage(String page) {
         System.out.println(page);
     }

     //-------------------------gethtml end----------------

     //-------------------------parsetoimgurllist start-----

     /**
      * Collects capture group 1 of every match of {@code imgPa} in {@code page}.
      *
      * @param page  text to search
      * @param imgPa regular expression; it must contain at least one capturing
      *              group, otherwise {@link Matcher#group(int)} throws
      *              {@link IndexOutOfBoundsException}
      * @return the captured strings in match order; empty if nothing matches
      */
     public ArrayList<String> ParsePageToImgList(String page, String imgPa) {
         ArrayList<String> imgList = new ArrayList<String>();
         Matcher matcher = Pattern.compile(imgPa).matcher(page);
         while (matcher.find()) {
             imgList.add(matcher.group(1));
         }
         return imgList;
     }

     //------------------------parsetoimgurllist end---------

     //------------------------DownloadFile  start----------

     /**
      * Downloads one resource and stores it as {@code <index>.jpg} inside
      * {@code path}.
      *
      * <p>Failures are reported on the console and through the return value
      * rather than by throwing, so a caller can continue with the next image.
      *
      * @param imgUrl address of the resource to download
      * @param index  number used as the file name (the extension is always {@code .jpg})
      * @param path   existing directory that receives the file
      * @return {@code true} if the whole resource was written, {@code false} otherwise
      */
     public boolean DownloadFile(String imgUrl, int index, String path) {
         // File(parent, child) inserts the platform separator; a hard-coded "\\" would
         // produce a file literally named "dir\1.jpg" on non-Windows systems.
         File target = new File(path, index + ".jpg");
         System.out.println("下载：" + imgUrl);
         // try-with-resources closes both streams even when a read or write fails.
         try (InputStream ins = new URL(imgUrl).openStream();
              FileOutputStream fout = new FileOutputStream(target)) {
             byte[] buffer = new byte[2048];
             int bytesRead;
             while ((bytesRead = ins.read(buffer)) != -1) {
                 fout.write(buffer, 0, bytesRead);
             }
         } catch (IOException | RuntimeException e) {
             // Any failure for a single image is reported and turned into "false".
             System.out.println("下载失败！");
             e.printStackTrace();
             return false;
         }
         System.out.println("下载完成...");
         return true;
     }

     //------------------------DownloadFile  end----------

     //------------------------mkDir  start----------

     /**
      * Creates the download directory, including missing parent directories,
      * if it does not exist yet. A failure to create it is reported on
      * standard error; nothing is returned or thrown.
      *
      * @param path directory to create
      */
     public void MkDir(String path) {
         File dir = new File(path);
         if (!dir.exists() && !dir.mkdirs()) {
             // Surface the problem here; otherwise every later download fails obscurely.
             System.err.println("Could not create directory: " + dir.getAbsolutePath());
         }
     }

     //------------------------mkDir  end------------

     /**
      * Debug helper: prints each element of the list on its own line.
      *
      * @param list strings to print
      */
     public void Display_ArrayList(ArrayList<String> list) {
         for (String item : list) {
             System.out.println(item);
         }
     }

     /**
      * Fetches the hard-coded Baidu image-search page, extracts every
      * {@code "objURL"} value, and downloads the images into {@code F:\photos}.
      *
      * @param args unused
      * @throws MalformedURLException if the search URL cannot be parsed
      * @throws IOException if the search page cannot be opened
      */
     public static void main(String[] args) throws MalformedURLException, IOException {
         String imgPa = "\"objURL\":\"(.*?)\"";
         String path = "F:\\photos";

         DownloadImgs downloadimgs = new DownloadImgs("http://image.baidu.com/search/index?"
                 + "tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&sf=1"
                 + "&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0"
                 + "&istype=2&ie=utf-8&word=%E5%8A%A8%E6%BC%AB&oq=%E5%8A%A8%E6%BC%AB&rsp=-1");

         String htmlPage = downloadimgs.GetBaiduImgHtml_Page();
         ArrayList<String> imgList = downloadimgs.ParsePageToImgList(htmlPage, imgPa);

         downloadimgs.MkDir(path);

         int index = 0;       // numbers the target files 1..n in page order
         int downloaded = 0;  // counts only the downloads that actually succeeded
         for (String imgUrl : imgList) {
             index++;
             if (downloadimgs.DownloadFile(imgUrl, index, path)) {
                 downloaded++;
             }
         }
         System.out.println("一共下载了" + downloaded + "个图片。");
     }

 }

java版模拟浏览器下载百度动漫图片到本地。的更多相关文章

[JAVA]解决不同浏览器下载附件的中文名乱码问题
附件下载时,遇到中文附件名的兼容性问题,firefox.chrome.ie三个派系不兼容,通过分析整理,总结出处理该问题的办法,记录如下: 1.文件名编码服务器默认使用的是ISO8859-1,而我们 ...
java 实现模拟浏览器访问网站
一般的情况下我们都是使用IE或者Navigator浏览器来访问一个WEB服务器,用来浏览页面查看信息或者提交一些数据等等.所访问的这些页面有的仅仅是一些普通的页面,有的需要用户登录后方可使用,或者需 ...
Java 使用IE浏览器下载文件，文件名乱码问题
使用Servlet实现文件下载功能时,使用IE下载出现文件名乱码; 网上常见的解决办法是通过"user-agent"来判断浏览器: if (req.getHeader("u ...
Java 解决IE浏览器下载文件，文件名出现乱码问题
/** * 区分ie 和其他浏览器的下载文件乱码问题 * @param request * @param fileName * @return */ public String getFileName ...
Java使用IE浏览器下载文件，文件名乱码问题
String userAgent = request.getHeader("user-agent").toLowerCase(); if (userAgent.contains(& ...
浏览器下载img标签Base64图片
https://blog.csdn.net/qq_42076140/article/details/82113622 原文地址 <a href="javascript:downl ...
java读流方式，下载网络上的图片
本工具类支持url的list集合,具体实现如下所示: public static void download(ArrayList<String> listUrl, String downl ...
下载百度上的图片C#——输入名字就可以下载
using System; using System.Collections.Generic; using System.Data; using System.Configuration; using ...
python爬虫:使用Selenium模拟浏览器行为
前几天有位微信读者问我一个爬虫的问题,就是在爬去百度贴吧首页的热门动态下面的图片的时候,爬取的图片总是爬取不完整,比首页看到的少.原因他也大概分析了下,就是后面的图片是动态加载的.他的问题就是这部分动 ...

随机推荐

js获取?后面具体参数的值
function getURLParam(name) { return decodeURIComponent((new RegExp('[?|&]' + name + '=' ...
vue.js慢速入门（1）
0.MVVM 什么是MVVM?就是Model-View-ViewModel. ViewModel是Vue.js的核心,它是一个Vue实例. 不太懂也没关系,慢慢就懂了. 1.基础示例代码: < ...
*HDU3047 并查集
Zjnu Stadium Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)Tota ...
java并发控制：lock
一.synchronized的缺陷 synchronized是java中的一个关键字,也就是说是Java语言内置的特性.那么为什么会出现Lock呢? 在上面一篇文章中,我们了解到如果一个代码块被syn ...
internet 协议入门
正文 1.概述互联网的实现,分成好几层.每一层都有自己的功能,就像建筑物一样,每一层都靠下一层支持. 1.1 模型划分首先我们需要明白的是互联网的实现是分层级的,那么这个层级的划分根据不同的模型又 ...
Java编程中-servlet
今天将别人的项目导入eclipse之后,出现了“The import javax.servlet cannot be resolved”错误 import javax.servlet.ServletE ...
openstack-glance
1.glance 功能对外提供image的管理功能 2.glance架构 api : REST API,提供对外调用接口 registry: 数据库管理逻辑处理 backen:image的实际存放位 ...
JS学习总结（新手）
1. JS面向对象 http://www.cnblogs.com/JavascriptDream/p/5064976.html a. Prototype 属性的理解 b. 遗传继承函数 functio ...
Vim，极简使用教程，让你瞬间脱离键鼠切换的痛苦
注:看大家对Vim仇恨极大,其实它只是一种文本操作方式,可以减少键鼠的切换,从而让编辑文本的操作更迅捷.并不等同于IDE,在我看来,它们是两个是包含关系,IDE可以有Vim编辑模式.Vim或许可以通过 ...
php 获取中文长度截取中文字符串
#获取中文长度mb_strlen($str,$encoding); #截取中文字符串 mb_substr(str,start,length,encoding);

java版模拟浏览器下载百度动漫图片到本地。

java版模拟浏览器下载百度动漫图片到本地。的更多相关文章

随机推荐

热门专题