网络版程序:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List; public class TestIndex { private String rootUrl = "http://localhost/apk/";
private String listUrl = rootUrl + "test-index.htm";
private static List<String> imageUrlList = new ArrayList<String>();
public static void main(String args[]){
TestIndex ti = new TestIndex();
ti.getData();
System.out.println(imageUrlList.size());
for(int i=0; i<imageUrlList.size();i++){
System.out.println(imageUrlList.get(i));
} } private InputStream getNetInputStream(String urlStr)
{
try
{
URL url = new URL(urlStr);
URLConnection conn = url.openConnection();
conn.connect();
InputStream is = conn.getInputStream();
return is;
}
catch (Exception e)
{ }
return null;
}
private void getData() {
try
{
InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
} is.close();
String startStr = "src=\"https://";
String endStr = " width=";
int start = 0;
int end = 0;
int index =0;
imageUrlList.clear();
while (true)
{
start = html.indexOf(startStr, index);
if (start < 0)
break;
index=start;
end = html.indexOf(endStr, index);
String ss = html.substring(start+5,end-1);
imageUrlList.add(ss);
index +=ss.length();
}
}
catch (Exception e)
{
// TODO: handle exception
}
}
}

本地版程序:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List; public class TestIndex_IO { private String rootUrl = "D:/Hixin/webandroid/";
private String listUrl = rootUrl + "test-index.htm";
private static List<String> imageUrlList = new ArrayList<String>();
public static void main(String args[]){
TestIndex_IO ti = new TestIndex_IO();
ti.getData();
System.out.println(imageUrlList.size());
for(int i=0; i<imageUrlList.size();i++){
System.out.println(imageUrlList.get(i));
} } private InputStream getNetInputStream(String urlStr)
{ InputStream is;
try {
is = new FileInputStream(new File(urlStr));
return is;
} catch (FileNotFoundException e) {
e.printStackTrace();
}
return null; }
private void getData() {
try
{
InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
} is.close();
String startStr = "src=\"https://";
String endStr = " width=";
int start = 0;
int end = 0;
int index =0;
imageUrlList.clear();
while (true)
{
start = html.indexOf(startStr, index);
if (start < 0)
break;
index=start;
end = html.indexOf(endStr, index);
String ss = html.substring(start+5,end-1);
imageUrlList.add(ss);
index +=ss.length();
}
}
catch (Exception e)
{
// TODO: handle exception
}
}
}

差别仅仅在于private InputStream getNetInputStream(String urlStr)函数。为避免中文乱码,建议InputStreamReader isr = new InputStreamReader(is,"utf-8");

            InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
}
System.out.println(html.length());
is.close();

输出结果为:77300

            InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is,"utf-8");
BufferedReader br = new BufferedReader(isr);
String s = null;
String html="";
while ((s = br.readLine()) != null)
{
html+=s;
}
System.out.println(html.length());
is.close();

输出结果为:77135

 private String writeUrl = "D:/newfile/new/new";
private String fileName ="test-index.htm";
File f = new File(writeUrl);
if(!f.exists()) {
f.mkdirs();
}
File f1 = new File(f, fileName);
FileOutputStream fos = new FileOutputStream(f1);
OutputStreamWriter osw = new OutputStreamWriter(fos); osw.write(html,0,html.length());
osw.flush();
osw.close();

解析出htm文件中包含的网址。

结果:

20
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

用正则表达式更简单:

 InputStream is = getNetInputStream(listUrl);
InputStreamReader isr = new InputStreamReader(is);
BufferedReader br = new BufferedReader(isr);
String s = ""; while ((s = br.readLine()) != null)
{
Pattern p = Pattern.compile("src=\"https[^\"]+");
Matcher m = p.matcher(s);
while(m.find()) {
System.out.println(m.group());
}
}
 src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcRvQgUjsVDBncM3mVIgIyIuE87BnlyJUy2BNsAp8kUoTanrC_css5mVAw
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcThd8cYjOTmCgYJZxX5ls-xpxaAlH1_yocOSCqI5_7OkL29SNtbCZ7q2Yoj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTl-FzKmsppxuwzmTITGCv9uDxmrWr1pG0lw8mUD9wkWIloASxQeBEMnVjz
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcQWbmiZJIXKHV2IoTBp7zSY6kD5g5VPzVtBTLJYYR5nwTtKi2-0_u93qL4e
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcSlrLi_GtVgUehU7coFe1eMdrJxPdvS42iTqXkla0g75s31NBfAq2u1LE4
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcSkrlyGxSs8Dr_7k3MUvoGq1vE45LgHZ0zEhIEdD9LLZiaoMcE7IAqn8ho
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTu__OUSJ4R4EKBu4jOi2ZAdHohpVQIBy3-SfnI8FYpN8wVC9kJG_aWuk_w
src="https://encrypted-tbn3.gstatic.com/images?q=tbn:ANd9GcR3Bf7YtsHJ813A5_wWzpxIy4MbEmqz5NLw3qv1nPxOZqVjH7QlY-qYSCg
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcToB4nJPqVwnzn0xeasnXyhxGgOqHXdypE6KZIMTfV9k52eIrE3iYsA6Ixm
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTkKw0LpqdB2eQMUpwdQdvM9DTeNtq1mrvMNivoQtN37p3m0OPsx4ME9i4O
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSZGzMf_3hmdDktz91yp5ZQi-eGWLCenZ0U446sXT2nqYuwlWRI_V_BVIWi
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTQF-55T5GM3dLdaoafPdlIYK0ESNvM6-Bsb4-B2rQTeyD5gGoCKxokExM-
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcRoRjo4TFeXmx47zE6VH0ylcO0IQ2HBsOHYIMJCI9MsRyg_PF1WhHbqG76Q
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcRrdegt1koEy51dLWrJAbVMJBlCEZ7fPl2mztYYM6onvxocRCq030Ft1gE
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcTtnQpte0uq9Ue9nsg25GeO1kw_-Hcn69ozTQkiMBHrXKwlANutyhwKD9XM
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNRdxzmuFKABoGgyv2SC0gMticosL2LB3V1fBMOwNtVBZxHkyMw4IcWBFj
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQr40CEf75nWCj5dg-oeKtb9zK6mhktu7vnfoYAh5ioy34goC3c9ptDkQwP
src="https://encrypted-tbn2.gstatic.com/images?q=tbn:ANd9GcQUnyHrVEbppqhZnWnQrijhBFP0X34gRf7pKw6PdT4ggepB2k9g-p71sgGh
src="https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcR9Us9qblbTJaw47gULXCI8sHKN4I61gYsT2ijebtZzgsMDI8GmYqQpIIw
src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSIrW-IbBZjM9Ztn60r9QE1_FIMjt494qGX12tqsLsibYPLuFVwyVSgz1I

通过网络得到html,并解析出其中网址(JAVA程序)的更多相关文章

  1. Java匹马行天下之 Java国出了个Java——举国欢庆

    Java帝国的崛起 前言: 看庭前花开花落,宠辱不惊, 望天上云卷云舒,去留无意. 闹心的事儿,选择释怀: 纠缠的人儿,试着放下, 生活其实很美. 心若向阳,就无惧悲伤. 愿你明朗坦荡纵情豁达,有得有 ...

  2. Android网络请求与数据解析,使用Gson和GsonFormat解析复杂Json数据

    版权声明:未经博主允许不得转载 一:简介 [达叔有道]软件技术人员,时代作者,从 Android 到全栈之路,我相信你也可以!阅读他的文章,会上瘾!You and me, we are family ...

  3. 初探iOS网络开发,数据解析。

    通过大众点评平台开发来简单了解一下,oc的网络编程和数据解析(json) 首先我们需要到大大众点评开发者平台申请一个key.http://developer.dianping.com/app/tech ...

  4. Flutter网络请求和数据解析

    一:前言 - 什么是反射机制,Flutter为什么禁用反射机制? 在Flutter中它的网络请求和数据解析稍微的比较麻烦一点,因为Flutter不支持反射机制.相信大家都看到这么一条,就是Flutte ...

  5. Android okHttp网络请求之Json解析

    前言: 前面两篇文章介绍了基于okHttp的post.get请求,以及文件的上传下载,今天主要介绍一下如何和Json解析一起使用?如何才能提高开发效率? okHttp相关文章地址: Android o ...

  6. ZeroMQ接口函数之 :zmq_z85_decode – 从一个用Z85算法生成的文本中解析出二进制密码

    ZeroMQ 官方地址 :http://api.zeromq.org/4-0:zmq_z85_decode zmq_z85_decode(3)         ØMQ Manual - ØMQ/4.1 ...

  7. js中解析json对象:JSON.parse()用于从一个字符串中解析出json对象, JSON.stringify()用于从一个对象解析出字符串。

    JSON.parse()用于从一个字符串中解析出json对象. var str = '{"name":"huangxiaojian","age&quo ...

  8. GJM : Unity3D 常用网络框架与实战解析 【笔记】

    Unity常用网络框架与实战解析 1.Http协议          Http协议                  存在TCP 之上 有时候 TLS\SSL 之上 默认端口80 https 默认端口 ...

  9. 网络热恋之XML解析

    XML 可扩展标记语言 用于标记电子文件使其具有结构性的标记语言,可以用来标记数据.定义数据类型,是一种允许用户对自己的标记语言进行定义的源语言 易读性高,编码手写难度小,数据量大 NSXMLPars ...

随机推荐

  1. 老李分享:jvm垃圾回收

    老李分享:jvm垃圾回收   poptest是国内唯一一家培养测试开发工程师的培训机构,以学员能胜任自动化测试,性能测试,测试工具开发等工作为目标.如果对课程感兴趣,请大家咨询qq:908821478 ...

  2. 博客搬到CSDN了

    新博客地址: http://blog.csdn.net/enlangs

  3. Java与面向对象之随感(2)

    我们知道Java语言的一大特性就是相比于c语言和c++语言,其更加安全.那么Java安全性的一个重要保证就是它取消了指针,并且坚决反对数组的出界(c++对当数组超出上限但是还进行读写操作时允许的!), ...

  4. NDK 线程同步

    使用场景 对底层代码进行 HOOK, 不可避免的要考虑多线程同步问题, 当然也可以写个类似 java 的线程本地变量来隔离内存空间. 死锁分析 恩, 道理其实大家都懂的, 毕竟大学就学了操作系统,理论 ...

  5. 读书笔记 effective c++ Item 50 了解何时替换new和delete 是有意义的

    1. 自定义new和delete的三个常见原因 我们先回顾一下基本原理.为什么人们一开始就想去替换编译器提供的operator new和operator delete版本?有三个最常见的原因: 为了检 ...

  6. 初识ElasticSearch

    概述 Elasticsearch是一个基于Apache Lucene(TM)的开源搜索引擎.无论在开源还是专有领域,Lucene可以被认为是迄今为止最先进.性能最好的.功能最全的搜索引擎库. 分布式的 ...

  7. stm32之USART学习

    首先,我是看着这位博主的文章受到的启发,进而加深了自己对USART的理解.下面是自己改装并实验过的程序. 原文:http://www.cnblogs.com/greatwgb/archive/2011 ...

  8. Go - 第一个 go 程序 -- helloworld

    创建程序目录 接着上一节的内容,在我们的workspace (D:\Gopher) 里面创建子目录 hello,他的绝对路径为:D:\Gopher\src\github.com\tuo\hello 创 ...

  9. hibernate 多对多关系总结

    hibernate中,对对象关系的映射处理估计是最让人迷惑和头疼的,特别是cascade和inverse属性的使用,不知已经杀死了我多少个脑细胞了,好记性永远比不上烂笔头,为了能节省自己的脑细胞,降低 ...

  10. sql server 2008 数据库管理系统使用SQL语句创建登录用户详细步骤

    --服务器角色:--固定服务器角色具有一组固定的权限,并且适用于整个服务器范围. 它们专门用于管理 SQL Server,且不能更改分配给它们的权限. --可以在数据库中不存在用户帐户的情况下向固定服 ...