
0. 字节流与二进制文件


  1. package experiment.io;
  2. import java.io.DataInputStream;
  3. import java.io.DataOutputStream;
  4. import java.io.FileInputStream;
  5. import java.io.FileNotFoundException;
  6. import java.io.FileOutputStream;
  7. import java.io.IOException;
  8. public class Experiment {
  9. public static void main(String[] args) throws FileNotFoundException {
  10. FileOutputStream outFile = new FileOutputStream("student.data");
  11. FileInputStream inFile = new FileInputStream("student.data");
  12. try (DataOutputStream dataOutput = new DataOutputStream(outFile);
  13. DataInputStream dataInput = new DataInputStream(inFile)) {
  14. /**
  15. * 将学生信息写入student.data文件中
  16. */
  17. Student st = new Student(2018211, "张三", 15, 3);
  18. dataOutput.writeInt(st.getId());
  19. dataOutput.writeUTF(st.getName());
  20. dataOutput.writeInt(st.getAge());
  21. dataOutput.writeDouble(st.getGrade());
  22. dataOutput.flush();
  23. /**
  24. * 将student.data文件中的数据重新读出到newSt对象中
  25. */
  26. Student newSt = new Student();
  27. newSt.setId(dataInput.readInt());
  28. newSt.setName(dataInput.readUTF());
  29. newSt.setAge(dataInput.readInt());
  30. newSt.setGrade(dataInput.readDouble());
  31. System.out.println(newSt.toString());
  32. } catch (IOException e) {
  33. e.printStackTrace();
  34. }
  35. }
  36. }


  • 二进制文件与文本文件的区别:

    • 二进制文件可以存储char/int/long等各种变量类型的值,实际上存储的是01字符串,这也表明存储数据字节大小的不同;而文本文件只能存储char型的字符变量,其每条数据是固定长度的,如ASCII码存储的每个字符为1字节。
    • 二进制文件是将内存中的数据以二进制形式原样写入文件,读取时不需要转换就可以直接放入内存,读写速度快,但是如果不借助专门的程序解析,其可读性差;而文本文件有一个编码方式(如ASCII码),写入时先将内存中的数据转化为对应编码的字符再写入文件,读取时需要解码后再读出对应字符,读写速度较慢,但是可读性好。
  • try...catch...finally注意事项 :
    • 即使try或catch语句中有return语句,也要执行完finally中的语句,程序才可能结束;finally常用于关闭资源。
    • 执行一次try块最多只会执行一个catch块(第一个与异常类型匹配的catch块)。
  • 使用try-with-resources关闭资源:可以简化关闭资源的步骤,直接将初始化资源的代码写在try后的括号中,资源会在try块结束时自动关闭。

1. 字符流与文本文件:使用 PrintWriter(写),BufferedReader(读)


  1. package experiment.io;
  2. import java.io.BufferedReader;
  3. import java.io.FileInputStream;
  4. import java.io.FileNotFoundException;
  5. import java.io.IOException;
  6. import java.io.InputStreamReader;
  7. import java.io.PrintWriter;
  8. import java.io.UnsupportedEncodingException;
  9. import java.util.ArrayList;
  10. import java.util.List;
  11. import java.util.Scanner;
  12. public class ExperimentFirst {
  13. List<Student> students = new ArrayList<Student>();
  14. public static void ListreadStudents(String fileName) throws FileNotFoundException, UnsupportedEncodingException {
  15. ArrayList<Student> students = new ArrayList<Student>();
  16. FileInputStream file = new FileInputStream("students.txt");
  17. InputStreamReader in = new InputStreamReader(file, "UTF-8");// 解决中文乱码问题
  18. String s = null;
  19. try (BufferedReader buf = new BufferedReader(in)) {
  20. while ((s = buf.readLine()) != null) {
  21. String[] item = s.split("\\s+");
  22. Student st = null;
  23. /**
  24. * 出错行处理,增强程序的健壮性
  25. */
  26. try {
  27. st = new Student(Integer.parseInt(item[0]), item[1], Integer.parseInt(item[2]),
  28. Integer.parseInt(item[3]));
  29. students.add(st);
  30. } catch (ArrayIndexOutOfBoundsException e) {
  31. // 处理数组越界,即一行中内容过多或则内容缺少问题
  32. System.out.println(e);
  33. continue;
  34. } catch (NumberFormatException e) {
  35. // 处理数据类型不对应问题
  36. System.out.println(e);
  37. continue;
  38. }
  39. }
  40. for (Student e : students) {
  41. System.out.println(e.toString());
  42. }
  43. } catch (FileNotFoundException e) {
  44. e.printStackTrace();
  45. } catch (IOException e) {
  46. e.printStackTrace();
  47. }
  48. }
  49. public static void main(String[] args) {
  50. Scanner sc = new Scanner(System.in);
  51. String fileName = sc.nextLine();
  52. try {
  53. ExperimentFirst.ListreadStudents(fileName);
  54. } catch (FileNotFoundException | UnsupportedEncodingException e) {
  55. // TODO Auto-generated catch block
  56. e.printStackTrace();
  57. }
  58. /**
  59. * 使用PrintWriter将Student对象写入文本文件
  60. */
  61. List<Student> stuList = new ArrayList<>();
  62. Student d1 = new Student(1,"x",18,99.5);
  63. Student d2 = new Student(2,"x",19,100.0);
  64. Student d3 = new Student(3,"x",20,59.5);
  65. stuList.add(d1);
  66. stuList.add(d2);
  67. stuList.add(d3);
  68. PrintWriter printWriter = null;
  69. try {
  70. System.out.println("请输入要写入数据的文件名:");
  71. fileName = sc.nextLine();
  72. printWriter = new PrintWriter(fileName);
  73. for (Student e : stuList) {
  74. printWriter.write(String.valueOf(e.getId()) + " ");
  75. printWriter.write(e.getName() + " ");
  76. printWriter.write(String.valueOf(e.getAge()) + " ");
  77. printWriter.write(String.valueOf(e.getGrade()) + "\n");
  78. }
  79. } catch (FileNotFoundException e) {
  80. e.printStackTrace();
  81. } finally {
  82. printWriter.close();// 关闭资源,保存
  83. }
  84. sc.close();
  85. }
  86. }


  • 中文乱码问题可以通过InputStreamReader解决:在初始化时传入编码方式(如"UTF-8"),用它代替使用平台默认编码的FileReader,即可按指定编码正确读取中文。
  • 在解决错误行问题时,对应的错误情况应用相应的catch块抓取即可解决。比如每行只有3个数据的错误,即说明它会出现数组越界的问题,增加ArrayIndexOutOfBoundsException的catch块即可。

2. 缓冲流(结合使用JUnit进行测试)



  1. package experiment.io;
  2. import java.io.FileNotFoundException;
  3. import java.io.FileOutputStream;
  4. import java.io.PrintWriter;
  5. import java.util.Random;
  /**
   * Generates the data file for the read-speed comparison: ten million pseudo-random
   * integers from 0 to 10, one per line, written to {@code student.txt}.
   */
  public class ExperimentSecond {
      /**
       * Writes the numbers using a generator with the fixed seed 100, so every run produces
       * the same file.
       *
       * @param args unused
       */
      public static void main(String[] args) {
          final int total = 10_000_000;
          final Random generator = new Random(100);
          try (PrintWriter writer = new PrintWriter(new FileOutputStream("student.txt"))) {
              int remaining = total;
              while (remaining > 0) {
                  writer.println(generator.nextInt(11));
                  remaining--;
              }
          } catch (FileNotFoundException ex) {
              ex.printStackTrace();
          }
      }
  }

代码2:使用JUnit测试BufferedReader与Scanner的读取效率

  1. package experiment.io;
  2. import java.io.BufferedReader;
  3. import java.io.FileInputStream;
  4. import java.io.FileNotFoundException;
  5. import java.io.IOException;
  6. import java.io.InputStreamReader;
  7. import java.util.Scanner;
  8. import org.junit.jupiter.api.Test;
  9. class ReadTest {
  10. @Test
  11. void testBufferedReader() throws IOException {
  12. int count = 0;
  13. int sum = 0;
  14. FileInputStream file = new FileInputStream("student.txt");
  15. InputStreamReader in = new InputStreamReader(file);
  16. try(BufferedReader br = new BufferedReader(in)) {
  17. String s = null;
  18. while((s = br.readLine()) != null) {
  19. count++;
  20. sum += Integer.parseInt(s);
  21. }
  22. }
  23. System.out.printf("testBufferedReader: count = %d, sum = %d, avg = %.5f\n", count, sum, sum * 1.0 / count);
  24. }
  25. @Test
  26. void testScanner() throws FileNotFoundException {
  27. int count = 0;
  28. int sum = 0;
  29. FileInputStream file = new FileInputStream("student.txt");
  30. try(Scanner sc = new Scanner(file)) {
  31. while(sc.hasNextLine()) {
  32. count += 1;
  33. sum += Integer.parseInt(sc.nextLine());
  34. }
  35. }
  36. System.out.printf("testScanner: count = %d, sum = %d, avg = %.5f\n", count, sum, sum * 1.0 / count);
  37. }
  38. }


  • 在JUnit中运行测试代码时,要在被测试的方法上加上@Test注解,否则该方法不会被当作测试执行。
  • 格式化输出的format方法基本上和printf一致。
  • 测试代码效率时,应设置基本相同的代码,以确保测试变量的单一性。
  • Scanner的方法中,hasNextXXX应该与nextXXX对应使用,否则会提示错误信息。

3. 字节流之对象流


  1. package experiment.io;
  2. import java.io.FileInputStream;
  3. import java.io.FileNotFoundException;
  4. import java.io.FileOutputStream;
  5. import java.io.IOException;
  6. import java.io.ObjectInputStream;
  7. import java.io.ObjectOutputStream;
  8. import java.util.ArrayList;
  9. import java.util.List;
  10. public class ExperimentThird {
  11. /**
  12. * 通过ObjectOutputStream和FileOutputStream将Student对象写出到文件中。
  13. * @param stuList 代表要写入文件中的若干个Student数据。
  14. * @throws IOException
  15. */
  16. public static void writeStudent(List<Object> stuList) throws IOException {
  17. FileOutputStream file = new FileOutputStream("student.txt");
  18. Student[] students = new Student[stuList.size()];
  19. for (int i = 0; i < students.length; i++) {
  20. students[i] = (Student)stuList.get(i);
  21. }
  22. try (ObjectOutputStream out = new ObjectOutputStream(file)) {
  23. out.writeObject(students);
  24. }
  25. }
  26. /**
  27. * 通过ObjectInOutputStream和FileInputStream将Student对象读入到变量中。
  28. * @param fileName 表示要访问的文件名
  29. * @return 一个带有若干个Student对象的List
  30. * @throws FileNotFoundException
  31. * @throws ClassNotFoundException
  32. */
  33. public static List<Object> readStudents(String fileName) throws FileNotFoundException, ClassNotFoundException {
  34. List<Object> newStuList = new ArrayList<>();
  35. FileInputStream file = new FileInputStream(fileName);
  36. Student[] students = null;
  37. try (ObjectInputStream in = new ObjectInputStream(file)) {
  38. students = (Student[])in.readObject();
  39. } catch (IOException e) {
  40. e.printStackTrace();
  41. }
  42. for (Student st : students) {
  43. newStuList.add(st);
  44. }
  45. return newStuList;
  46. }
  47. public static void main(String[] args) throws IOException, ClassNotFoundException {
  48. List<Object> stuList = new ArrayList<>();
  49. Student st1 = new Student(1, "a", 18, 10);
  50. Student st2 = new Student(2, "b", 19, 11);
  51. Student st3 = new Student(3, "c", 20, 12);
  52. stuList.add(st1);
  53. stuList.add(st2);
  54. stuList.add(st3);
  55. ExperimentThird.writeStudent(stuList);
  56. List<Object> newStuList = ExperimentThird.readStudents("student.txt");
  57. for (Object e : newStuList) {
  58. System.out.println(e);
  59. }
  60. }
  61. }


  • 使用ObjectInputStream和ObjectOutputStream读写文件时,读写的对象对应的那个类应该进行序列化,即实现Serializable接口。上面代码中的Student类就实现了该接口。
  • 序列化的作用是把内存中对象的状态转换为字节序列保存下来(例如写入文件),并且可以把保存的对象状态再读出来(反序列化)。序列化时只保存对象的状态(字段),不保存对象的方法。
  • 在读写序列化后的对象时,可以通过writeObject和readObject的方法读取一个数组,如例子中的Student[] students 数组。

5. 文件操作


  1. package experiment.io;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import java.nio.file.Path;
  5. import java.nio.file.Paths;
  6. import java.util.Scanner;
  /**
   * Searches a directory tree for files and directories whose name contains a keyword.
   *
   * @version 1.00 05 Sep 1997
   * @author Gary Cornell
   */
  public class ExperimentFifth {
      /**
       * Prints the canonical path of every entry under {@code dir} whose name contains
       * {@code fileName}, descending into subdirectories recursively.
       * A path that is not a directory, or a directory that cannot be listed, is skipped.
       *
       * @param dir directory where the search starts
       * @param fileName text that the name of a matching entry must contain
       */
      public static void findFile(Path dir, String fileName) {
          File pathName = new File(dir.toString());
          String[] fileNames = pathName.list();
          if (fileNames == null) {
              // list() returns null when the path is not a directory or cannot be read
              return;
          }
          for (String name : fileNames) {
              File f = new File(pathName.getPath(), name);
              if (name.contains(fileName)) {
                  try {
                      System.out.println(f.getCanonicalPath());
                  } catch (IOException e) {
                      // Report the failure and keep going with the remaining entries.
                      e.printStackTrace();
                  }
              }
              if (f.isDirectory()) {
                  findFile(f.toPath(), fileName);
              }
          }
      }

      /**
       * Reads a root path and a keyword from standard input and runs the search.
       *
       * @param args unused
       */
      public static void main(String[] args) {
          try (Scanner sc = new Scanner(System.in)) {
              System.out.println("输入根路径信息:");
              String path = sc.nextLine();
              Path dir = Paths.get(path);
              System.out.println("输入要查找的文件名:");
              String fileName = sc.nextLine();
              ExperimentFifth.findFile(dir, fileName);
          }
      }
  }


  1. 输入根路径信息:
  2. G:\\eclipse-workspace
  3. 输入要查找的文件名:
  4. Experiment
  5. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\Experiment (1).launch
  6. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\Experiment.launch
  7. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentFifth.launch
  8. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentFirst.launch
  9. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentSecond.launch
  10. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentSixth.launch
  11. G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentThird.launch
  12. G:\eclipse-workspace\IO\bin\experiment\io\Experiment.class
  13. G:\eclipse-workspace\IO\bin\experiment\io\ExperimentFifth.class
  14. G:\eclipse-workspace\IO\bin\experiment\io\ExperimentFirst.class
  15. G:\eclipse-workspace\IO\bin\experiment\io\ExperimentSecond.class
  16. G:\eclipse-workspace\IO\bin\experiment\io\ExperimentSixth.class
  17. G:\eclipse-workspace\IO\bin\experiment\io\ExperimentThird.class
  18. G:\eclipse-workspace\IO\src\experiment\io\Experiment.java
  19. G:\eclipse-workspace\IO\src\experiment\io\ExperimentFifth.java
  20. G:\eclipse-workspace\IO\src\experiment\io\ExperimentFirst.java
  21. G:\eclipse-workspace\IO\src\experiment\io\ExperimentSecond.java
  22. G:\eclipse-workspace\IO\src\experiment\io\ExperimentSixth.java
  23. G:\eclipse-workspace\IO\src\experiment\io\ExperimentThird.java
  24. G:\eclipse-workspace\javaLearn\bin\ExperimentSix
  25. G:\eclipse-workspace\javaLearn\src\ExperimentSix


  • 该查找路径的方法是通过递归实现的:首先获得一个目录下所有文件和子目录的名称,存入数组中,再对数组中的每个名称进行遍历;如果名称包含fileName关键字,则输出其路径;如果发现它是一个目录,则递归地继续在该目录下查找,以此类推。
  • 这里要注意的一点是:通过File中的list()方法可以获得目录下所有文件和子目录的名称(字符串数组)。

6. 正则表达式


  1. package experiment.io;
  2. import java.io.*;
  3. import java.net.*;
  4. import java.util.regex.*;
  /**
   * This program downloads a web page and prints every piece of text in it that matches a
   * regular expression; the expression currently selected describes image references.
   * Start the program as <br>
   * java ExperimentSixth URL
   *
   * @version 1.01 2004-06-04
   * @author Cay Horstmann
   */
  public class ExperimentSixth {
      /** Alternative pattern: anchor tags of the form {@code <a href=...>}. */
      private static final String patternString = "<a\\s+href\\s*=\\s*(\"[^\"]*\"|[^\\s>]*)\\s*>";

      /** Alternative pattern: integer strings with an optional sign. */
      private static final String patternImgString = "[+-]?[0-9]+";

      /** Alternative pattern: a single Chinese character. */
      private static final String patternChineseString = "[\u4e00-\u9fa5]";

      /** Pattern used by main: image references ending in jpg or gif. */
      private static final String patternPictureString = "img(.*?)(src=)(.*?)(jpg|gif)\"";

      /**
       * Reads the page at the URL given as the first argument, or a default URL when no
       * argument is given, and prints each match of the image pattern on its own line.
       *
       * @param args optional; {@code args[0]} is the URL of the page to scan
       */
      public static void main(String[] args) {
          try {
              // get URL string from command line or use default
              String urlString;
              if (args.length > 0) {
                  urlString = args[0];
              } else {
                  urlString = "http://cec.jmu.edu.cn/index.jsp";
              }

              // read contents into string builder; try-with-resources closes the
              // network stream even if reading fails part-way through
              StringBuilder input = new StringBuilder();
              try (Reader in = new BufferedReader(new InputStreamReader(new URL(urlString).openStream()))) {
                  int ch;
                  while ((ch = in.read()) != -1) {
                      input.append((char) ch);
                  }
              }

              // search for all occurrences of pattern
              Pattern pattern = Pattern.compile(patternPictureString, Pattern.CASE_INSENSITIVE);
              Matcher matcher = pattern.matcher(input);
              while (matcher.find()) {
                  System.out.println(matcher.group());
              }
          } catch (IOException e) {
              e.printStackTrace();
          } catch (PatternSyntaxException e) {
              e.printStackTrace();
          }
      }
  }


  1. IMG src="images/jimei12.jpg"
  2. IMG src="images/1_dh_01.gif"
  3. IMG src="images/1_dh_03.gif"
  4. IMG src="images/1_dhs_01.gif"
  5. IMG src="images/1_dhs_03.gif"
  6. IMG src="images/1_body_01.gif"
  7. IMG border="0" src="images/1_more.gif"
  8. IMG src="images/1_huandeng_01.gif"
  9. imgdiv" style="padding:0px;border:0px;"><a id="u_u2_url" target="_blank"><img id="u_u2_pic" border=0 src="/system/resource/images/space.gif"
  10. IMG border="0" src="images/1_more.gif"
  11. IMG src="images/1_ad001.gif"
  12. IMG src="images/1_ico001.gif"
  13. IMG border="0" src="images/1_more.gif"
  14. IMG src="images/1_title_index3.gif"
  15. IMG src="images/1_ico001.gif"
  16. IMG border="0" src="images/1_more.gif"
  17. IMG src="images/1_title_index3.gif"
  18. IMG src="images/1_ico001.gif"
  19. IMG border="0" src="images/1_more.gif"
  20. IMG src="images/1_title_index3.gif"
  21. IMG src="images/1_ico001.gif"
  22. IMG border="0" src="images/1_more.gif"
  23. IMG src="images/1_title_index3.gif"
  24. IMG border="0" src="images/1_more.gif"
  25. IMG border="0" src="images/1_more.gif"
  26. IMG src="images/1_list_body_bg02.gif"
  27. IMG src="images/1_yqlj_1.gif"
  28. IMG src="images/1_yqlj_3.gif"


  1. img(.*?)(src=)(.*?)(jpg|gif)\"
  • 如上代码为匹配图片的正则表达式。首先img匹配img这三个字符(代码中使用了Pattern.CASE_INSENSITIVE,所以IMG也能匹配);(.*?)以尽可能少的方式匹配任意字符;(src=)匹配一次src=;接下来的(.*?)含义同上;(jpg|gif)匹配一次jpg或gif;\" 即匹配一次双引号。注意该表达式没有要求以<img开头,所以也会匹配到imgdiv这样的文本(见上面输出的第9行)。
  1. [+-]?[0-9]+
  • 上述代码匹配所有数字字符串。[+-]?表示匹配0次或1次+、-中的一个;[0-9]+表示匹配1次或多次数字字符。


