对于文章的字母、单词、短语,(无用词表)的检索Java代码实现
日期:2019.5.9
博客期:073
星期四
今天软件工程课上又做了一次测试,老师说我们的速度太慢了,实际上我也觉得自己很慢。老师说,这只相当于上一届学生在大二上学期时的速度。所以呢?意思就是说,我们和上一届的学长学姐们相比,差的是天与地的距离啊!emmmm......唉~我也承认了!以下是我提交的源码,请各位欣赏,有疑问的话,评论区里见!
源码:
package basic; public class Chara {
public char str;
public int times;
}
char.Chara.java
package basic; import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Counts how often each ASCII letter occurs in a text file, ignoring case.
 *
 * <p>{@link #list} holds one {@code Chara} per letter. Entries are always located through their
 * {@code str} field, never through their position in the list, so counting, frequency lookup and
 * display stay correct after {@link #changeChannel()} has reordered the list.
 */
public class Justice {
    /** One entry per letter 'a'..'z'; {@link #changeChannel()} may reorder it. */
    public List<Chara> list = new ArrayList<Chara>();

    /** Total number of letters counted so far by {@link #dealFile(String)}. */
    public int length = 0;

    /** Creates the 26 zeroed entries, initially in alphabetical order. */
    public Justice() {
        for (char letter = 'a'; letter <= 'z'; ++letter) {
            Chara entry = new Chara();
            entry.str = letter;
            entry.times = 0;
            list.add(entry);
        }
    }

    /**
     * Tells whether a character takes part in the statistics.
     *
     * @param c character to test
     * @return {@code true} when {@code c} is in 'a'..'z' or 'A'..'Z'
     */
    public boolean isAccess(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Returns the relative frequency of a letter among all letters counted so far.
     *
     * @param c letter in either case
     * @return {@code times / length} for that letter, or {@code 0.0} when nothing has been
     *     counted yet or {@code c} has no entry in {@link #list}
     */
    public double p(char c) {
        Chara entry = entryFor(changeToSmall(c));
        if (entry == null || length == 0) {
            // Avoids the NaN that 0/0 would produce and the lookup failure for non-letters.
            return 0.0;
        }
        return (double) entry.times / (double) length;
    }

    /**
     * Maps an upper-case ASCII letter to lower case; lower-case letters are returned unchanged.
     *
     * @param c character to convert; the result is only meaningful for ASCII letters
     * @return the converted character
     */
    public char changeToSmall(char c) {
        if (c >= 'a' && c <= 'z') {
            return c;
        }
        return (char) (c - 'A' + 'a');
    }

    /**
     * Reads a file token by token and adds every ASCII letter it contains to the counters.
     *
     * @param filename path of the text file to analyse
     * @throws Exception if the file cannot be created or opened
     */
    public void dealFile(String filename) throws Exception {
        File f = new File(filename);
        if (!f.exists()) {
            // Kept from the original: a missing input file is created empty and then read.
            f.createNewFile();
        }
        Scanner sc = new Scanner(f);
        try {
            while (sc.hasNext()) {
                String token = sc.next();
                for (int t = 0; t < token.length(); ++t) {
                    char c = token.charAt(t);
                    if (!isAccess(c)) {
                        continue;
                    }
                    // Look the entry up by letter: the list may already have been sorted.
                    Chara entry = entryFor(changeToSmall(c));
                    if (entry != null) {
                        entry.times++;
                        ++length;
                    }
                }
            }
        } finally {
            // Release the file handle even when reading fails half-way.
            sc.close();
        }
    }

    /**
     * Sorts {@link #list} by descending count. Entries with equal counts keep their relative
     * order, because only strictly smaller neighbours are swapped.
     */
    public void changeChannel() {
        int size = list.size();
        for (int pass = 0; pass < size; ++pass) {
            for (int j = 0; j < size - 1; ++j) {
                if (list.get(j).times < list.get(j + 1).times) {
                    Chara temp = list.get(j);
                    list.set(j, list.get(j + 1));
                    list.set(j + 1, temp);
                }
            }
        }
    }

    /**
     * Prints one line per entry, in the current list order, showing the entry's own letter,
     * its count and its frequency.
     */
    public void display() {
        for (int i = 0; i < list.size(); ++i) {
            Chara entry = list.get(i);
            // Use the letter stored in the entry; the index stops matching it after sorting.
            System.out.println(entry.str + "出现了" + entry.times + "次,它的频率是" + p(entry.str) + "\t");
        }
    }

    /** Finds the entry whose {@code str} equals the given lower-case letter, or {@code null}. */
    private Chara entryFor(char letter) {
        for (Chara entry : list) {
            if (entry.str == letter) {
                return entry;
            }
        }
        return null;
    }
}
Justice.java
package basic;

/**
 * Command-line entry point for the letter-frequency analysis.
 */
public class Main {
    /**
     * Counts the letters of one file, sorts the result by descending count and prints it.
     *
     * @param args optional; {@code args[0]} is the file to analyse. When absent the original
     *     default {@code txt/piao.txt} is used.
     * @throws Exception if the file cannot be created or read
     */
    public static void main(String[] args) throws Exception {
        String path = "txt/piao.txt";
        if (args != null && args.length > 0) {
            path = args[0];
        }
        Justice justice = new Justice();
        justice.dealFile(path);
        justice.changeChannel();
        justice.display();
    }
}
Main.java
package more; public class Chara {
public String str = "";
public int times = 0;
public Chara(){ }
public Chara(String str,int times){
this.str = str;
this.times = times;
}
}
word.Chara.java
package more; import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Word-frequency table for a text file, with an optional stop-word filter.
 *
 * <p>Tokens are normalised by {@link #changeWord(String)}, accepted by {@link #isAccess(String)}
 * and counted in {@link #list}. The stop words are loaded from {@code txt/stopword.txt} by the
 * constructors and are only applied when {@link #isAddmitted} is {@code true}.
 */
public class EditTable {
    /** Whitespace-separated stop-word file read by the constructors. */
    private static final String STOP_WORD_FILE = "txt/stopword.txt";

    /** Counted words; {@link #changeChannel()} sorts it by descending count. */
    public List<Chara> list = new ArrayList<Chara>();

    /** Stop words as read from the stop-word file. */
    public List<String> listString = new ArrayList<String>();

    /** Total number of accepted word occurrences. */
    public int length = 0;

    /** When {@code true}, words found in {@link #listString} are rejected. */
    public boolean isAddmitted = false;

    /**
     * Creates a table with the stop-word filter switched off.
     *
     * @throws FileNotFoundException if the stop-word file does not exist
     */
    public EditTable() throws FileNotFoundException {
        this(false);
    }

    /**
     * Creates a table and loads the stop-word list.
     *
     * @param isAd whether stop words are filtered out
     * @throws FileNotFoundException if the stop-word file does not exist
     */
    public EditTable(boolean isAd) throws FileNotFoundException {
        this.isAddmitted = isAd;
        loadStopWords();
    }

    /** Reads every token of the stop-word file into {@link #listString} and closes the file. */
    private void loadStopWords() throws FileNotFoundException {
        Scanner sc = new Scanner(new File(STOP_WORD_FILE));
        try {
            while (sc.hasNext()) {
                listString.add(sc.next());
            }
        } finally {
            sc.close();
        }
    }

    /**
     * Normalises a token: removes the punctuation marks “ , ” . ? ! : and lower-cases the rest.
     *
     * @param str raw token
     * @return the normalised token, possibly empty
     */
    public String changeWord(String str) {
        String newStr = str.replace("“", "");
        newStr = newStr.replace(",", "");
        newStr = newStr.replace("”", "");
        newStr = newStr.replace(".", "");
        newStr = newStr.replace("?", "");
        newStr = newStr.replace("!", "");
        newStr = newStr.replace(":", "");
        return newStr.toLowerCase();
    }

    /**
     * Tells whether a word is in the stop-word list, ignoring case on both sides.
     *
     * @param str word to look up
     * @return {@code true} when the word is a stop word
     */
    public boolean isInUnUseTable(String str) {
        for (String stopWord : listString) {
            if (stopWord.equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a token counts as a word.
     *
     * @param str raw or already normalised token
     * @return {@code true} when the normalised token is non-empty, consists of ASCII letters
     *     only and, if the filter is on, is not a stop word
     */
    public boolean isAccess(String str) {
        String word = changeWord(str);
        if (word.length() == 0) {
            // A token made of punctuation only must not be counted as the empty word.
            return false;
        }
        if (isAddmitted && isInUnUseTable(word)) {
            return false;
        }
        // Iterate over the normalised word: its length can be shorter than the raw token's.
        for (int i = 0; i < word.length(); ++i) {
            char c = word.charAt(i);
            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
                return false;
            }
        }
        return true;
    }

    /** @return whether the stop-word filter is on */
    public boolean isAddmitted() {
        return isAddmitted;
    }

    /** @param isAddmitted whether the stop-word filter is on */
    public void setAddmitted(boolean isAddmitted) {
        this.isAddmitted = isAddmitted;
    }

    /**
     * Finds the position of a word in {@link #list}.
     *
     * @param c normalised word
     * @return index of the entry whose text equals {@code c}, or -1 when absent
     */
    public int seatAt(String c) {
        for (int i = 0; i < list.size(); ++i) {
            if (list.get(i).str.equals(c)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Normalises a token and, when it is accepted, adds one occurrence of it to the table.
     *
     * @param str raw token
     */
    public void AddString(String str) {
        String word = changeWord(str);
        if (!isAccess(word)) {
            return;
        }
        int seat = seatAt(word);
        if (seat == -1) {
            list.add(new Chara(word, 1));
        } else {
            list.get(seat).times++;
        }
        ++length;
    }

    /**
     * Sorts {@link #list} by descending count. Entries with equal counts keep their relative
     * order, because only strictly smaller neighbours are swapped.
     */
    public void changeChannel() {
        int size = list.size();
        for (int pass = 0; pass < size; ++pass) {
            for (int j = 0; j < size - 1; ++j) {
                if (list.get(j).times < list.get(j + 1).times) {
                    Chara temp = list.get(j);
                    list.set(j, list.get(j + 1));
                    list.set(j + 1, temp);
                }
            }
        }
    }

    /**
     * Returns the relative frequency of a word.
     *
     * @param str word, normalised before lookup
     * @return {@code times / length}, or 0 when the word is not in the table
     */
    public double p(String str) {
        int seat = seatAt(changeWord(str));
        if (seat == -1) {
            return 0;
        }
        return (double) list.get(seat).times / (double) length;
    }

    /**
     * Prints the entry at the given position: word, count and percentage with two decimals.
     *
     * @param seat index into {@link #list}
     */
    public void display(int seat) {
        Chara cha = list.get(seat);
        // Same value as p(cha.str) for a stored word, without a second scan of the list.
        double share = (double) cha.times / (double) length;
        System.out.println("单词:" + cha.str + (cha.str.length() <= 2 ? "\t\t" : "\t") + "出现次数:" + cha.times + "\t" + "概率为:" + ((double) Math.round(share * 10000) / 100) + "%\t");
    }

    /**
     * Prints the first {@code N} entries, or all of them when the table is shorter.
     *
     * @param N maximum number of entries to print
     */
    public void Display(int N) {
        for (int i = 0; i < N && i < list.size(); ++i) {
            display(i);
        }
    }

    /**
     * Reads a file token by token and counts every accepted word.
     *
     * @param fileName path of the text file to analyse
     * @throws Exception if the file cannot be opened
     */
    public void DealFile(String fileName) throws Exception {
        Scanner sc = new Scanner(new File(fileName));
        try {
            while (sc.hasNext()) {
                AddString(sc.next());
            }
        } finally {
            // Release the file handle even when processing fails half-way.
            sc.close();
        }
    }
}
EditTable.java
package more; import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import more.Chara;

/**
 * Phrase-frequency table for a text file.
 *
 * <p>A phrase is a run of two or more consecutive words joined by single spaces. A run ends at a
 * stop word, at a token that is not a plain word, or after a token that carries punctuation.
 * Every time a word extends a run, all phrases that end at that word are counted.
 */
public class Table {
    /** Whitespace-separated stop-word file read by the constructors. */
    private static final String STOP_WORD_FILE = "txt/stopword.txt";

    /** Counted phrases; {@link #changeChannel()} sorts it by descending count. */
    public List<Chara> list = new ArrayList<Chara>();

    /** Stop words as read from the stop-word file. */
    public List<String> listString = new ArrayList<String>();

    /** Total number of accepted phrase occurrences. */
    public int length = 0;

    /** When {@code true}, {@link #isAccess(String)} also rejects entries found in the stop list. */
    public boolean isAddmitted = false;

    /**
     * Creates a table with {@link #isAddmitted} switched off.
     *
     * @throws FileNotFoundException if the stop-word file does not exist
     */
    public Table() throws FileNotFoundException {
        this(false);
    }

    /**
     * Creates a table and loads the stop-word list.
     *
     * @param isAd initial value of {@link #isAddmitted}
     * @throws FileNotFoundException if the stop-word file does not exist
     */
    public Table(boolean isAd) throws FileNotFoundException {
        this.isAddmitted = isAd;
        loadStopWords();
    }

    /** Reads every token of the stop-word file into {@link #listString} and closes the file. */
    private void loadStopWords() throws FileNotFoundException {
        Scanner sc = new Scanner(new File(STOP_WORD_FILE));
        try {
            while (sc.hasNext()) {
                listString.add(sc.next());
            }
        } finally {
            sc.close();
        }
    }

    /**
     * Normalises a token: removes the punctuation marks “ , ” . ? ! : and lower-cases the rest.
     *
     * @param str raw token or phrase
     * @return the normalised text, possibly empty
     */
    public String changeWord(String str) {
        String newStr = str.replace("“", "");
        newStr = newStr.replace(",", "");
        newStr = newStr.replace("”", "");
        newStr = newStr.replace(".", "");
        newStr = newStr.replace("?", "");
        newStr = newStr.replace("!", "");
        newStr = newStr.replace(":", "");
        return newStr.toLowerCase();
    }

    /**
     * Checks a token for the punctuation marks “ ” , . ? ! :
     *
     * <p>Note the direction of the result, which is kept for existing callers: despite the name,
     * the method returns {@code true} when NONE of the marks is present.
     *
     * @param str token to inspect
     * @return {@code false} when the token contains at least one of the marks, else {@code true}
     */
    public static boolean isContainSpecifical(String str) {
        String[] marks = {"“", "”", ",", ".", "?", "!", ":"};
        for (String mark : marks) {
            if (str.contains(mark)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a word is in the stop-word list, ignoring case on both sides.
     *
     * @param str word to look up
     * @return {@code true} when the word is a stop word
     */
    public boolean isInUnUseTable(String str) {
        for (String stopWord : listString) {
            if (stopWord.equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a word or phrase may be counted.
     *
     * @param str raw or already normalised word or phrase
     * @return {@code true} when the normalised text is non-empty and consists of ASCII-letter
     *     words separated by single spaces (no leading, trailing or doubled space) and, if
     *     {@link #isAddmitted} is on, is not in the stop list
     */
    public boolean isAccess(String str) {
        String text = changeWord(str);
        if (text.length() == 0) {
            return false;
        }
        if (isAddmitted && isInUnUseTable(text)) {
            return false;
        }
        // Starts as true so that a leading space is rejected like a doubled one.
        boolean previousWasSpace = true;
        // Iterate over the normalised text: its length can be shorter than the raw input's.
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            if (c == ' ') {
                // The space is what joins the words of a phrase, so it has to be allowed.
                if (previousWasSpace) {
                    return false;
                }
                previousWasSpace = true;
            } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                previousWasSpace = false;
            } else {
                return false;
            }
        }
        return !previousWasSpace;
    }

    /** @return the current value of {@link #isAddmitted} */
    public boolean isAddmitted() {
        return isAddmitted;
    }

    /** @param isAddmitted new value of {@link #isAddmitted} */
    public void setAddmitted(boolean isAddmitted) {
        this.isAddmitted = isAddmitted;
    }

    /**
     * Finds the position of a phrase in {@link #list}.
     *
     * @param c normalised phrase
     * @return index of the entry whose text equals {@code c}, or -1 when absent
     */
    public int seatAt(String c) {
        for (int i = 0; i < list.size(); ++i) {
            if (list.get(i).str.equals(c)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Normalises a word or phrase and, when it is accepted, adds one occurrence of it.
     *
     * @param str raw word or phrase
     */
    public void AddString(String str) {
        String text = changeWord(str);
        if (!isAccess(text)) {
            return;
        }
        int seat = seatAt(text);
        if (seat == -1) {
            list.add(new Chara(text, 1));
        } else {
            list.get(seat).times++;
        }
        ++length;
    }

    /**
     * Sorts {@link #list} by descending count. Entries with equal counts keep their relative
     * order, because only strictly smaller neighbours are swapped.
     */
    public void changeChannel() {
        int size = list.size();
        for (int pass = 0; pass < size; ++pass) {
            for (int j = 0; j < size - 1; ++j) {
                if (list.get(j).times < list.get(j + 1).times) {
                    Chara temp = list.get(j);
                    list.set(j, list.get(j + 1));
                    list.set(j + 1, temp);
                }
            }
        }
    }

    /**
     * Returns the relative frequency of a phrase.
     *
     * @param str phrase, normalised before lookup
     * @return {@code times / length}, or 0 when the phrase is not in the table
     */
    public double p(String str) {
        int seat = seatAt(changeWord(str));
        if (seat == -1) {
            return 0;
        }
        return (double) list.get(seat).times / (double) length;
    }

    /**
     * Prints the entry at the given position: text, count and percentage with two decimals.
     *
     * @param seat index into {@link #list}
     */
    public void display(int seat) {
        Chara cha = list.get(seat);
        // Same value as p(cha.str) for a stored phrase, without a second scan of the list.
        double share = (double) cha.times / (double) length;
        System.out.println("单词:" + cha.str + (cha.str.length() <= 2 ? "\t\t" : "\t") + "出现次数:" + cha.times + "\t" + "概率为:" + ((double) Math.round(share * 10000) / 100) + "%\t");
    }

    /**
     * Prints the first {@code N} entries, or all of them when the table is shorter.
     *
     * @param N maximum number of entries to print
     */
    public void Display(int N) {
        for (int i = 0; i < N && i < list.size(); ++i) {
            display(i);
        }
    }

    /**
     * Counts every phrase of two or more words that ends at the last word of the given run.
     *
     * <p>For {@code [a, b, c]} this adds {@code "b c"} and {@code "a b c"}. Runs with fewer than
     * two words contribute nothing.
     *
     * @param sqlList consecutive words, oldest first
     */
    public void AddMenu(List<String> sqlList) {
        int count = sqlList.size();
        if (count < 2) {
            return;
        }
        String phrase = sqlList.get(count - 1);
        for (int i = count - 2; i >= 0; --i) {
            phrase = sqlList.get(i) + " " + phrase;
            AddString(phrase);
        }
    }

    /**
     * Reads a file token by token and counts the phrases formed by consecutive words.
     *
     * @param fileName path of the text file to analyse
     * @throws Exception if the file cannot be opened
     */
    public void DealFile(String fileName) throws Exception {
        Scanner sc = new Scanner(new File(fileName));
        try {
            // The run must survive across tokens; a per-token list could never hold two words.
            List<String> run = new ArrayList<String>();
            while (sc.hasNext()) {
                String token = sc.next();
                boolean opensQuote = token.startsWith("“");
                if (opensQuote) {
                    // An opening quotation mark starts a new run before this word.
                    run.clear();
                }
                String word = changeWord(token);
                if (!isPlainWord(word) || isInUnUseTable(word)) {
                    // Stop words and non-words split the text into separate runs.
                    run.clear();
                    continue;
                }
                run.add(word);
                AddMenu(run);
                String rest = opensQuote ? token.substring(1) : token;
                if (!isContainSpecifical(rest)) {
                    // Remaining punctuation closes the run after this word.
                    run.clear();
                }
            }
        } finally {
            // Release the file handle even when processing fails half-way.
            sc.close();
        }
    }

    /** Tells whether the text is non-empty and made of ASCII letters only. */
    private static boolean isPlainWord(String text) {
        if (text.length() == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
                return false;
            }
        }
        return true;
    }
}
Table.java
package running; import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.Scanner; import more.EditTable;
import more.Table;
import basic.Justice;

/**
 * Interactive console front end for the letter, word and phrase statistics.
 */
public class TestMain {
    /**
     * Step 0: prints the letter statistics of a file, sorted by descending count.
     *
     * @param file path of the text file
     * @throws Exception if the file cannot be read
     */
    public static void process0(String file) throws Exception {
        Justice justice = new Justice();
        justice.dealFile(file);
        justice.changeChannel();
        justice.display();
    }

    /**
     * Step 1: prints the word statistics of a file, up to 99999999 entries.
     *
     * @param file path of the text file
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process1(String file) throws Exception {
        EditTable table = new EditTable();
        table.DealFile(file);
        table.changeChannel();
        table.Display(99999999);
    }

    /**
     * Step 2: prints the {@code num} most frequent words of a file.
     *
     * @param file path of the text file
     * @param num maximum number of entries to print
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process2(String file, int num) throws Exception {
        EditTable table = new EditTable();
        table.DealFile(file);
        table.changeChannel();
        table.Display(num);
    }

    /**
     * Step 3: like step 2, but with the stop-word filter switched on.
     *
     * @param file path of the text file
     * @param num maximum number of entries to print
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process3(String file, int num) throws Exception {
        EditTable table = new EditTable(true);
        table.DealFile(file);
        table.changeChannel();
        table.Display(num);
    }

    /**
     * Step 4: prints the {@code num} most frequent phrases of a file.
     *
     * @param file path of the text file
     * @param num maximum number of entries to print
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process4(String file, int num) throws Exception {
        Table table = new Table();
        table.DealFile(file);
        table.changeChannel();
        // Honour the requested limit; it used to be ignored in favour of a fixed 999999.
        table.Display(num);
    }

    /**
     * Appends a blank line followed by the given word to {@code txt/stopword.txt}.
     *
     * @param file the stop word to add (named {@code file} because it arrives in that slot)
     * @throws Exception if the stop-word file cannot be written
     */
    public static void stopWord(String file) throws Exception {
        File f = new File("txt/stopword.txt");
        PrintWriter pw = new PrintWriter(new FileWriter(f, true));
        try {
            pw.println();
            pw.println(file);
        } finally {
            // Flushes and releases the file even when writing fails.
            pw.close();
        }
    }

    /** Prints the command overview shown before every prompt. */
    private static void printMenu() {
        System.out.println("===============================================");
        System.out.println(" p0 + 文件路径 -----------> 分析字母");
        System.out.println(" p1 + 文件路径 -----------> 分析单词");
        System.out.println(" p2 + 文件路径 + 前几项 -----------> 分析频率最高的单词");
        System.out.println(" p3 + 文件路径 + 前几项 -----------> 分析频率最高的单词(过滤)");
        System.out.println(" sw + 单词 -----------> 添加停用词");
        System.out.println(" p4 + 文件路径 + 前几项 -----------> 分析短语");
        System.out.println(" q -----------> 退出");
        System.out.println();
    }

    /**
     * Reads commands from standard input until {@code q} is entered or the input ends.
     *
     * @param args unused
     * @throws Exception if a command fails
     */
    public static void main(String[] args) throws Exception {
        Scanner sc = new Scanner(System.in);
        try {
            while (true) {
                printMenu();
                if (!sc.hasNext()) {
                    // End of input: leave quietly instead of failing in next().
                    break;
                }
                String order = sc.next();
                if (order.equals("q")) {
                    // Checked before the argument is read, so a lone "q" exits at once.
                    System.out.println("欢迎下次使用!");
                    break;
                }
                if (!sc.hasNext()) {
                    break;
                }
                String file = sc.next();
                if (order.equals("p0")) {
                    TestMain.process0(file);
                } else if (order.equals("p1")) {
                    TestMain.process1(file);
                } else if (order.equals("p2")) {
                    int num = sc.nextInt();
                    TestMain.process2(file, num);
                } else if (order.equals("p3")) {
                    int num = sc.nextInt();
                    TestMain.process3(file, num);
                } else if (order.equals("sw")) {
                    TestMain.stopWord(file);
                } else if (order.equals("p4")) {
                    int num = sc.nextInt();
                    TestMain.process4(file, num);
                }
                System.out.println("===============================================");
            }
        } finally {
            sc.close();
        }
    }
}
TestMain.java
package more;

import java.io.File;

/**
 * Batch driver that prints the 40 most frequent words of every file in a directory.
 */
public class Test {
    /**
     * Analyses each regular file directly inside {@code path} with a fresh {@code EditTable}.
     * Nothing happens when {@code path} is not a directory or cannot be listed; entries that
     * are not regular files are skipped.
     *
     * @param path directory to scan (not recursive)
     * @throws Exception if a file or the stop-word list cannot be read
     */
    public static void TestForPath(String path) throws Exception {
        File dir = new File(path);
        if (!dir.isDirectory()) {
            return;
        }
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when the directory cannot be read.
            return;
        }
        for (int i = 0; i < entries.length; ++i) {
            File entry = entries[i];
            if (!entry.isFile()) {
                // A sub-directory cannot be opened by Scanner and would abort the whole run.
                continue;
            }
            String fileName = path + "/" + entry.getName();
            EditTable table = new EditTable();
            System.out.println("====================================================");
            System.out.println("文件名称:" + entry.getName());
            table.DealFile(fileName);
            table.changeChannel();
            table.Display(40);
        }
    }

    /**
     * Runs the batch analysis on the {@code txt} directory.
     *
     * @param args unused
     * @throws Exception if the analysis fails
     */
    public static void main(String[] args) throws Exception {
        TestForPath("txt");
    }
}
Test.java
package running; import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.Scanner; import more.EditTable;
import more.Table;
import basic.Justice;

/**
 * Interactive console front end for the letter, word and phrase statistics.
 */
public class TestMain {
    /**
     * Step 0: prints the letter statistics of a file, sorted by descending count.
     *
     * @param file path of the text file
     * @throws Exception if the file cannot be read
     */
    public static void process0(String file) throws Exception {
        Justice justice = new Justice();
        justice.dealFile(file);
        justice.changeChannel();
        justice.display();
    }

    /**
     * Step 1: prints the word statistics of a file, up to 99999999 entries.
     *
     * @param file path of the text file
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process1(String file) throws Exception {
        EditTable table = new EditTable();
        table.DealFile(file);
        table.changeChannel();
        table.Display(99999999);
    }

    /**
     * Step 2: prints the {@code num} most frequent words of a file.
     *
     * @param file path of the text file
     * @param num maximum number of entries to print
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process2(String file, int num) throws Exception {
        EditTable table = new EditTable();
        table.DealFile(file);
        table.changeChannel();
        table.Display(num);
    }

    /**
     * Step 3: like step 2, but with the stop-word filter switched on.
     *
     * @param file path of the text file
     * @param num maximum number of entries to print
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process3(String file, int num) throws Exception {
        EditTable table = new EditTable(true);
        table.DealFile(file);
        table.changeChannel();
        table.Display(num);
    }

    /**
     * Step 4: prints the {@code num} most frequent phrases of a file.
     *
     * @param file path of the text file
     * @param num maximum number of entries to print
     * @throws Exception if the file or the stop-word list cannot be read
     */
    public static void process4(String file, int num) throws Exception {
        Table table = new Table();
        table.DealFile(file);
        table.changeChannel();
        // Honour the requested limit; it used to be ignored in favour of a fixed 999999.
        table.Display(num);
    }

    /**
     * Appends a blank line followed by the given word to {@code txt/stopword.txt}.
     *
     * @param file the stop word to add (named {@code file} because it arrives in that slot)
     * @throws Exception if the stop-word file cannot be written
     */
    public static void stopWord(String file) throws Exception {
        File f = new File("txt/stopword.txt");
        PrintWriter pw = new PrintWriter(new FileWriter(f, true));
        try {
            pw.println();
            pw.println(file);
        } finally {
            // Flushes and releases the file even when writing fails.
            pw.close();
        }
    }

    /** Prints the command overview shown before every prompt. */
    private static void printMenu() {
        System.out.println("===============================================");
        System.out.println(" p0 + 文件路径 -----------> 分析字母");
        System.out.println(" p1 + 文件路径 -----------> 分析单词");
        System.out.println(" p2 + 文件路径 + 前几项 -----------> 分析频率最高的单词");
        System.out.println(" p3 + 文件路径 + 前几项 -----------> 分析频率最高的单词(过滤)");
        System.out.println(" sw + 单词 -----------> 添加停用词");
        System.out.println(" p4 + 文件路径 + 前几项 -----------> 分析短语");
        System.out.println(" q -----------> 退出");
        System.out.println();
    }

    /**
     * Reads commands from standard input until {@code q} is entered or the input ends.
     *
     * @param args unused
     * @throws Exception if a command fails
     */
    public static void main(String[] args) throws Exception {
        Scanner sc = new Scanner(System.in);
        try {
            while (true) {
                printMenu();
                if (!sc.hasNext()) {
                    // End of input: leave quietly instead of failing in next().
                    break;
                }
                String order = sc.next();
                if (order.equals("q")) {
                    // Checked before the argument is read, so a lone "q" exits at once.
                    System.out.println("欢迎下次使用!");
                    break;
                }
                if (!sc.hasNext()) {
                    break;
                }
                String file = sc.next();
                if (order.equals("p0")) {
                    TestMain.process0(file);
                } else if (order.equals("p1")) {
                    TestMain.process1(file);
                } else if (order.equals("p2")) {
                    int num = sc.nextInt();
                    TestMain.process2(file, num);
                } else if (order.equals("p3")) {
                    int num = sc.nextInt();
                    TestMain.process3(file, num);
                } else if (order.equals("sw")) {
                    TestMain.stopWord(file);
                } else if (order.equals("p4")) {
                    int num = sc.nextInt();
                    TestMain.process4(file, num);
                }
                System.out.println("===============================================");
            }
        } finally {
            sc.close();
        }
    }
}
TestMain.java
附上截图:
对于文章的字母、单词、短语,(无用词表)的检索Java代码实现的更多相关文章
- 统计英文文章中各单词的频率,打印频率最高的十个单词(C语言实现)
一.程序思路及相关代码 首先打开文件,代码如下 FILE *fp; char fname[10]; printf("请输入要分析的文件名:\n"); scanf("%s ...
- 【原创】怎样才能写出优雅的 Java 代码?这篇文章告诉你答案!
本文已经收录自 JavaGuide (59k+ Star):[Java学习+面试指南] 一份涵盖大部分Java程序员所需要掌握的核心知识. 本文比较简短,基本就是推荐一些对于写好代码非常有用的文章或者 ...
- C++语言,统计一篇英文文章中的单词数(用正则表达式实现)
下面的例子展示了如何在C++11中,利用regex_search()统计一篇英文文章中的单词数: #include <iostream> #include <regex> #i ...
- NLP系列文章:子词嵌入(fastText)的理解!(附代码)
1. 什么是fastText 英语单词通常有其内部结构和形成⽅式.例如,我们可以从"dog""dogs"和"dogcatcher"的字⾯上推 ...
- php文章内容分页并生成相应的htm静态页面代码
代码如下: <?php $url='test.php?1=1'; $contents="fjka;fjsa;#page#批量生成分成文件并且加上分页代码"; $ptext = ...
- java代码把字母转换大小写、、、、
总结:从键盘输入多少次,就用for循环控制.这些需要输入的数据都放在循环内部,否则不会执行多次. package com.aaa; import java.util.Scanner; //大小写字母的 ...
- 务必收藏备用:.net core中通过Json或直接获取图形验证码(数字验证码、字母验证码、混合验证码),有源代码全实战demo(开源代码.net core3.0)
很多人写的博客大家看了会一知半解,不知道怎么用,应该引用什么类库或者代码不全,这样很多小白很是头疼,尤其是尝新技术更是如此.我们这边不止告诉你步骤,而且还提供开源demo.随着时间的推移,我们的dem ...
- 通过递归方法对一个单词所有的组合进行列举(java)
import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; public ...
- [转载非常好的文章]JLink+GDBServer调试S3C6410裸板的初始化代码 For OK6410开发板
要调试裸板,有两种初始化方法,一个是用烧好的uboot初始化,再有就是直接用JLink+GDBServer初始化.代码参考了网上的资料,根据手头的OK6410开发板做了修改.整体代码如下: # Con ...
随机推荐
- __str__()方法和__repr__()方法
有时候我们想让屏幕打印的结果不是对象的内存地址,而是它的值或者其他可以自定义的东西,以便更直观地显示对象内容,可以通过在该对象的类中创建或修改__str__()或__repr__()方法来实现(显示对 ...
- 【PAT甲级】1003 Emergency (25 分)(SPFA,DFS)
题意:n个点,m条双向边,每条边给出通过用时,每个点给出点上的人数,给出起点终点,求不同的最短路的数量以及最短路上最多能通过多少人.(N<=500) AAAAAccepted code: #in ...
- [IDEA] Idea复制文件到项目一直updating indices的问题
通常我们在开发JavaWeb项目的时候,都需要先将网页写好,在进行复制到web目录下,如果里面包含了很多的资源文件,就会造成一直updating indices. 方法一: 这是因为项目需要对web目 ...
- golang的io.copy使用
net/http 下载 在golang中,如果我们要下载一个文件,最简单的就是先用http.get()方法创建一个远程的请求后,后面可使用ioutil.WriteFile()等将请求内容直接写到文件中 ...
- springMVC是如何实现参数封装和自动返回Json的
HTTP 请求和响应是基于文本的,意味着浏览器和服务器通过交换原始文本进行通信.但是,使用 Spring,controller 类中的方法返回纯 ‘String’ 类型和域模型(或其他 Java 内建 ...
- asp.net core配置下载文件
asp.net core的wwwroot文件夹下默认时保存静态文件的地方,外面可以直接访问,但是如果是一些无法识别的后缀文件,如(.apk),会报错404 如果想要实现下载这些文件,在配置静态文件中间 ...
- tensorflow中的Fetch、Feed(02-3)
import tensorflow as tf #Fetch概念 在session中同时运行多个op input1=tf.constant(3.0) #constant()是常量不用进行init初始化 ...
- Linux命令:top命令
top命令是Linux下常用的性能分析工具,能够实时显示系统中各个进程的资源占用状况,类似于Windows的任务管理器.下面详细介绍它的使用方法.top是一个动态显示过程,即可以通过用户按键来不断刷新 ...
- Atcoder Grand Contest 037A(贪心,思维)
#include<bits/stdc++.h>using namespace std;string s;char ans[200007][7];char anss[200007][7];i ...
- 列表推导式、生成器表达式以及zip()max()max()/min()sum()sort()map()filter()的用法
列表推导式: 基本格式: variable = [out_exp_res for out_exp in input_list if out_exp == 2] #out_exp_res: 列表生成元素 ...