Java判断中文字符

package com.jsoft.test;

import java.util.regex.Pattern;

/**
 * Helpers for detecting Chinese text in a string, plus a small demo in {@link #main}.
 *
 * <p>Three equivalent-looking strategies are offered:
 * <ul>
 *   <li>{@code hasChineseByRange} / {@code isChineseByRange}: numeric range check on each
 *       UTF-16 code unit against the CJK Unified Ideographs block (U+4E00..U+9FFF);</li>
 *   <li>{@code hasChineseByReg} / {@code isChineseByReg}: the same range expressed as a
 *       regular expression;</li>
 *   <li>{@code hasChinese} / {@code isChinese}: Unicode block lookup per code point, which
 *       additionally accepts CJK punctuation, general punctuation and
 *       halfwidth/fullwidth forms, and the ideograph extensions A to D.</li>
 * </ul>
 *
 * <p>All methods return {@code false} for {@code null}. The "is all Chinese" methods also
 * return {@code false} for the empty string, so the three strategies agree on that input.
 *
 * @author jim
 * @date 2017-12-22
 */
public class ChineseHelper {

    /** First code point of the CJK Unified Ideographs block. */
    private static final int CJK_UNIFIED_FIRST = 0x4E00;

    /** Last code point of the CJK Unified Ideographs block. */
    private static final int CJK_UNIFIED_LAST = 0x9FFF;

    /**
     * One or more characters from the CJK Unified Ideographs block. Compiled once because
     * {@link Pattern} is immutable and safe to share; it serves both {@code find()} and
     * {@code matches()}.
     */
    private static final Pattern CJK_UNIFIED_RUN = Pattern.compile("[\\u4E00-\\u9FFF]+");

    /**
     * Demo: runs every detection strategy over a fixed set of sample strings and prints
     * the results to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // s1..s9, in order. Expected results are listed next to each loop below.
        String[] samples = {
            // s1: ASCII only
            "Hello,Tom.!@#$%^&*()_+-={}|[];':\"?",
            // s2: Chinese ideographs only, no punctuation
            "你好中国",
            // s3: Chinese ideographs with Chinese punctuation
            "你好，中国。《》：“”‘’；（）【】！￥、",
            // s4: Korean
            "한국어난",
            // s5: Japanese (hiragana)
            "ぎじゅつ",
            // s6..s8: special characters
            "��",
            "╃",
            "╂",
            // s9: Traditional Chinese
            "蒼老師"
        };

        // 1. Numeric range check.
        // contains: s2, s3, s9 -> true; everything else -> false
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChineseByRange(samples[i]));
        }
        System.out.println("-------分割线-------");
        // all: s2, s9 -> true; s3 -> false because Chinese punctuation is outside the range
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChineseByRange(samples[i]));
        }
        System.out.println("-------分割线-------");

        // 2. Regular expression over the same range (same results as 1).
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChineseByReg(samples[i]));
        }
        System.out.println("-------分割线-------");
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChineseByReg(samples[i]));
        }
        System.out.println("-------分割线-------");

        // 3. Unicode block lookup.
        // contains: s2, s3, s9 -> true; everything else -> false
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChinese(samples[i]));
        }
        System.out.println("-------分割线-------");
        // all: s2, s3, s9 -> true; s3 passes here because Chinese punctuation is accepted
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChinese(samples[i]));
        }
    }

    /**
     * Tells whether the string contains at least one Chinese character, where Chinese
     * punctuation counts as Chinese (see {@link #isChineseCodePoint(int)}).
     *
     * <p>The string is walked by code point, so ideographs outside the Basic Multilingual
     * Plane (stored as surrogate pairs) are recognised.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any code point is Chinese; {@code false} otherwise, including
     *         for {@code null} and the empty string
     */
    public static boolean hasChinese(String str) {
        if (str == null) {
            return false;
        }
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (isChineseCodePoint(codePoint)) {
                return true;
            }
            // Step over both code units of a surrogate pair.
            index += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Tells whether the string consists solely of Chinese characters, where Chinese
     * punctuation counts as Chinese (see {@link #isChineseCodePoint(int)}).
     *
     * <p>The string is walked by code point, so ideographs outside the Basic Multilingual
     * Plane (stored as surrogate pairs) are recognised.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every code point is Chinese;
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    public static boolean isChinese(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (!isChineseCodePoint(codePoint)) {
                return false;
            }
            index += Character.charCount(codePoint);
        }
        return true;
    }

    /**
     * Tells whether a single code point belongs to one of the Unicode blocks this class
     * treats as Chinese: the CJK unified ideographs and their extensions A to D, CJK
     * compatibility ideographs, CJK symbols and punctuation, general punctuation, and
     * halfwidth/fullwidth forms.
     *
     * <p>NOTE(review): the two punctuation/forms blocks are broad, so some non-Chinese
     * symbols in those blocks are accepted as well.
     *
     * @param codePoint the Unicode code point to classify
     * @return {@code true} if the code point lies in one of the accepted blocks
     */
    private static boolean isChineseCodePoint(int codePoint) {
        // UnicodeBlock.of may return null for code points outside any defined block;
        // the identity comparisons below are simply false in that case.
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Tells whether the string contains at least one Chinese ideograph, using a regular
     * expression over the CJK Unified Ideographs block (U+4E00..U+9FFF). Chinese
     * punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if an ideograph is found; {@code false} otherwise, including
     *         for {@code null} and the empty string
     */
    public static boolean hasChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return CJK_UNIFIED_RUN.matcher(str).find();
    }

    /**
     * Tells whether the string consists solely of Chinese ideographs, using a regular
     * expression over the CJK Unified Ideographs block (U+4E00..U+9FFF). Chinese
     * punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every character is an ideograph;
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    public static boolean isChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return CJK_UNIFIED_RUN.matcher(str).matches();
    }

    /**
     * Tells whether the string contains at least one Chinese ideograph, by comparing each
     * character against the CJK Unified Ideographs block (U+4E00..U+9FFF). Chinese
     * punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if an ideograph is found; {@code false} otherwise, including
     *         for {@code null} and the empty string
     */
    public static boolean hasChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (inCjkUnifiedRange(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the string consists solely of Chinese ideographs, by comparing each
     * character against the CJK Unified Ideographs block (U+4E00..U+9FFF). Chinese
     * punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every character is an ideograph;
     *         {@code false} otherwise, including for {@code null} and the empty string
     *         (matching {@link #isChineseByReg(String)})
     */
    public static boolean isChineseByRange(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!inCjkUnifiedRange(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Range test shared by the {@code ...ByRange} methods.
     *
     * @param c the UTF-16 code unit to test
     * @return {@code true} if {@code c} lies in U+4E00..U+9FFF inclusive
     */
    private static boolean inCjkUnifiedRange(char c) {
        return c >= CJK_UNIFIED_FIRST && c <= CJK_UNIFIED_LAST;
    }

}

如果仅仅需要判断是否是中文、不需要判断中文标点的话，推荐使用正则去匹配，可能更高效一点。

还有另外一种投机取巧的方法：转int类型，然后try...catch

参考：

http://www.jb51.net/article/79101.htm（以上内容转自此篇文章）

http://blog.csdn.net/h082602/article/details/73251446

http://blog.csdn.net/u011240877/article/details/49907751

http://blog.csdn.net/l1028386804/article/details/43764073

http://blog.csdn.net/qwkxq/article/details/53508736

https://www.cnblogs.com/jinc/archive/2013/02/26/2933766.html

Java判断中文字符的更多相关文章

Java 判断中文字符
Java判断一个字符串中是否有中文字符有两种方法,但是原理都一样,就是通过Unicode编码来判断,因为中文在Unicode中的编码区间为:0x4e00--0x9fa5 第一种: String chi ...
Java判断一个字符是否是数字的几种方法的代码
在工作期间,将写内容过程经常用到的一些内容段做个记录,下面内容是关于Java判断一个字符是否是数字的几种方法的内容,希望能对码农们有好处. public class Test{ public stat ...
JAVA的中文字符乱码问题
来源:http://luzefengoo.blog.163.com/blog/static/1403593882012754428536/ JAVA的中文字符乱码问题一直很让人头疼.特别是在WEB应用 ...
Java 完美判断中文字符
Java判断一个字符串是否有中文一般情况是利用Unicode编码(CJK统一汉字的编码区间:0x4e00–0x9fbb)的正则来做判断,但是其实这个区间来判断中文不是非常精确,因为有些中文的标点符号比 ...
Java 完美判断中文字符的方法
Java判断一个字符串是否有中文一般情况是利用Unicode编码(CJK统一汉字的编码区间:0x4e00–0x9fbb)的正则来做判断,但是其实这个区间来判断中文不是非常精确,因为有些中文的标点符号比 ...
C# 判断中文字符（字符串）
在unicode 字符串中,中文的范围是在4E00..9FFF:CJK Unified Ideographs.通过对字符的unicode编码进行判断来确定字符是否为中文.protected bool ...
python利用utf-8编码判断中文字符
下面这个小工具包含了判断unicode是否是汉字,数字,英文,或者其他字符. 全角符号转半角符号. unicode字符串归一化等工作. 还有一个能处理多音字的汉字转拼音的程序,还在整理中. #!/u ...
MySQL判断中文字符的方法（转）
准备: 2.1.环境 MySQL mysql> SHOW VARIABLES LIKE "%version%"; +-------------------------+--- ...
java 获取中文字符的首字母
原理: GB2312编码中的中文是按照拼音排序的注意: 一些生僻的字无法获得正确的首字母,原因是这些字都是后加入的. import java.io.UnsupportedEncodingExcept ...

随机推荐

一个符号冲突导致的core分析
问题描述: 修改跟踪程序(Trace)支持IPV6时,发现程序启动后正常,但是客户端一旦下发查询条件进行跟踪,Trace程序就直接coredump! (gdb) bt # 0x00007f7dab9e ...
C 语言进阶
清单狂魔,只挖坑不填坑.. 前言最近经常被询问 C 语言相关的问题,突然便也觉得需要思考一下 C 语言的进阶了. 我用 C 语言写过的最大的一个项目,也只是那个贪吃蛇,后来就断断续续地用 Pyth ...
usb host和device的关系-ARM 论坛 - 21ic电子技术论坛
usb host和device的关系疑问1:我们通常所用的u盘应该是usb device吧?我想这个不用多说,呵呵. ===============恩.============== 疑问2:我们通常 ...
VB.NET——报表
在工具箱查找ReportViewer,添加. 选择设计新报表: 排列字段,布局的步骤省略. 完成. 接下来,我们可以更改中文标题,设置背景色等,让界面看起来更美观. 如果需要添加参数,所传递的参数要与 ...
详解Linux运维工程师应具备的十大技能
Linux系统如果是学习可以选用Redhat或CentOS,特别是CentOS在企业中用得最多,当然还会有其它版本的,但学习者还是以这2个版本学习就行,因为这两个版本都是兄弟,没区别的,有空可以再研究 ...
【bzoj3669】[Noi2014]魔法森林 Kruskal+LCT
原文地址:http://www.cnblogs.com/GXZlegend/p/6797748.html 题目描述为了得到书法大家的真传,小E同学下定决心去拜访住在魔法森林中的隐士.魔法森林可以被看 ...
BZOJ1565 [NOI2009]植物大战僵尸【最大权闭合子图 + tarjan缩点(或拓扑)】
题目输入格式输出格式仅包含一个整数,表示可以获得的最大能源收入.注意,你也可以选择不进行任何攻击,这样能源收入为0. 输入样例 3 2 10 0 20 0 -10 0 -5 1 0 0 100 ...
UltraEdit 删除空行
UltraEdit 删除空行数据里有大量的空行,想在UltraEdit里删除,在网上搜了很多方法都不管用,功夫不负有心人,最后终于找到了可用的方法: 搜索—>替换,在“查找什么”里输入:\n( ...
Tomcat学习笔记(二)
Servlet浅析 javax.servlet.Servlet是一个接口,所有的Servlet必须实现接口里面的方法. 该接口在tomcat/bin中的servlet-api.jar包中. Servl ...
DP———2.最大m子序列和
Max Sum Plus Plus Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others ...

Java判断中文字符

Java判断中文字符的更多相关文章

随机推荐

热门专题