package com.opslab.util;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

/**
 * Static helpers for single characters and for character arrays/sequences.
 *
 * <p>Groups of functionality:
 * <ul>
 * <li>"simple" conversions that keep only the low byte of each char;</li>
 * <li>"ascii" conversions that replace every char above {@code 0xFF} with {@code '?'};</li>
 * <li>"raw" conversions that map each char to two bytes (high byte first) and back;</li>
 * <li>charset based conversions (UTF-8 or a caller supplied charset name);</li>
 * <li>linear searches in {@code char[]};</li>
 * <li>ASCII-only character classification, including the RFC 3986 character sets;</li>
 * <li>ASCII-only case conversion.</li>
 * </ul>
 *
 * <p>None of the methods accept {@code null} arguments; passing one results in a
 * {@link NullPointerException}.
 */
public final class CharUtil {

    /** Substitute ({@code '?'}, 0x3F) produced by the "ascii" conversions for chars above 0xFF. */
    private static final char ASCII_SUBSTITUTE = 0x3F;

    /** Not instantiable: every member is static. */
    private CharUtil() {
    }

    // ---------------------------------------------------------------- simple

    /**
     * Converts a (signed) byte to an (unsigned) char.
     *
     * @param b byte to convert
     * @return char in the range {@code 0x00..0xFF} holding the unsigned value of {@code b}
     */
    public static char toChar(byte b) {
        return (char) (b & 0xFF);
    }

    /**
     * Converts a char array into a byte array by dropping the high byte of each character.
     * The conversion is lossy for characters above {@code 0xFF}.
     *
     * @param carr source characters
     * @return new byte array of the same length as {@code carr}
     */
    public static byte[] toSimpleByteArray(char[] carr) {
        byte[] barr = new byte[carr.length];
        for (int i = 0; i < carr.length; i++) {
            barr[i] = (byte) carr[i];
        }
        return barr;
    }

    /**
     * Converts a char sequence into a byte array by dropping the high byte of each character.
     *
     * @param charSequence source characters
     * @return new byte array of the same length as {@code charSequence}
     * @see #toSimpleByteArray(char[])
     */
    public static byte[] toSimpleByteArray(CharSequence charSequence) {
        byte[] barr = new byte[charSequence.length()];
        for (int i = 0; i < barr.length; i++) {
            barr[i] = (byte) charSequence.charAt(i);
        }
        return barr;
    }

    /**
     * Converts a byte array to a char array by widening each byte, treated as unsigned,
     * to a char.
     *
     * @param barr source bytes
     * @return new char array of the same length as {@code barr}
     * @see #toChar(byte)
     */
    public static char[] toSimpleCharArray(byte[] barr) {
        char[] carr = new char[barr.length];
        for (int i = 0; i < barr.length; i++) {
            carr[i] = toChar(barr[i]);
        }
        return carr;
    }

    // ---------------------------------------------------------------- ascii

    /**
     * Returns the single-byte value of a char. Despite the name, every value up to
     * {@code 0xFF} (not only 7-bit ASCII) is returned unchanged; anything larger is
     * replaced with {@code 0x3F} ({@code '?'}).
     *
     * @param c character to convert
     * @return {@code c} if {@code c <= 0xFF}, otherwise {@code 0x3F}
     */
    public static int toAscii(char c) {
        return c <= 0xFF ? c : ASCII_SUBSTITUTE;
    }

    /**
     * Converts a char array into a byte array, mapping each char with {@link #toAscii(char)}.
     *
     * @param carr source characters
     * @return new byte array of the same length as {@code carr}
     */
    public static byte[] toAsciiByteArray(char[] carr) {
        byte[] barr = new byte[carr.length];
        for (int i = 0; i < carr.length; i++) {
            barr[i] = (byte) toAscii(carr[i]);
        }
        return barr;
    }

    /**
     * Converts a char sequence into a byte array, mapping each char with {@link #toAscii(char)}.
     *
     * @param charSequence source characters
     * @return new byte array of the same length as {@code charSequence}
     */
    public static byte[] toAsciiByteArray(CharSequence charSequence) {
        byte[] barr = new byte[charSequence.length()];
        for (int i = 0; i < barr.length; i++) {
            barr[i] = (byte) toAscii(charSequence.charAt(i));
        }
        return barr;
    }

    // ---------------------------------------------------------------- raw arrays

    /**
     * Converts a char array into a byte array by writing each character as two bytes,
     * high byte first.
     *
     * @param carr source characters
     * @return new byte array of length {@code 2 * carr.length}
     */
    public static byte[] toRawByteArray(char[] carr) {
        byte[] barr = new byte[carr.length << 1];
        int bpos = 0;
        for (char c : carr) {
            barr[bpos++] = (byte) (c >>> 8);
            barr[bpos++] = (byte) c;
        }
        return barr;
    }

    /**
     * Converts a byte array into a char array by combining each pair of bytes into one
     * character, high byte first. This is the inverse of {@link #toRawByteArray(char[])}.
     *
     * <p>If the input has an odd length, the trailing byte becomes the high byte of the
     * last character and its low byte is zero.
     *
     * @param barr source bytes
     * @return new char array of length {@code ceil(barr.length / 2)}
     */
    public static char[] toRawCharArray(byte[] barr) {
        // Half the length, rounded up; written without "+ 1" so it cannot overflow.
        char[] carr = new char[(barr.length >> 1) + (barr.length & 1)];
        int i = 0;
        for (int j = 0; j < carr.length; j++) {
            // Sign extension above bit 15 is discarded by the final cast to char.
            int c = barr[i++] << 8;
            if (i < barr.length) {
                c |= barr[i++] & 0xFF;
            }
            carr[j] = (char) c;
        }
        return carr;
    }

    // ---------------------------------------------------------------- encoding

    /**
     * Encodes a char array as UTF-8.
     *
     * @param carr characters to encode
     * @return UTF-8 encoded bytes
     * @throws UnsupportedEncodingException never thrown by this implementation; the clause
     *         is kept so that existing callers which catch it keep compiling
     */
    public static byte[] toByteArray(char[] carr) throws UnsupportedEncodingException {
        return new String(carr).getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Encodes a char array using the named charset.
     *
     * @param carr    characters to encode
     * @param charset name of the charset to use
     * @return encoded bytes
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public static byte[] toByteArray(char[] carr, String charset) throws UnsupportedEncodingException {
        return new String(carr).getBytes(charset);
    }

    /**
     * Decodes a UTF-8 byte array into a char array.
     *
     * @param barr UTF-8 encoded bytes
     * @return decoded characters
     * @throws UnsupportedEncodingException never thrown by this implementation; the clause
     *         is kept so that existing callers which catch it keep compiling
     */
    public static char[] toCharArray(byte[] barr) throws UnsupportedEncodingException {
        return new String(barr, StandardCharsets.UTF_8).toCharArray();
    }

    /**
     * Decodes a byte array of the named charset into a char array.
     *
     * @param barr    encoded bytes
     * @param charset name of the charset the bytes are encoded in
     * @return decoded characters
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public static char[] toCharArray(byte[] barr, String charset) throws UnsupportedEncodingException {
        return new String(barr, charset).toCharArray();
    }

    // ---------------------------------------------------------------- find

    /**
     * Checks whether a character equals any of the given characters.
     *
     * @param c     character to look for
     * @param match candidate characters
     * @return {@code true} if {@code c} occurs in {@code match}, otherwise {@code false}
     *         (in particular for an empty {@code match})
     */
    public static boolean equalsOne(char c, char[] match) {
        for (char candidate : match) {
            if (c == candidate) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the first position, at or after {@code index}, whose character is one of the
     * given characters.
     *
     * @param source array to scan
     * @param index  position to start scanning at
     * @param match  set of characters to look for
     * @return index of the first matching character, or {@code -1} if there is none
     */
    public static int findFirstEqual(char[] source, int index, char[] match) {
        for (int i = index; i < source.length; i++) {
            if (equalsOne(source[i], match)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the first position, at or after {@code index}, whose character equals the
     * given character.
     *
     * @param source array to scan
     * @param index  position to start scanning at
     * @param match  character to look for
     * @return index of the first matching character, or {@code -1} if there is none
     */
    public static int findFirstEqual(char[] source, int index, char match) {
        for (int i = index; i < source.length; i++) {
            if (source[i] == match) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the first position, at or after {@code index}, whose character is <b>not</b>
     * one of the given characters.
     *
     * @param source array to scan
     * @param index  position to start scanning at
     * @param match  set of characters to skip
     * @return index of the first differing character, or {@code -1} if there is none
     */
    public static int findFirstDiff(char[] source, int index, char[] match) {
        for (int i = index; i < source.length; i++) {
            if (!equalsOne(source[i], match)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the first position, at or after {@code index}, whose character differs from
     * the given character.
     *
     * @param source array to scan
     * @param index  position to start scanning at
     * @param match  character to skip
     * @return index of the first differing character, or {@code -1} if there is none
     */
    public static int findFirstDiff(char[] source, int index, char match) {
        for (int i = index; i < source.length; i++) {
            if (source[i] != match) {
                return i;
            }
        }
        return -1;
    }

    // ---------------------------------------------------------------- is

    /**
     * Returns {@code true} if the character is a white space, defined as {@code c <= ' '}
     * (the same definition {@link String#trim()} uses).
     */
    public static boolean isWhitespace(char c) {
        return c <= ' ';
    }

    /**
     * Returns {@code true} if the character is a lowercase ASCII letter
     * ({@code 'a'..'z'}). Non-ASCII letters are never matched.
     */
    public static boolean isLowercaseAlpha(char c) {
        return (c >= 'a') && (c <= 'z');
    }

    /**
     * Returns {@code true} if the character is an uppercase ASCII letter
     * ({@code 'A'..'Z'}). Non-ASCII letters are never matched.
     */
    public static boolean isUppercaseAlpha(char c) {
        return (c >= 'A') && (c <= 'Z');
    }

    /**
     * Returns {@code true} if the character is an ASCII letter or an ASCII digit.
     *
     * @see #isAlpha(char)
     * @see #isDigit(char)
     */
    public static boolean isAlphaOrDigit(char c) {
        return isDigit(c) || isAlpha(c);
    }

    /**
     * Returns {@code true} if the character is an ASCII letter, an ASCII digit or
     * an underscore.
     */
    public static boolean isWordChar(char c) {
        return isAlphaOrDigit(c) || (c == '_');
    }

    /**
     * Returns {@code true} if the character is a {@link #isWordChar(char) word character}
     * or one of {@code '.'}, {@code '['}, {@code ']'}.
     */
    public static boolean isPropertyNameChar(char c) {
        return isWordChar(c) || (c == '.') || (c == '[') || (c == ']');
    }

    // ---------------------------------------------------------------- rfc 3986

    /**
     * Indicates whether the given character is in the {@code ALPHA} set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isAlpha(char c) {
        return isLowercaseAlpha(c) || isUppercaseAlpha(c);
    }

    /**
     * Indicates whether the given character is in the {@code DIGIT} set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Indicates whether the given character is a hexadecimal digit
     * ({@code '0'..'9'}, {@code 'a'..'f'} or {@code 'A'..'F'}).
     */
    public static boolean isHexDigit(char c) {
        return isDigit(c) || ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F'));
    }

    /**
     * Indicates whether the given character is in the <i>gen-delims</i> set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isGenericDelimiter(int c) {
        switch (c) {
            case ':':
            case '/':
            case '?':
            case '#':
            case '[':
            case ']':
            case '@':
                return true;
            default:
                return false;
        }
    }

    /**
     * Indicates whether the given character is in the <i>sub-delims</i> set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isSubDelimiter(int c) {
        switch (c) {
            case '!':
            case '$':
            case '&':
            case '\'':
            case '(':
            case ')':
            case '*':
            case '+':
            case ',':
            case ';':
            case '=':
                return true;
            default:
                return false;
        }
    }

    /**
     * Indicates whether the given character is in the <i>reserved</i> set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isReserved(char c) {
        return isGenericDelimiter(c) || isSubDelimiter(c);
    }

    /**
     * Indicates whether the given character is in the <i>unreserved</i> set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isUnreserved(char c) {
        return isAlpha(c) || isDigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
    }

    /**
     * Indicates whether the given character is in the <i>pchar</i> set.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a>
     */
    public static boolean isPchar(char c) {
        return isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@';
    }

    // ---------------------------------------------------------------- case

    /**
     * Converts a lowercase ASCII letter to uppercase; any other character is returned
     * unchanged.
     */
    public static char toUpperAscii(char c) {
        return isLowercaseAlpha(c) ? (char) (c - 0x20) : c;
    }

    /**
     * Converts an uppercase ASCII letter to lowercase; any other character is returned
     * unchanged.
     */
    public static char toLowerAscii(char c) {
        return isUppercaseAlpha(c) ? (char) (c + 0x20) : c;
    }

}

CharUtil的更多相关文章

  1. 《Java学习笔记(第8版)》学习指导

    《Java学习笔记(第8版)》学习指导 目录 图书简况 学习指导 第一章 Java平台概论 第二章 从JDK到IDE 第三章 基础语法 第四章 认识对象 第五章 对象封装 第六章 继承与多 ...

  2. 20145330第六周《Java学习笔记》

    20145330第六周《Java学习笔记》。这周算是很忙碌的一周.因为第六周陆续很多实验都开始进行,开始要准备和预习的科目日渐增多,对Java分配的时间不知不觉就减少了,然而第十和十一 ...

  3. 20145225《Java程序设计》 第6周学习总结

    20145225《Java程序设计》 第6周学习总结 教材学习内容总结 第十章 输入/输出 10.1 InputStream与OutputStream 串流:衔接数据的来源和目的地就是串流对 ...

  4. Java 完美判断中文字符

    Java判断一个字符串是否有中文一般情况是利用Unicode编码(CJK统一汉字的编码区间:0x4e00–0x9fbb)的正则来做判断,但是其实这个区间来判断中文不是非常精确,因为有些中文的标点符号比 ...

  5. 20145235 《Java程序设计》第6周学习总结

    教材学习内容总结 10.1 InputStream与OutputStream 串流设计的概念 Java将输入/输出抽象化为串流,数据有来源及目的地,衔接两者的是串流对象. 从应用程序角度来看,如果要将 ...

  6. 20145207《Java程序设计》第6周学习总结

    教材学习内容总结 一.输入/输出 InputStream与Outputstream • 串流设计的概念 从应用程序角度看,将数据从来源取出,可以使用输入串流,将数据写入目的地,可以使用输出串流:在Ja ...

  7. 《Java程序设计》第六周学习总结

    20145224 《Java程序设计》第六周学习总结 教材学习内容总结 第十章输入和输出 10.1.1 ·若要将数据从来源中取出,可以使用输入串流:若要将数据写入目的地,可以使用输出串流. ...

  8. # 20145210 《Java程序设计》第06周学习总结

    教材学习内容总结 第十章 输入\输出 10.1 InputStream与OutputStream •串流设计的概念 •java将输入\输出抽象化为串流,数据有来源及目的地,衔接两者的是串流对象 •从应 ...

  9. Java 完美判断中文字符的方法

    Java判断一个字符串是否有中文一般情况是利用Unicode编码(CJK统一汉字的编码区间:0x4e00–0x9fbb)的正则来做判断,但是其实这个区间来判断中文不是非常精确,因为有些中文的标点符号比 ...

随机推荐

  1. TortoiseGit-下载安装汉语语言包(汉化-方法)

    TortoiseGit是一款版本控制软件,和git bash是差不多的, 但是TortoiseGit是图形界面,git bash却是命令界面,但是, 我更新了TortoiseGit后,记得明明选择了汉 ...

  2. Sqoop 安装与简单测试

    sqoop基于Hadoop与Hive Hadoop https://www.cnblogs.com/xibuhaohao/p/11772031.html Hive      https://www.c ...

  3. Oracle 解决无法生成Snapshot问题

    1. 概述 Specify the number of days of snapshots to choose from ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ...

  4. 三.cron计划任务

    • 用途:按照设置的时间间隔为用户反复执行某一项固 定的系统任务 • 软件包:cronie.crontabs • 系统服务:crond • 日志文件:/var/log/crond   • 使用 cro ...

  5. 2019暑期金华集训 Day6 杂题选讲

    自闭集训 Day6 杂题选讲 CF round 469 E 发现一个数不可能取两次,因为1,1不如1,2. 发现不可能选一个数的正负,因为1,-1不如1,-2. hihoCoder挑战赛29 D 设\ ...

  6. git的实际工作经验总结

    分支工作的一个较佳的实践, 即git工作的最佳实践 从最初的svn到后来的git,上来给我的感觉就是git更方便, 可以在本地进行版本的提交,回退. 后来对hash有所了解, 知道了git的每个版本其 ...

  7. ubuntu 14.04 系统配置磁盘,CPU,内存,硬盘信息查看

    Linux查看物理CPU个数.核数.逻辑CPU个数# 总核数 = 物理CPU个数 X 每颗物理CPU的核数 # 总逻辑CPU数 = 物理CPU个数 X 每颗物理CPU的核数 X 超线程数 查看分区磁盘 ...

  8. Spring MVC国际化配置

    Spring MVC国际化配置 前言 项目开发中要考虑支持国际化,框架选用的是Spring MVC框架,那么问题来了Spring MVC如何配置并实现国际化. 实现过程(Maven项目) 对于Spri ...

  9. 【随记】Sql Server 2008 R2 备份时“无法打开备份设备”

    如下图所示,在执行SQL一个简单的备份命令时发生下面的错误 可能的原因: 1.文件夹权限问题: 2.Sql Server SQLServer服务器用户策略问题: 问题排查: 1.查看了temp文件夹, ...

  10. python3之线程(一)

    线程的概念 现在的操作系统几乎都支持运行多个任务,而在操作系统内部,一个任务往往代表的执行的某一个程序,也就是运行中的程序,运行的程序是一个动态的概念,也就是所说的进程,而在进程内部,往往有许多顺序执 ...