#include <regex>

 #include <iostream>

 #include <string>

 #include <atlstr.h>

 /// Converts a wide-character CString into a UTF-8 encoded std::string.
 ///
 /// @param cstring  Source text; handed to WideCharToMultiByte as a
 ///                 NUL-terminated wide string.
 /// @return The UTF-8 bytes without the trailing terminator, or "" when the
 ///         input is empty or the conversion fails.
 static std::string U2A(CString cstring)
 {
     // No IsBadStringPtrW() guard: that API is documented as obsolete and
     // does not actually prove the pointer is safe to read.
     const wchar_t* wide = cstring;

     // First pass: with a zero-sized output buffer the call only reports the
     // required byte count. Because the input length is -1, that count
     // includes the terminating NUL.
     const int cchStr = ::WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
     if (cchStr <= 1)
         return "";   // 0 = failure, 1 = terminator only

     // Second pass: convert straight into a std::string so the buffer is
     // released automatically on every path.
     std::string str(static_cast<std::string::size_type>(cchStr), '\0');
     const int written = ::WideCharToMultiByte(CP_UTF8, 0, wide, -1, &str[0], cchStr, NULL, NULL);
     if (written <= 0)
         return "";

     // Stop at the first NUL, as constructing from a C string would.
     str.resize(str.find('\0'));
     return str;
 }

 /// Reports whether a C string contains at least one whitespace character.
 ///
 /// @param str  NUL-terminated string to scan; a null pointer is treated as
 ///             an empty string.
 /// @return true if isspace() accepts any character of the string,
 ///         false otherwise.
 bool IsSpace(const char* str)
 {
     if (!str)
         return false;

     for (const char* p = str; *p != '\0'; ++p)
     {
         // isspace() is undefined for negative arguments other than EOF, so
         // bytes >= 0x80 must be widened through unsigned char first.
         if (isspace(static_cast<unsigned char>(*p)))
             return true;
     }
     return false;
 }

 /// Regex check: accepts text that consists of one to three decimal digits
 /// and nothing else.
 ///
 /// @param cstr  Text to validate; it is converted with U2A() before matching.
 /// @return true when the whole converted string matches ^\d{1,3}$.
 bool IsNumber( CString cstr )
 {
     const std::string utf8 = U2A(cstr);
     const std::tr1::regex digitsOnly("^\\d{1,3}$");

     // regex_match succeeds only if the entire string matches.
     return std::tr1::regex_match(utf8, digitsOnly);
 }

 /// Regex check: accepts non-empty text made only of word characters (\w),
 /// '.', '-' and characters in the range U+4E00..U+9FA5 (Chinese).
 ///
 /// @param cstr  Text to validate.
 /// @return true when the whole string matches the pattern.
 bool IsNum_Letter_Symbol( CString cstr )
 {
     std::wstring wideText = cstr.GetBuffer();
     cstr.ReleaseBuffer();

     // Chinese characters do not fit in a single byte, so the wide-character
     // regex type (wregex) is used here.
     const std::tr1::wregex allowed(L"^[\\w\\.\\-\\u4e00-\\u9fa5]+$");

     return std::tr1::regex_match(wideText, allowed);
 }

 /// Demo: finds the first word ending in "day" in a sentence and prints its
 /// capture groups, one per line, followed by an empty line.
 int main()
 {
     // Regular expression with a single capture group.
     const std::tr1::regex pattern("(\\w+day)");

     // The source text to search.
     std::string weekend = "Saturday and Sunday";

     // Receives the match and its sub-matches.
     std::tr1::smatch result;

     const bool match = std::tr1::regex_search(weekend, result, pattern);
     if (match)
     {
         // result[0] is the whole match; capture groups start at index 1.
         for (size_t i = 1; i < result.size(); ++i)
         {
             std::cout << result[i] << std::endl;
         }
     }

     std::cout << std::endl;
     return 0;
 }

转自http://www.cnblogs.com/zhuyp1015/archive/2012/04/08/2438215.html

C++11 正则表达式——基础知识介绍

C++11开始支持正则表达式，使得处理文本更加简洁方便。C++11 支持六种正则表达式语法：ECMAScript, basic(POSIX Basic Regular Expressions), extended(POSIX Extended Regular Expressions ), awk(POSIX awk) , grep(POSIX grep ), egrep(POSIX grep –E)。其中ECMAScript最为强大。

闲话不多说，首先来看正则表达式有哪些基本类型。

1. basic_regex: 这是一个包含一个正则表达式的模板类。通常有两种特化方式：

a) typedef basic_regex<char> regex;

b) typedef basic_regex<wchar_t> wregex;

2. match_results: 这个类包含了与给定正则表达式匹配的序列。当empty()成员返回true或者size()成员返回0，表明没有找到匹配项。否则，当empty()返回false，size()返回值>=1 表明发生了匹配。此外：match[0]: 代表整个匹配序列；match[1]:代表第一个匹配子序列；match[2]: 代表第二个匹配子序列，以此类推。match_results有如下特化方式：

a) typedef match_results<const char*> cmatch;

b) typedef match_results<const wchar_t*> wcmatch;

c) typedef match_results<string::const_iterator> smatch;

d) typedef match_results<wstring::const_iterator> wsmatch;

3. sub_match: 该模板类用来表示与一个已标记的子表达式匹配的序列。这个匹配是通过一个迭代器对来表示的，该迭代器对表明了已匹配的正则表达式的一个范围。可以特化为下面几种情况：

a) typedef sub_match<const char*> csub_match;

b) typedef sub_match<const wchar_t*> wcsub_match;

c) typedef sub_match<string::const_iterator> ssub_match;

d) typedef sub_match<wstring::const_iterator> wssub_match;

以上介绍了三种常用的类型，叙述可能比较抽象，后面会结合例子来介绍这些类型的用法，还是会比较好理解。

然后来认识一下操作正则表达式的一些常用算法。

template <class charT,class Allocator,class traits >

bool regex_match(

const charT* str,

match_results<const charT*,Allocator>& m,

const basic_regex<charT,traits >& e,

match_flag_type flags = match_default);

regex_match 判断一个正则表达式(参数 e)是否匹配整个字符序列 str。它主要用于验证文本。注意，这个正则表达式必须匹配被分析串的全部，否则函数返回 false。如果整个序列被成功匹配，regex_match 返回 true。

template <class traits,class charT>

basic_string<charT> regex_replace(

const basic_string<charT>& s,

const basic_regex<charT,traits >& e,

const basic_string<charT>& fmt,

match_flag_type flags = match_default);

regex_replace 在整个字符序列中查找正则表达式e的所有匹配。这个算法每次成功匹配后，就根据参数fmt对匹配字符串进行格式化。缺省情况下，不匹配的文本不会被修改，即文本会被输出但没有改变。

template <class charT,class Allocator, class traits>
bool regex_search(
    const charT* str,
    match_results<const charT*,Allocator>& m,
    const basic_regex<charT,traits >& e,
    match_flag_type flags = match_default);

regex_search 类似于 regex_match, 但它不要求整个字符序列完全匹配。你可以用 regex_search 来查找输入中的一个子序列，该子序列匹配正则表达式 e.

迭代器介绍：正则表达式迭代器用来遍历这个正则表达式序列，通过一个迭代器区间来表示匹配的区间。

1. regex_iterator:

a) typedef regex_iterator<const char*> cregex_iterator;

b) typedef regex_iterator<const wchar_t*> wcregex_iterator;

c) typedef regex_iterator<string::const_iterator> sregex_iterator;

d) typedef regex_iterator<wstring::const_iterator> wsregex_iterator;

2. regex_token_iterator:

a) typedef regex_token_iterator<const char*> cregex_token_iterator;

b) typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;

c) typedef regex_token_iterator<string::const_iterator> sregex_token_iterator;

d) typedef regex_token_iterator<wstring::const_iterator> wsregex_token_iterator;

C++11 正则表达式——实例1

该实例通过一个函数is_email_valid 来检查一个email地址是否是一个正确的格式。如果格式正确则返回true。

#include <regex>

#include <iostream>

#include <string>

/// Checks whether a string has the shape of an e-mail address.
///
/// The accepted shape is: word characters, an optional single '.' or '_',
/// optional further word characters, '@', a host name, and one or more
/// ".suffix" parts.
///
/// @param email  Address to validate.
/// @return true when the whole string matches the pattern.
bool is_email_valid(const std::string& email)
{
    // Compiled once on first use: building a std::regex is expensive and
    // this pattern never changes between calls.
    static const std::regex pattern("(\\w+)(\\.|_)?(\\w*)@(\\w+)(\\.(\\w+))+");

    return std::regex_match(email, pattern);
}

/// Demo driver: runs is_email_valid() over four sample addresses and prints
/// "<address> : valid" or "<address> : invalid" for each.
int main()
{
    const std::string samples[] = {
        "marius.bancila@domain.com",
        "mariusbancila@domain.com",
        "marius_b@domain.co.uk",
        "marius@domain"
    };

    for (const std::string& address : samples)
    {
        const char* verdict = is_email_valid(address) ? "valid" : "invalid";
        std::cout << address << " : " << verdict << std::endl;
    }

    return 0;
}

运行结果

这里对is_email_valid()函数中的正则表达式做一个简短的说明，这样对正则表达式不是很清楚的同学也能很容易理解。
const std::regex pattern("(\\w+)(\\.|_)?(\\w*)@(\\w+)(\\.(\\w+))+"); 首先注意‘()’表示将正则表达式分成子表达式，每个‘()’之间的内容表示一个子表达式；‘\’是一个转义字符，在C++字符串字面量中必须写成‘\\’，才能把一个‘\’传给正则表达式引擎；‘\w’表示匹配一个单词字符（字母、数字或下划线），‘+’表示重复一次或者多次，因此第一个子表达式‘(\w+)’的意思就是匹配一个或者多个单词字符；接着看第二个子表达式，‘|’表示选择，即出现‘.’或者‘_’，后面的‘?’表示该子表达式出现一次或者零次，因此第二个子表达式表示‘.’或‘_’出现不出现都匹配。第三个子表达式‘(\w*)’表示零个或多个单词字符，其中‘*’表示重复零次或者多次。后面的子表达式根据已经介绍的内容，已经可以容易理解，就不再赘述。通过对正则表达式匹配模式串的分析，可以容易理解运行结果。

下面一个例子通过正则表达式识别和打印IP地址的各个部分：

#include <regex>

#include <iostream>

#include <string>

/// Validates a colon-separated, four-field numeric address and prints its
/// fields.
///
/// Always writes "<ip> \t: valid" or "<ip> \t: invalid" to std::cout. When the
/// whole string matches, the four captured fields follow as "b1: " .. "b4: ",
/// one per line.
///
/// @param ip  Text to check, e.g. "1:22:33:444".
void show_ip_parts(const std::string& ip)
{
    // Four capture groups of one to three digits each, separated by ':'.
    const std::regex pattern("(\\d{1,3}):(\\d{1,3}):(\\d{1,3}):(\\d{1,3})");

    // Receives the sequence of sub-matches.
    std::smatch result;

    // regex_match requires the entire string to match the pattern.
    const bool valid = std::regex_match(ip, result, pattern);

    std::cout << ip << " \t: " << (valid ? "valid" : "invalid")
              << std::endl;

    if (valid)
    {
        // result[0] is the whole match; the capture groups start at index 1.
        for (std::size_t i = 1; i < result.size(); ++i)
        {
            std::cout << "b" << i << ": " << result[i] << std::endl;
        }
    }
}

/// Demo driver: feeds three sample strings to show_ip_parts().
int main()
{
    const char* const samples[] = { "1:22:33:444", "1:22:33:4444", "100:200" };

    for (const char* sample : samples)
    {
        show_ip_parts(sample);
    }

    return 0;
}

运行结果：

这里还是对正则表达式的模式串做一个说明：首先还是通过‘()’将这个串分成几个子表达式，其中\d表示匹配一个数字，{m,n}表示前一项重复次数的范围，例如\d{1,3}可以理解为匹配一个小于1000的数字（1-3位数都符合匹配要求）。

程序中还使用了match_results类，用来保存匹配的每一个子序列。调用regex_match(ip,result,pattern)，表示将ip中与模式串pattern匹配的结果放在result中。

result最后可以通过下标来访问各个匹配的子表达式。

C++11 正则表达式——实例2

下面来介绍和regex_match()很像的regex_search()的使用实例，regex_match()要求正则表达式必须与整个目标串完全匹配，regex_search()只要求目标串中存在匹配项就可以。

#include <regex>

#include <iostream>

#include <string>

/// Demo: finds the first word ending in "day" in a sentence and prints its
/// capture groups, one per line, followed by an empty line.
int main()
{
    // std::regex (not std::tr1::regex) so the pattern type agrees with the
    // std::smatch / std::regex_search used below.
    const std::regex pattern("(\\w+day)");

    // The source text to search.
    std::string weekend = "Saturday and Sunday";

    std::smatch result;

    const bool match = std::regex_search(weekend, result, pattern);

    if (match)
    {
        // result[0] is the whole match; capture groups start at index 1.
        for (size_t i = 1; i < result.size(); ++i)
        {
            std::cout << result[i] << std::endl;
        }
    }

    std::cout << std::endl;

    return 0;
}

运行结果：

上面这个例子只能返回第一个匹配的项，如果要返回所有匹配的子序列，可以使用下面的方式：

#include <regex>

#include <iostream>

#include <string>

/// Demo: prints every word ending in "day" found in a sentence, one per line,
/// followed by an empty line.
int main()
{
    const std::regex dayWord("\\w+day");

    // The source text to search.
    std::string weekend = "Saturday and Sunday, but some Fridays also.";

    // A default-constructed token iterator serves as the end-of-sequence marker.
    const std::sregex_token_iterator last;
    std::sregex_token_iterator current(weekend.begin(), weekend.end(), dayWord);

    while (current != last)
    {
        std::cout << *current << std::endl;
        ++current;
    }

    std::cout << std::endl;

    return 0;
}

运行结果：

下面的例子将元音字母打头的单词前面的a替换为an：

#include <regex>

#include <iostream>

#include <string>

/// Demo: rewrites "a <vowel>" as "an <vowel>" in a sentence and prints the
/// result followed by an empty line.
int main()
{
    // Two capture groups: the outer "a <vowel>" and the inner vowel letter.
    const std::regex pattern("(\\ba (a|e|i|u|o))+");

    // Text to transform.
    const std::string text = "This is a element and this a unique ID.";

    // Replacement format; $2 inserts the second capture group.
    const std::string replace = "an $2";

    std::cout << std::regex_replace(text, pattern, replace) << std::endl;
    std::cout << std::endl;

    return 0;
}

运行结果：

还是来说明一下，这里主要使用了regex_replace(text, pattern, replace)，意思是将text的内容按照pattern进行匹配，匹配成功的使用replace串进行替换，并将替换后的结果作为函数值返回。需要注意的是std::string replace = "an $2"; 这里‘$2’表示模式串的第二个子表达式所匹配的内容，

也就是紧跟在“a ”后面的那个元音字母（a、e、i、o、u之一）。

C++11 正则表达式——实例3

下面一个例子将进行年月日格式的转换，将DD-MM-YYYY –> YYYY-MM-DD，其中‘.’或者‘/’都能正确识别。

#include <regex>

#include <iostream>

#include <string>

/// Reverses the field order of a day/month/year date found in the input.
///
/// A date is one or two digits, a separator ('.', '-' or '/'), one or two
/// digits, a separator, and four digits. Each such occurrence is rewritten
/// with its five captured pieces in reverse order; all other text is returned
/// unchanged.
///
/// @param date  Text containing the date, e.g. "12.08.2008".
/// @return The text with each matched date reordered, e.g. "2008.08.12".
std::string format_date(const std::string& date)
{
    // Compiled once on first use: building a std::regex is expensive and
    // this pattern never changes between calls.
    static const std::regex pattern("(\\d{1,2})(\\.|-|/)(\\d{1,2})(\\.|-|/)(\\d{4})");

    // Transformation pattern: reverses the position of all capture groups.
    const std::string replacer = "$5$4$3$2$1";

    // Apply the transformation.
    return std::regex_replace(date, pattern, replacer);
}

/// Demo driver: prints "<original> -> <converted>" for two sample dates,
/// followed by an empty line.
int main()
{
    const std::string samples[] = { "1/2/2008", "12.08.2008" };

    for (const std::string& original : samples)
    {
        const std::string converted = format_date(original);
        std::cout << original << " -> " << converted << std::endl;
    }

    std::cout << std::endl;

    return 0;
}

运行结果：

说明，这个例子也很有实用价值，这里用到的正则表达式的匹配模式前面都已经进行过说明，就不再分析。

相信通过以上例子，对正则表达式的运用已经有了一个不错的了解，下面再来添加一个实例，加深一下理解。

下面一个例子用来查找给定文本中new的个数和delete的个数是否相等：

#include <iostream>

#include <string>

#include <regex>

/// Demo: counts occurrences of "new" and "delete" in a text and reports
/// whether the two counts differ.
int main() {
    // Do "new" and "delete" occur the same number of times?
    std::regex reg("(new)|(delete)");

    std::smatch m;

    // Adjacent literals are joined by the compiler; this avoids a backslash
    // line continuation inside the string literal.
    std::string s =
        "Calls to new must be followed by delete. "
        "Calling simply new results in a leak!";

    int new_counter = 0;
    int delete_counter = 0;

    std::string::const_iterator it = s.begin();
    std::string::const_iterator end = s.end();

    while (std::regex_search(it, end, m, reg))
    {
        // Group 1 participates for "new"; otherwise the match was "delete".
        if (m[1].matched)
            ++new_counter;
        else
            ++delete_counter;

        // Resume the search just past the current match.
        it = m[0].second;
    }

    if (new_counter != delete_counter)
        std::cout << "Leak detected!\n";
    else
        std::cout << "Seems ok...\n";

    std::cout << std::endl;
}

运行结果：

运行结果表明，new和delete的数量不相等，也就是发生了“内存泄露”。

为了帮助理解上面对于match_results类型的下标操作的意义，请看ISO/IEC 14882 C++11的说明：

#include <iostream>

#include <string>

#include <regex>

using namespace std;

/// Function object that accumulates the integer value of capture group 1
/// from every match result it is called with.
class regex_callback {
public:
    /// Starts with a running total of zero.
    regex_callback() : sum_(0) {}

    /// Adds the number parsed (via atoi) from capture group 1 of `what`.
    template <typename T> void operator()(const T& what) {
        const int value = atoi(what[1].str().c_str());
        sum_ += value;
    }

    /// Returns the running total.
    int sum() const { return sum_; }

private:
    int sum_;
};

int main() {

regex reg("(\\d+),?");

string s="1,1,2,3,5,8,13,21";

sregex_iterator it(s.begin(),s.end(),reg);

sregex_iterator end;

regex_callback c;

int sum=for_each(it,end,c).sum();//for_each返回的是这个函数对象，因此可以调用sum

cout<<sum<<endl;

cout<<endl;

}

运行结果：

#include <iostream>

#include <string>

#include <regex>

using namespace std;

int main()

{

regex reg("/");

vector<std::string> vec;

string s="Split/Vulue/Teather/Neusoft/Write/By/Lanwei";

sregex_token_iterator it(s.begin(),s.end(),reg,-1);//// -1逆向匹配,就是匹配除了'/'之外的

sregex_token_iterator end ;

while(it!=end)

vec.push_back(*it++);

copy(vec.begin(),vec.end(),ostream_iterator<std::string>( cout,"\n"));

}

运行结果：

C++11-新增正则表达式的更多相关文章

Python for Informatics 第11章正则表达式二（译）
注:文章原文为Dr. Charles Severance 的 <Python for Informatics>.文中代码用3.4版改写,并在本机测试通过. 11.1 正则表达式的字符匹配 ...
hive 0.10 0.11新增特性综述
我们的hive版本升迁经历了0.7.1 -> 0.8.1 -> 0.9.0,并且线上shark所依赖的hive版本也停留在0.9.0上,在这些版本上有我们自己的bug fix patch和 ...
Java SE 11 新增特性
Java SE 11 新增特性作者:Grey 原文地址:Java SE 11 新增特性源码源仓库: Github:java_new_features 镜像仓库: GitCode:java_new ...
[JS]笔记11之正则表达式
-->什么是正则表达式-->定义正则-->正则的索引-->元字符-->方括号.量词.其他符号-->RegExp 对象的方法-->String 对象方法 1.定 ...
Python for Informatics 第11章正则表达式六（译）
注:文章原文为Dr. Charles Severance 的 <Python for Informatics>.文中代码用3.4版改写,并在本机测试通过. 11.7 调试 Python有一 ...
Python for Informatics 第11章正则表达式五（译）
注:文章原文为Dr. Charles Severance 的 <Python for Informatics>.文中代码用3.4版改写,并在本机测试通过. 11.4 转义字符之前我们在正 ...
Python for Informatics 第11章正则表达式四（译）
注:文章原文为Dr. Charles Severance 的 <Python for Informatics>.文中代码用3.4版改写,并在本机测试通过. 11.3 组合查询和抽取如果我 ...
Python for Informatics 第11章正则表达式三（译）
注:文章原文为Dr. Charles Severance 的 <Python for Informatics>.文中代码用3.4版改写,并在本机测试通过. 11.2 用正则表达式抽取数据 ...
c++11新增的一些便利的算法
c++11新增加了一些便利的算法,这些新增的算法使我们的代码写起来更简洁方便,这里仅仅列举一些常用的新增算法,算是做个总结,更多的新增算法读者可以参考http://en.cppreference.co ...
C++11新增容器以及元组
上次说了C++11的部分新特性,这里我们来说说新增的容器. unordered_map unordered_set unordered_multimap unordered_multiset arra ...

随机推荐

xml大项目，增删改查
using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; usin ...
BusyBox inittab
# /etc/inittab init(8) configuration for BusyBox## Copyright (C) 1999-2004 by Erik Andersen <ande ...
第一次使用bootstrap3做的响应式网站
第一次使用bootstrap3,发现对移动支持得不错,可以很快的开发出一个支持移动和PC端的网站作为一个后台程序员觉得得界面做得还可以, 按以前是只能自己看看了时间线来自国外网站,使用到的css如 ...
在linux上安装redmine
Redmine 是一个开源的.基于Web的项目管理和缺陷跟踪工具.它用日历和甘特图辅助项目及进度可视化显示.同时它又支持多项目管理.Redmine是一个自由开放源码软件解决方案,它提供集成的项目管理 ...
Android Studio：Multiple dex files define Landroid/support/annotation/AnimRes
近期真的比較忙,一不小心博客又荒了两个月. 从今天起.决定重返csdn,多多纪录和分享. 先从一个近期被折磨的死去活来的问题. 由于升级了V4包,就一直报这个问题: com.android.dex.D ...
跑在Docker下的RHEL7编译Java8源码包
1.运行Docker时需要加参数--cap-add=SYS_PTRACE,比如: docker run --cap-add=SYS_PTRACE --name buildjava8 -v /opt/r ...
Namenode HA原理详解
社区hadoop2.2.0 release版本开始支持NameNode的HA,本文将详细描述NameNode HA内部的设计与实现. 为什么要Namenode HA? 1. NameNode High ...
终于想明白一些事，关于NAS
一直以来想搞好一部NAS存储小孩的视频和照片,一直纠结用什么硬件,硬件解决后虽然不甚满意,不过无论怎么样都算投入巨资(超过7千……)组装完毕,然后就一直纠结用什么NAS系统,终于下定决心使用了OMV, ...
Hibernate--关系映射和关联关系的CRUD
Spring 4 官方文档学习（十一）Web MVC 框架
介绍Spring Web MVC 框架 Spring Web MVC的特性其他MVC实现的可插拔性 DispatcherServlet 在WebApplicationContext中的特殊的bean ...

C++11-新增正则表达式

C++11 正则表达式——基础知识介绍

C++11 正则表达式——实例1

C++11 正则表达式——实例2

C++11 正则表达式——实例3

C++11-新增正则表达式的更多相关文章

随机推荐

热门专题