https://msdn.microsoft.com/zh-cn/library/system.text.regularexpressions.regex(v=vs.110).aspx

https://github.com/StackExchange/dapper-dot-net

 /// <summary>
///
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void StatsoneForm_Load(object sender, EventArgs e)
{
string s = @"<tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'> <td class='xl67' height='19' style='height:14.25pt;'></td> <td class='xl71' x:num>110000</td> <td class='xl71' x:str>北京市</td> <td class='xl67'></td> <td class='xl70'></td> <td class='xl70'></td> <td class='xl70'></td> <td colspan='3' style='mso-ignore:colspan;'></td> </tr>";
string f = ExtensionPost(s);
MessageBox.Show(f);
string sb = @"<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110000<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>北京市</span></p>";
string fb = ExtensionPostb(sb);
MessageBox.Show(fb); string strhtml = @"<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110000<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>北京市</span></p><p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110100<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>市辖区</span></p><p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110101<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>东城区</span></p><p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110102<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>西城区</span></p><p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110105<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>朝阳区</span></p><p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110106<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>丰台区</span></p>";
IEnumerable<AreaHtmlValue> htmlValue = GetRegValue(@"<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>(?<code>\d+)<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>(?<name>\w*)</span></p>", strhtml);
List<AreaInfo> areaList = (from v in htmlValue
// let name = v.Name.Replace("自治区直辖县级行政区划", "县").Replace("市辖区", "市").Replace("省直辖县级行政区划", "县")
// .Replace("县", "")
select new AreaInfo
{
AreaCode = v.Code.Substring(0, 5),
AreaName = v.Name,
AreaFullName = v.Name,
ParentAreaCode = "0",
ParentId = 0,
CreateTime = DateTime.Now,
AreaYear = 2015
}).ToList(); this.dataGridView1.DataSource = areaList; }
/// <summary>
/// <tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'>
/// <td class='xl67' height='19' style='height:14.25pt;'></td>
/// <td class='xl71' x:num>654326</td>
///<td class='xl71' x:str><span style='mso-spacerun:yes;'>    </span><font class='font3'>吉木乃县</font></td>
///<td class='xl67'></td>
///<td class='xl70'></td>
///<td class='xl70'></td>
///<td class='xl70'></td>
///<td colspan='3' style='mso-ignore:colspan;'></td>
///</tr>
/// </summary>
/// <param name="url"></param>
/// <returns></returns>
static String ExtensionPost(String url)
{ //<tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'> <td class='xl67' height='19' style='height:14.25pt;'></td> <td class='xl71' x:num>110000</td> <td class='xl71' x:str>北京市</td> <td class='xl67'></td> <td class='xl70'></td> <td class='xl70'></td> <td class='xl70'></td> <td colspan='3' style='mso-ignore:colspan;'></td> </tr>
//<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110000<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'> 北京市</span></p> //<tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'><td class='xl67' height='19' style='height:14.25pt;'></td><td class='xl71' x:num>654326</td><td class='xl71' x:str><span style='mso-spacerun:yes;'>    </span><font class='font3'>吉木乃县</font></td><td class='xl67'></td><td class='xl70'></td><td class='xl70'></td><td class='xl70'></td><td colspan='3' style='mso-ignore:colspan;'></td></tr>
// Regex r = new Regex(@"<tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'><td class='xl67' height='19' style='height:14.25pt;'></td><td class='xl71' x:num>(?<port>\d+)</td><td class='xl71' x:str><span style='mso-spacerun:yes;'>    </span><font class='font3'>(?<proto>\w+)</font></td><td class='xl67'></td><td class='xl70'></td><td class='xl70'></td><td class='xl70'></td><td colspan='3' style='mso-ignore:colspan;'></td></tr>",
Regex r = new Regex(@"<tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'> <td class='xl67' height='19' style='height:14.25pt;'></td> <td class='xl71' x:num>(?<port>\d+)</td> <td class='xl71' x:str>(?<proto>\w+)</td> <td class='xl67'></td> <td class='xl70'></td> <td class='xl70'></td> <td class='xl70'></td> <td colspan='3' style='mso-ignore:colspan;'></td> </tr>",
RegexOptions.Compiled);
return r.Match(url).Result("${proto}${port}");
}
/// <summary>
///
/// </summary>
/// <param name="url"></param>
/// <returns></returns>
static string ExtensionPostb(string url)
{
Regex r = new Regex(@"<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>(?<port>\d+)<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>(?<proto>\w*)</span></p>",
RegexOptions.Compiled);
return r.Match(url).Result("${proto}${port}");
} /// <summary>
/// http://files2.mca.gov.cn/www/201512/20151224151630189.htm
/// <tr height="19" style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'><td class="xl67" height="19" style='height:14.25pt;'></td><td class="xl71" x:num>110000</td><td class="xl71" x:str>北京市</td><td class="xl67"></td><td class="xl70"></td><td class="xl70"></td><td class="xl70"></td><td colspan="3" style='mso-ignore:colspan;'></td></tr>
/// http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201504/t20150415_712722.html
/// <p class="MsoNormal" style="line-height: 150%"><span lang="EN-US" style="line-height: 150%; font-family: 'Times New Roman', 'serif'; font-size: 12pt">110000<span>  </span></span><span style="line-height: 150%; font-family: 宋体; font-size: 12pt"> 北京市</span></p>
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void button1_Click(object sender, EventArgs e)
{
try
{
//1
string url = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201504/t20150415_712722.html";
//2
//string url = "http://files2.mca.gov.cn/www/201512/20151224151630189.htm";
// 获取相关HTML块
//IEnumerable<AreaHtmlValue> htmlValue =GetRegValue(@"<tr class='villagetr'><td>(?<code>\d{12})</td><td>(?<type>\d{3})</td><td>(?<name>\w*)</td></tr>",GetHtml(url));
//<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>110000<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'> 北京市</span></p>
//1
IEnumerable<AreaHtmlValue> htmlValue = GetRegValue(@"<p class='msonormal' style='line-height: 150%'><span lang='en-us' style='line-height: 150%; font-family: 'times new roman', 'serif'; font-size: 12pt'>(?<code>\d+)<span>  </span></span><span style='line-height: 150%; font-family: 宋体; font-size: 12pt'>(?<name>\w*)</span></p>", GetHtml(url));
//2
//IEnumerable<AreaHtmlValue> htmlValue = GetRegValue(@"<tr height='19' style='height:14.25pt;mso-height-source:userset;mso-height-alt:285;'><td class='xl67' height='19' style='height:14.25pt;'></td><td class='xl71' x:num>(?<code>\d+)</td><td class='xl71' x:str><span style='mso-spacerun:yes;'>    </span><font class='font3'>(?<name>\w*)</font></td><td class='xl67'></td><td class='xl70'></td><td class='xl70'></td><td class='xl70'></td><td colspan='3' style='mso-ignore:colspan;'></td></tr>", GetHtml(url));
//this.richTextBox1.Text = GetHtml(url);
// this.textBox1.Text = GetHtml(url);
// this.textBox1.SelectAll();
List<AreaInfo> areaList = (from v in htmlValue
//let name = v.Name.Replace("自治区直辖县级行政区划", "县").Replace("市辖区", "市").Replace("省直辖县级行政区划", "县")
//.Replace("县", "")
select new AreaInfo
{
AreaCode = v.Code.Substring(0, 6),
AreaName = v.Name,
AreaFullName = v.Name,
ParentAreaCode = "0", ParentId = 0,
CreateTime = DateTime.Now,
AreaYear = 2015
}).ToList(); this.dataGridView2.DataSource = areaList; WebClient wc = new WebClient();
string mainData = Encoding.UTF8.GetString(wc.DownloadData(string.Format("http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201504/t20150415_712722.html")));
this.richTextBox2.Text =RemoveScript(RemoveStyle(ReplaceEnter(mainData)));
// this.textBox2.Text = RemoveScript(RemoveStyle(ReplaceEnter(mainData)));
// this.textBox2.SelectAll(); }
catch (Exception ex)
{
ex.Message.ToString();
} }
#region 网页源码 /// <summary>
///
/// </summary>
/// <param name="url"></param>
private static void updowndimg(string url)
{
WebClient client = new WebClient();
string html = client.DownloadString(url);
MatchCollection matches = Regex.Matches(html, "<img\\s*.*src=\"(.+?)\".*/>");
for (int i = 0; i < matches.Count; i++)
{
string img = matches[i].Groups[1].Value.Replace("\"", string.Empty);
img = "url/" + img;
client.DownloadFile(img, @"c:\g\" + Path.GetFileName(img));
Console.WriteLine(img);
}
Console.ReadKey();
}
/// <summary>
/// 例如,Find_po在字开头处查找以"po"开头的字符串:
/// </summary>
static void Find_po()
{
string text = @" I can not find my position in Beijing ";
string pattern = @"\bpo\S*ion\b";
MatchCollection matches = Regex.Matches(text, pattern, RegexOptions.IgnoreCase
| RegexOptions.IgnorePatternWhitespace | RegexOptions.ExplicitCapture);
WriteMatches(text, matches);
}
/// <summary>
///
/// </summary>
/// <param name="text"></param>
/// <param name="matches"></param>
static void WriteMatches(string text, MatchCollection matches)
{
Console.WriteLine("Original text was: \n\n" + text + "\n");
Console.WriteLine("No. of matches: " + matches.Count);
foreach (Match nextMatch in matches)
{
int Index = nextMatch.Index;
string result = nextMatch.ToString();
int charsBefore = (Index < 5) ? Index : 5;
int fromEnd = text.Length - Index - result.Length;
int charsAfter = (fromEnd < 5) ? fromEnd : 5;
int charsToDisplay = charsBefore + charsAfter + result.Length;
Console.WriteLine("Index: {0}, \tString: {1}, \t{2}", Index, result,
text.Substring(Index - charsBefore, charsToDisplay));
}
}
/// <summary>
/// 如,"http://www.yahoo.com.cn:8080/index.html"将返回"http:8080"。
/// </summary>
/// <param name="url"></param>
/// <returns></returns>
String Extension(String url)
{
Regex r = new Regex(@"^(?<proto>\w+)://[^/]+?(?<port>:\d+)?/",
RegexOptions.Compiled);
return r.Match(url).Result("${proto}${port}");
}
/// <summary>
/// 获取远程网页源码
/// </summary>
/// <param name="url"></param>
/// <returns></returns>
private static string GetHtml(string url)
{
try
{
WebRequest webRequest = WebRequest.Create(url);
WebResponse webResponse = webRequest.GetResponse();
Stream reader = webResponse.GetResponseStream(); if (reader != null)
{
var respStreamReader = new StreamReader(reader, Encoding.UTF8); // var cbuffer = new char[1024]; int byteRead = respStreamReader.Read(cbuffer, 0, 256); string strBuff = string.Empty; while (byteRead != 0)
{
var strResp = new string(cbuffer, 0, byteRead); strBuff = strBuff + strResp; byteRead = respStreamReader.Read(cbuffer, 0, 256);
} strBuff = RemoveScript(RemoveStyle(ReplaceEnter(strBuff))); return strBuff;
}
}
catch (Exception)
{
AreaLogHelper.WriteLogFile("【异常URL】" + url);
Console.WriteLine("【异常URL】" + url);
} return string.Empty;
} /// <summary>
/// 替换网页中的换行和引号
/// </summary>
/// <param name="htmlCode">HTML源代码</param>
/// <returns></returns>
private static string ReplaceEnter(string htmlCode)
{
if (string.IsNullOrEmpty(htmlCode))
return string.Empty;
return htmlCode.Replace("\r\n", "").Replace("\"", "'").Replace("\n", "").Replace("\r", "").Replace("   ", "").Replace("  ", "").Replace(" ", "").Replace(" ", "").Replace(" ", "").ToLower();//.Replace("\"", "").Replace(" ", "")
} #region private methods
private static string RemoveComment(string input)
{
string result = input;
//remove comment
result = Regex.Replace(result, @"<!--[^-]*-->", string.Empty, RegexOptions.IgnoreCase);
return result;
}
private static string RemoveStyle(string input)
{
string result = input;
//remove all styles
result = Regex.Replace(result, @"<style[^>]*?>.*?</style>", string.Empty, RegexOptions.IgnoreCase | RegexOptions.Singleline);
return result;
}
private static string RemoveScript(string input)
{
string result = input;
result = Regex.Replace(result, @"<script[^>]*?>.*?</script>", string.Empty, RegexOptions.IgnoreCase | RegexOptions.Singleline);
result = Regex.Replace(result, @"<noscript[^>]*?>.*?</noscript>", string.Empty, RegexOptions.IgnoreCase | RegexOptions.Singleline);
return result;
}
private static string RemoveTags(string input)
{
string result = input;
result = result.Replace(" ", " ");
result = result.Replace("", "\"");
result = result.Replace("<", "<");
result = result.Replace(">", ">");
result = result.Replace("&", "&");
result = result.Replace("<br>", "\r\n");
result = Regex.Replace(result, @"<[\s\S]*?>", string.Empty, RegexOptions.IgnoreCase);
return result;
}
#endregion
/// <summary>
/// 执行正则提取出值
/// </summary>
/// <param name="regexString">正则表达式</param>
/// <param name="remoteStr">HtmlCode源代码</param>
/// <returns></returns>
private static IEnumerable<AreaHtmlValue> GetRegValue(string regexString, string remoteStr)
{
var reg = new Regex(regexString, RegexOptions.Compiled);//RegexOptions.Compiled
MatchCollection mc = reg.Matches(remoteStr); return (from Match m in mc
select new AreaHtmlValue
{
Code = m.Groups["code"].Value,
Name = m.Groups["name"].Value,
// Type = m.Groups["type"].Value
}).ToList();
} private class AreaHtmlValue
{
public string Code { get; set; }
public string Name { get; set; } public string Type { get; set; }
} #endregion }

  

IF EXISTS (select * from sysobjects where id = object_id(N'[dbo].AreasList') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
DROP TABLE AreasList
GO
CREATE TABLE [AreasList]
(
[AreaId] INT IDENTITY(1,1) PRIMARY KEY,
[AreaCode] nvarchar(50) NOT NULL,
[AreaName] nvarchar(50) NOT NULL,
[AreaFullName] nvarchar(250) NULL,
[ParentAreaCode] nvarchar(50) NULL,
[ParentId] INT NULL,
[AreaType] nvarchar(10) NULL,
[AreaYear] INT NOT NULL, --年份版本
[CreateTime] datetime NOT NULL DEFAULT (getdate())
)
GO
--描述
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'区域table' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'区域id' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaId'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'区域标识' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaCode'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'区域名称' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaName'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'区域全称' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaFullName'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'父级标识' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'ParentAreaCode'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'父级id' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'ParentId'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'城乡分类 (111:主城区、112:城乡结合区、121:镇中心区、122:镇乡结合区、123:特殊区域、210:乡中心区、220:村庄)' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaType'
GO
--sp_updateextendedproperty
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'版本年份' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaYear'
GO
EXEC sys.sp_updateextendedproperty @name=N'MS_Description', @value=N'版本年份' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'AreaYear'
GO
EXEC sys.sp_addextendedproperty @name=N'MS_Description', @value=N'创建时间' , @level0type=N'SCHEMA',@level0name=N'dbo', @level1type=N'TABLE',@level1name=N'AreasList', @level2type=N'COLUMN',@level2name=N'CreateTime'
GO SELECT * FROM [AreasList]
SELECT TOP 50 * FROM [AreasList]
IF ParentAreaCode='11'
SELECT AreaId FROM AreasList WHERE AreaCode='11'
--ParentId
SELECT * FROM AreasList WHERE ParentAreaCode='11'
---SQL Server遍历表345509 DECLARE @tempId INT,@tempName VARCHAR(100),@tempcount INT,@id INT,@ParentAreaCode VARCHAR(50),@parentId INT,@areaid INT
set @tempcount=(select COUNT(*) from AreasList)
SET @id=709245
WHILE EXISTS ( SELECT AreaId FROM AreasList)
BEGIN
SET ROWCOUNT @id
IF @id=709253
BREAK
SELECT @tempId = AreaId ,
@tempName = AreaName,
@ParentAreaCode=ParentAreaCode
FROM AreasList
SET ROWCOUNT 0
--
SELECT @areaid=AreaId FROM AreasList WHERE AreaCode=@ParentAreaCode
IF @ParentAreaCode IS NOT NULL OR @ParentAreaCode<>''
BEGIN
SET @parentId=@areaid
--更新父节点
UPDATE AreasList SET ParentId=@parentId WHERE AreaId=@tempId
END
PRINT 'Name:----' +CAST(@tempId AS VARCHAR(20))+ @tempName+'parentId'+STR(@parentId)
set @id=@id+1;
END SELECT * FROM dbo.AreasList WHERE ParentAreaCode IS NULL
UPDATE AreasList SET ParentId=null WHERE ParentAreaCode IS NULL UPDATE AreasList SET ParentId=null WHERE AreaId=709250
UPDATE AreasList SET ParentId=null WHERE AreaId=709251
UPDATE AreasList SET ParentId=null WHERE AreaId=709252
--
DECLARE @tempId INT,@tempName VARCHAR(100),@tempcount INT,@id INT
set @tempcount=(select COUNT(*) from AreasList)
SET @id=1
WHILE EXISTS ( SELECT AreaId FROM AreasList)
BEGIN
SET ROWCOUNT @id
SELECT @tempId = AreaId ,
@tempName = AreaName
FROM AreasList
SET ROWCOUNT 0
--delete from @temp where AreaId = @tempId
PRINT 'Name:----' +CAST(@tempId AS VARCHAR(20))+ @tempName
set @id=@id+1;
END --
declare @tempId int,@tempName varchar(10) ,@tempcount int
set @tempcount=(select COUNT(*) from AreasList);
set @tempId=0;
while (@tempId<=@tempcount )
begin
set @tempName=(select AreaName from AreasList where AreaId=@tempId);
print 'the id is :'+str(@tempId) +' the name is : '+@tempName;
set @tempId=@tempId+1;
END --打开游标
declare @tempId int,@tempName varchar(10),@ParentAreaCode VARCHAR(50),@parentId INT,@areaid INT
DECLARE test_Cursor CURSOR LOCAL FOR
SELECT AreaId,AreaName,ParentAreaCode FROM AreasList
OPEN test_Cursor
WHILE @@FETCH_STATUS = 0
BEGIN
FETCH NEXT FROM test_Cursor INTO @tempId,@tempname,@ParentAreaCode
--
IF @ParentAreaCode IS NOT NULL OR @ParentAreaCode<>''
BEGIN
SELECT @areaid=AreaId FROM AreasList WHERE AreaCode=@ParentAreaCode
SET @parentId=@areaid
--更新父节点
--UPDATE AreasList SET ParentId=@parentId WHERE AreaId=@tempId
UPDATE AreasList SET ParentId=@parentId WHERE AreaId=@tempId
END
PRINT 'Name:----' +str(@tempId)+ @tempName+'parentId'+STR(@parentId)
END
CLOSE test_Cursor
DEALLOCATE test_Cursor

  

https://maps.googleapis.com/maps/api/geocode/json?address=Winnetka&key=YOUR_API_KEY
https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY
https://maps.googleapis.com/maps/api/geocode/xml?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY
https://maps.googleapis.com/maps/api/geocode/json?address=Winnetka&bounds=34.172684,-118.604794|34.236144,-118.500938&key=YOUR_API_KEY

http://restapi.amap.com/v3/geocode/geo?parameters
http://restapi.amap.com/v3/geocode/regeo?parameters

http://lbs.amap.com/api/webservice/reference/georegeo/
http://restapi.amap.com/v3/geocode/regeo?output=xml&location=116.310003,39.991957&key=<用户的key>&radius=1000&extensions=all
http://restapi.amap.com/v3/geocode/geo?address=北京市朝阳区阜通东大街6号&output=xml&key=<用户的key>

http://lbsyun.baidu.com/index.php?title=webapi/ip-api
http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-geocoding

http://api.map.baidu.com/geocoder/v2/?ak=<用户的key>&callback=renderReverse&location=39.983424,116.322987&output=json&pois=1
http://api.map.baidu.com/geocoder/v2/?ak=<用户的key>&callback=renderReverse&location=39.983424,116.322987&output=xml&pois=1

https://www.microsoft.com/maps/choose-your-bing-maps-API.aspx

https://developer.yahoo.com/maps/flash/jsGettingStarted.html

http://www.bing.com/dev/en-us/dev-center

https://msdn.microsoft.com/en-us/library/gg427610.aspx

//1.省未显示
List<AreaInfo> areaList2 = (from v in htmlValue1
let name = v.Name.Replace("自治区直辖县级行政区划", "县").Replace("市辖区", "市").Replace("省直辖县级行政区划", "县")
.Replace("县", "")
select new AreaInfo
{
AreaCode = v.Code.Substring(0, 6),
AreaName = v.Name,
AreaFullName = v.Name,
ParentAreaCode = "0",
ParentId = 0,
CreateTime = DateTime.Now,
AreaYear = ye
}).ToList();
//
List<AreaInfo> areaList4 = (from v in htmlValue4
let name = v.Name.Replace("自治区直辖县级行政区划", "县").Replace("市辖区", "市").Replace("省直辖县级行政区划", "县")
.Replace("县", "")
select new AreaInfo
{
AreaCode = v.Code.Substring(0, 6),
AreaName = v.Name,
AreaFullName = v.Name,
ParentAreaCode = "0",
ParentId = 0,
CreateTime = DateTime.Now,
AreaYear = ye
}).ToList();
//
List<AreaInfo> areaList = (from v in htmlValue
let name = v.Name.Replace("自治区直辖县级行政区划", "县").Replace("市辖区", "市").Replace("省直辖县级行政区划", "县")
.Replace("县", "")
select new AreaInfo
{
AreaCode = v.Code.Substring(0, 6),
AreaName = v.Name,
AreaFullName = v.Name,
ParentAreaCode = "0",
ParentId = 0,
CreateTime = DateTime.Now,
AreaYear = ye
}).ToList(); List<AreaInfo> areaList3 = (from v in htmlValue2
let name = v.Name.Replace("自治区直辖县级行政区划", "县").Replace("市辖区", "市").Replace("省直辖县级行政区划", "县")
.Replace("县", "")
select new AreaInfo
{
AreaCode = v.Code.Substring(0, 6),
AreaName = v.Name,
AreaFullName = v.Name,
//
ParentAreaCode = "0",
ParentId = 0,
CreateTime = DateTime.Now,
AreaYear = ye
}).ToList(); //Concat保留重复项 Union 剔除重复项
List<AreaInfo> Result = areaList.Concat(areaList3).ToList <AreaInfo>();
Result = Result.Concat(areaList2).ToList<AreaInfo>();
Result = Result.Concat(areaList4).ToList<AreaInfo>(); ///升序
List<AreaInfo> listArea1 = Result.OrderBy(s => s.AreaCode).ToList<AreaInfo>();
//降序
List<AreaInfo> listArea2 = Result.OrderByDescending(s => s.AreaCode).ToList<AreaInfo>();
//Linq排序方式
List<AreaInfo> listArea3 = (from c in Result
orderby c.AreaCode ascending//ascending descending
select c).ToList<AreaInfo>();

  

csharp:正则表达式采集网页数据的更多相关文章

  1. API例子:用Python驱动Firefox采集网页数据

    1,引言 本文讲解怎样用Python驱动Firefox浏览器写一个简易的网页数据采集器.开源Python即时网络爬虫项目将与Scrapy(基于twisted的异步网络框架)集成,所以本例将使用Scra ...

  2. 用js采集网页数据并插入数据库最快的方法

    今天教大家一个快速采集网站数据的方法,因为太晚了,直接上例子,这里以采集易车网的产品数据为例. 思路:利用js获取网页数据并生成sql命令,执行sql命令把采集的数据插入数据库. 1.用谷歌浏览器或者 ...

  3. 采集网页数据---Using Java

    http://www.cnblogs.com/longwu/archive/2011/12/24/2300110.html 1).学习网页数据采集,首先必不可少的是学习java的正则表达式(Regex ...

  4. curl模拟自动登陆&采集网页数据

    <!DOCTYPE> <html> <head> <meta http-equiv="Content-Type" content=&quo ...

  5. 用groovy采集网页数据

    首先,用 http://groovyconsole.appspot.com/ 测试下面的代码,发现引用总是失败. 下载了GGTS: https://spring.io/tools/ggts 测试成功: ...

  6. 一个我经常用到的采集网页数据抓取网页获取数据的PHP函数类

    class get_c_str { var $str; var $start_str; var $end_str; var $start_pos; var $end_pos; var $c_str_l ...

  7. 初识python 之 爬虫:使用正则表达式爬取“糗事百科 - 文字版”网页数据

    初识python 之 爬虫:使用正则表达式爬取"古诗文"网页数据 的兄弟篇. 详细代码如下: #!/user/bin env python # author:Simple-Sir ...

  8. 初识python 之 爬虫:使用正则表达式爬取“古诗文”网页数据

    通过requests.re(正则表达式) 爬取"古诗文"网页数据. 详细代码如下: #!/user/bin env python # author:Simple-Sir # tim ...

  9. java网页数据抓取实例

    在很多行业中,要对行业数据进行分类汇总,及时分析行业数据,对于公司未来的发展,有很好的参照和横向对比.所以,在实际工作,我们可能要遇到数据采集这个概念,数据采集的最终目的就是要获得数据,提取有用的数据 ...

随机推荐

  1. Maven3路程(六)用Maven创建Spring3 MVC项目

    Maven3路程(六)用Maven创建Spring3 MVC项目 一.      环境 spring-framework-3.2.4.RELEASE jdk1.7.0_11 Maven3.0.5 ec ...

  2. [转]silverlight Datagrid 行上增加ToolTip

    有两种办法: 1. 直接在后台处理在数据绑定后 ,注册LoadingRow 事件this.DataGrid.LoadingRow += new EventHandler<DataGridRowE ...

  3. CSS基础(四):盒模型

    CSS盒模型 HTML文档中可以将每个元素都看作是长方形的盒子.而CSS盒模型规定了元素框处理元素内容content.内边距padding.边框border和外边距margin的方式.下图是W3C对于 ...

  4. Android下海康实时视频解码

    折腾了一个多月,终于调出来了.....首先吐槽一下海康SDK,同时也感谢之... 手头有个项目,需要实时抓取海康摄像头,我是在Android下实现的,海康官网上没有Android SDK,这里友情提醒 ...

  5. asp.net mvc 配合前端js的CMD模块化部署思想,小思路

    1)布局视图,用于协调普通视图和共享视图,实现js库自由选择引用 @{ //具体页面定义好需要引入的js文件,在这里通过共享视图渲染出来 var jsDefines = ViewBag.JsDefin ...

  6. square开源vim,tmux配置在linux上使用

    首先安装需要的软件 apt-get install vim ack-grep git tmux gnome-terminal ctags xclip silversearcher-ag 这里tmux需 ...

  7. FFmpeg编译出错_img_convert 找不到

    问题出现在下载的ffmpeg的版本不一样,在0.4.8以前的版本中还有img_convert这个函数,新版本中用sws_getContext和sws_scale代替了.简单说明如下: 新版本的ffmp ...

  8. 逆转序列的递归/尾递归(+destructuring assignment)实现(JavaScript + ES6)

    这里是用 JavaScript 做的逆转序列(数组/字符串)的递归/尾递归实现.另外还尝鲜用了一下 ES6 的destructuring assignment + spread operator 做了 ...

  9. 彻底理解JAVA动态代理

    代理设计模式 定义:为其他对象提供一种代理以控制对这个对象的访问. 代理模式的结构如下图所示. 动态代理使用 java动态代理机制以巧妙的方式实现了代理模式的设计理念. 代理模式示例代码 public ...

  10. Linux系统下安装rz/sz命令及使用说明

    对于经常使用Linux系统的人员来说,少不了将本地的文件上传到服务器或者从服务器上下载文件到本地,rz / sz命令很方便的帮我们实现了这个功能,但是很多Linux系统初始并没有这两个命令.今天,我们 ...