抓取小视频的url地址,然后将地址信息拷贝到迅雷里批量下载就ok了

主程序 代码

            //yazhouqingseAV 35
//zhifusiwaAV 29
//zipaishipin 30
//oumeiqingseAV 28
//katongdongman 31
//tongxingAV 32
//sanjidianying 33
// Selected category: slug "fengkuangqunjiao" pairs with classid 34 in the slug/classid list above.
var client = new WinHttpHelper();
var type = "fengkuangqunjiao";
// NOTE(review): the numeric literal was missing from the listing; 34 is taken from the
// slug/classid list above. Keep it in sync with `type`.
var classid = 34;

// Walk the category's listing pages until a page yields no detail links.
// NOTE(review): the start value (1) and the "first page has no suffix" rule were missing
// from the listing and are reconstructed here - confirm against the site's paging scheme.
for (int i = 1; ; i++)
{
    Console.WriteLine(i);

    // First page is index.html; later pages are index_<n>.html.
    string index = i == 1 ? "" : "_" + i;
    string pageUrl = "http://www.lang34.com/se/" + type + "/index" + index + ".html";

    var trs = RegexHelper.GetMathList(client.GET(pageUrl, Encoding.UTF8), "" + type + "/(.*?).html");

    // GetMathList can return null on failure, and a page without links means we are past
    // the last page (or the request failed). Either way stop instead of looping forever.
    if (trs == null || trs.Count == 0)
    {
        break;
    }

    foreach (var item in trs)
    {
        // Extract the numeric/text id between "<type>/" and ".html".
        string temp = "";
        if (!RegexHelper.GetMatchStr(item.ToString(), "" + type + "/(.*?).html", true, out temp))
        {
            continue;
        }

        // The player page embeds the media address as f:'<url>',
        string url = "http://www.lang34.com/e/DownSys/play/?classid=" + classid + "&id=" + temp + "&pathid=0";
        string htmltext = client.GET(url, Encoding.UTF8);

        string mp4 = "";
        if (!RegexHelper.GetMatchStr(htmltext, "f:'(.*?)',", true, out mp4))
        {
            continue;
        }

        // The title is best-effort: an empty title still produces an output line.
        string titile = "";
        RegexHelper.GetMatchStr(htmltext, " <title>(.*?)</title>", true, out titile);

        string output = mp4 + "?title" + titile + "\r\n";
        Console.WriteLine(output);
        File.AppendAllText("D://" + type + ".txt", output);
    }
}

网络请求类

using System;
using System.Collections.Generic;
using System.Text; namespace MyHelper4Web
{
/// <summary>
/// Small wrapper around the WinHttp.WinHttpRequest COM object that issues GET and POST
/// requests and returns the response body as bytes or as a decoded string.
/// Failures are not thrown: the byte[] overloads return the exception message and
/// source encoded with <see cref="Encoding.Default"/>.
/// </summary>
public class WinHttpHelper
{
    WinHttp.WinHttpRequest request;

    /// <summary>Value sent in the Accept request header.</summary>
    public string Accept = "*/*";

    /// <summary>Value sent in the User-Agent request header.</summary>
    public string UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; InfoPath.2; .NET4.0E)";

    /// <summary>Value sent in the Content-Type header of POST requests.</summary>
    public string ContentType = "application/json";// "application/x-www-form-urlencoded";

    /// <summary>
    /// Request timeout in seconds, passed to WaitForResponse.
    /// NOTE(review): the default literal was missing from the listing; 10 is a placeholder - confirm.
    /// </summary>
    public int SetTimeOut = 10;

    /// <summary>Whether redirects are followed automatically.</summary>
    public bool AllowAutoRedirect = true;

    /// <summary>Whether a redirect from https to http is allowed.</summary>
    public bool AllowHttpstoHttp = false;

    /// <summary>
    /// Creates a helper with the default headers and timeout.
    /// </summary>
    public WinHttpHelper()
    {
        request = new WinHttp.WinHttpRequest();
    }

    /// <summary>
    /// Creates a helper with custom request headers.
    /// </summary>
    /// <param name="Accept">Accept header value.</param>
    /// <param name="UserAgent">User-Agent header value.</param>
    /// <param name="ContentType">Content-Type header value (used by POST).</param>
    public WinHttpHelper(string Accept, string UserAgent, string ContentType)
        : this() // chain so that `request` is created; it was left null before
    {
        this.Accept = Accept;
        this.UserAgent = UserAgent;
        this.ContentType = ContentType;
    }

    /// <summary>
    /// Creates a helper with custom request headers and timeout.
    /// </summary>
    /// <param name="Accept">Accept header value.</param>
    /// <param name="UserAgent">User-Agent header value.</param>
    /// <param name="ContentType">Content-Type header value (used by POST).</param>
    /// <param name="SetTimeOut">Timeout in seconds.</param>
    public WinHttpHelper(string Accept, string UserAgent, string ContentType, int SetTimeOut)
        : this(Accept, UserAgent, ContentType)
    {
        this.SetTimeOut = SetTimeOut;
    }

    /// <summary>
    /// Shared request pipeline for GET and POST.
    /// </summary>
    /// <param name="method">HTTP verb passed to Open.</param>
    /// <param name="url">Target URL.</param>
    /// <param name="body">Payload passed to Send.</param>
    /// <param name="referer">Referer header value; omitted when null or empty.</param>
    /// <param name="sendContentType">Whether to send the Content-Type header.</param>
    /// <returns>
    /// The response body, an empty array when the body is absent, or the encoded
    /// exception message and source when the request throws.
    /// </returns>
    private byte[] Execute(string method, string url, string body, string referer, bool sendContentType)
    {
        try
        {
            // The COM object is reused across calls, so apply both flags every time;
            // otherwise a flag flipped back to its default would never take effect.
            request.set_Option(WinHttp.WinHttpRequestOption.WinHttpRequestOption_EnableRedirects, AllowAutoRedirect);
            request.set_Option(WinHttp.WinHttpRequestOption.WinHttpRequestOption_EnableHttpsToHttpRedirects, AllowHttpstoHttp);

            // Opened asynchronously; WaitForResponse below blocks up to SetTimeOut seconds.
            request.Open(method, url, true);
            request.SetRequestHeader("Accept", Accept);
            request.SetRequestHeader("User-Agent", UserAgent);
            if (sendContentType)
            {
                request.SetRequestHeader("Content-Type", ContentType);
            }
            if (!string.IsNullOrEmpty(referer))
            {
                request.SetRequestHeader("Referer", referer);
            }

            request.Send(body);
            request.WaitForResponse(SetTimeOut);

            // Guard against a null body so callers can always decode the result.
            return (byte[])request.ResponseBody ?? new byte[0];
        }
        catch (Exception ex)
        {
            // Deliberate best-effort contract: report the failure through the payload.
            return Encoding.Default.GetBytes(ex.Message + ex.Source);
        }
    }

    /// <summary>
    /// Requests a page with GET.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="refer">Referer header value; omitted when null or empty.</param>
    /// <returns>The response content as a byte array.</returns>
    public byte[] GET(string Url, string refer)
    {
        return Execute("GET", Url, "", refer, false);
    }

    /// <summary>
    /// Requests a page with GET and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into a string.</param>
    /// <returns>The response content as a string, or the exception message and source on failure.</returns>
    public string GET(string Url, Encoding Encode)
    {
        try
        {
            return Encode.GetString(GET(Url, ""));
        }
        catch (Exception ex)
        {
            return ex.Message + ex.Source;
        }
    }

    /// <summary>
    /// Requests a page with GET, sending a Referer header, and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="refer">Referer header value; omitted when null or empty.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into a string.</param>
    /// <returns>The response content as a string.</returns>
    public string GET(string Url, string refer, Encoding Encode)
    {
        return Encode.GetString(GET(Url, refer));
    }

    /// <summary>
    /// Requests a page with POST.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request payload.</param>
    /// <param name="Refer">Referer header value; omitted when null or empty.</param>
    /// <returns>The response content as a byte array.</returns>
    public byte[] POST(string Url, string PostData, string Refer)
    {
        return Execute("POST", Url, PostData, Refer, true);
    }

    /// <summary>
    /// Requests a page with POST and no Referer header.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request payload.</param>
    /// <returns>The response content as a byte array.</returns>
    public byte[] POST(string Url, string PostData)
    {
        return POST(Url, PostData, "");
    }

    /// <summary>
    /// Requests a page with POST and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request payload.</param>
    /// <param name="Refer">Referer header value; omitted when null or empty.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into a string.</param>
    /// <returns>The response content as a string, or the exception message and source on failure.</returns>
    public string POST(string Url, string PostData, string Refer, Encoding Encode)
    {
        try
        {
            return Encode.GetString(POST(Url, PostData, Refer));
        }
        catch (Exception ex)
        {
            return ex.Message + ex.Source;
        }
    }

    /// <summary>
    /// Requests a page with POST (no Referer header) and decodes the response.
    /// </summary>
    /// <param name="Url">URL to request.</param>
    /// <param name="PostData">Request payload.</param>
    /// <param name="Encode">Encoding used to turn the response bytes into a string.</param>
    /// <returns>The response content as a string, or the exception message and source on failure.</returns>
    public string POST(string Url, string PostData, Encoding Encode)
    {
        return POST(Url, PostData, "", Encode);
    }

    /// <summary>
    /// Returns the raw response headers of the last request.
    /// Despite the name, this is the full header block from GetAllResponseHeaders,
    /// not only the cookie headers.
    /// </summary>
    /// <returns>The header text, or an empty string when it cannot be read.</returns>
    public string GetAllCookis()
    {
        try
        {
            return request.GetAllResponseHeaders();
        }
        catch (Exception)
        {
            return "";
        }
    }
}
}

正则表达式类

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections; namespace MyHelper4Web
{
/// <summary>
/// Regex convenience helpers for pulling fragments out of HTML text.
/// All patterns are evaluated with Singleline and IgnoreCase.
/// </summary>
public class RegexHelper
{
    private const RegexOptions Options = RegexOptions.Singleline | RegexOptions.IgnoreCase;

    /// <summary>
    /// Runs the pattern once and exposes both the whole match and the "cut" value.
    /// The cut value is capture group 1 when the pattern defines a group, otherwise the
    /// whole match. Using the capture group instead of stripping the pattern's literal
    /// prefix/suffix keeps the result correct for case-insensitive matches and for
    /// patterns containing regex metacharacters.
    /// </summary>
    /// <returns>True when the cut value is non-empty.</returns>
    private static bool TryMatch(string htmltext, string pattern, out string captured, out string whole)
    {
        captured = "";
        whole = "";
        if (htmltext == null || pattern == null)
        {
            return false;
        }

        try
        {
            Match match = Regex.Match(htmltext, pattern, Options);
            if (match.Success)
            {
                whole = match.Value;
                captured = match.Groups.Count > 1 ? match.Groups[1].Value : whole;
            }
        }
        catch (ArgumentException)
        {
            // Invalid pattern: treated as "no match", as callers expect a bool, not a throw.
        }

        return !string.IsNullOrEmpty(captured);
    }

    /// <summary>
    /// Finds the first match of <paramref name="pattern"/> in <paramref name="htmltext"/>.
    /// </summary>
    /// <param name="htmltext">Text to search.</param>
    /// <param name="pattern">Regex pattern, typically with one "(.*?)" group.</param>
    /// <param name="isCut">
    /// True to return only the captured group; false to return the whole matched text.
    /// </param>
    /// <param name="result">The extracted text, or "" when nothing matched.</param>
    /// <returns>True when the captured value is non-empty.</returns>
    public static bool GetMatchStr(string htmltext, string pattern, bool isCut, out string result)
    {
        string captured;
        string whole;
        bool found = TryMatch(htmltext, pattern, out captured, out whole);
        result = isCut ? captured : whole;
        return found;
    }

    /// <summary>
    /// Same extraction as the four-argument GetMatchStr, returning the text directly.
    /// </summary>
    /// <param name="htmltext">Text to search.</param>
    /// <param name="pattern">Regex pattern, typically with one "(.*?)" group.</param>
    /// <param name="isCut">
    /// True to return only the captured group; false to return the whole matched text.
    /// </param>
    /// <returns>The extracted text, or "" when nothing matched.</returns>
    public static string GetMatchString(string htmltext, string pattern, bool isCut)
    {
        string captured;
        string whole;
        TryMatch(htmltext, pattern, out captured, out whole);
        return isCut ? captured : whole;
    }

    /// <summary>
    /// Single-match helper: returns the captured group of the first match.
    /// </summary>
    /// <param name="htmltext">Page content to search.</param>
    /// <param name="pattern">Regex pattern.</param>
    /// <param name="result">The captured text (or whole match when the pattern has no group).</param>
    /// <returns>True when a non-empty value was extracted.</returns>
    public static bool GetMatchStr(string htmltext, string pattern, out string result)
    {
        string whole;
        return TryMatch(htmltext, pattern, out result, out whole);
    }

    /// <summary>
    /// Multi-match helper: returns the full text of every match, in order.
    /// </summary>
    /// <param name="htmltext">Page content to search.</param>
    /// <param name="pattern">Regex pattern.</param>
    /// <returns>
    /// An ArrayList of strings; empty (never null) when nothing matches, an argument is
    /// null, or the pattern is invalid.
    /// </returns>
    public static ArrayList GetMathList(string htmltext, string pattern)
    {
        ArrayList list = new ArrayList();
        if (htmltext == null || pattern == null)
        {
            return list;
        }

        try
        {
            foreach (Match match in Regex.Matches(htmltext, pattern, Options))
            {
                list.Add(match.Value);
            }
        }
        catch (ArgumentException)
        {
            // Invalid pattern: return what we have (nothing) so callers can iterate safely.
        }

        return list;
    }
}
}

【C#爬虫】抓取XX网站mp4资源地址的更多相关文章

  1. python爬虫 抓取一个网站的所有网址链接

    sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campai ...

  2. python爬虫--爬取某网站电影下载地址

    前言:因为自己还是python世界的一名小学生,还有很多路要走,所以本文以目的为向导,达到目的即可,对于那些我自己都没弄懂的原理,不做去做过多解释,以免误人子弟,大家可以网上搜索. 友情提示:本代码用 ...

  3. 一个简单的scrapy爬虫抓取豆瓣刘亦菲的图片地址

    一.第一步是创建一个scrapy项目 sh-3.2# scrapy startproject liuyifeiImage sh-3.2# chmod -R 777 liuyifeiImage/ 二.分 ...

  4. 爬虫抓取页面数据原理(php爬虫框架有很多 )

    爬虫抓取页面数据原理(php爬虫框架有很多 ) 一.总结 1.php爬虫框架有很多,包括很多傻瓜式的软件 2.照以前写过java爬虫的例子来看,真的非常简单,就是一个获取网页数据的类或者方法(这里的话 ...

  5. python 爬虫抓取心得

    quanwei9958 转自 python 爬虫抓取心得分享 urllib.quote('要编码的字符串') 如果你要在url请求里面放入中文,对相应的中文进行编码的话,可以用: urllib.quo ...

  6. C# 爬虫 抓取小说

    心血来潮,想研究下爬虫,爬点小说. 通过百度选择了个小说网站,随便找了一本小书http://www.23us.so/files/article/html/13/13655/index.html. 1. ...

  7. Java 实现 HttpClients+jsoup,Jsoup,htmlunit,Headless Chrome 爬虫抓取数据

    最近整理一下手头上搞过的一些爬虫,有HttpClients+jsoup,Jsoup,htmlunit,HeadlessChrome 一,HttpClients+jsoup,这是第一代比较low,很快就 ...

  8. PID控制器的应用:控制网络爬虫抓取速度

    一.初识PID控制器 冬天乡下人喜欢烤火取暖,常见的情形就是四人围着麻将桌,桌底放一盆碳火.有人觉得火不够大,那加点木炭吧,还不够,再加点.片刻之后,又觉得火太大,脚都快被烤熟了,那就取出一些木碳…… ...

  9. Python爬虫抓取东方财富网股票数据并实现MySQL数据库存储

    Python爬虫可以说是好玩又好用了.现想利用Python爬取网页股票数据保存到本地csv数据文件中,同时想把股票数据保存到MySQL数据库中.需求有了,剩下的就是实现了. 在开始之前,保证已经安装好 ...

随机推荐

  1. mwc config.h 中文注释

    #ifndef CONFIG_H_ #define CONFIG_H_ /*************************************************************** ...

  2. SQL读取系统时间的语法(转)

    --获取当前日期(如:yyyymmdd) select CONVERT (nvarchar(12),GETDATE(),112) --获取当前日期(如:yyyymmdd hh:MM:ss)select ...

  3. 图解JavaScript知识点

  4. jQuery Easy UI (适应屏幕分辨率大小)布局(Layout)

    一.jQuery Easy UI (适应屏幕分辨率大小)布局(Layout) 1.首先应用的是jquery-easyui-1.4 版本(版本不同,兼容性不同) 2.实现整个页面的布局( layout: ...

  5. jquery mini ui 学习

    1.mini.parse(); 将html标签解析为miniui控件.解析后,才能使用mini.get获取到控件对象. 2.mini.get(id);根据id获取控件对象. 3.grid.load() ...

  6. MYSQL注释

    MYSQL扩展了SQL的注释/**/, /*! (语句)#加感叹号,内部语句会被执行 */ /*!50001 select * from test #表示数据库为5.00.01版本,内部语句会被执行 ...

  7. 常用hash函数

    常用的哈希函数   通用的哈希函数库有下面这些混合了加法和一位操作的字符串哈希算法.下面的这些算法在用法和功能方面各有不同,但是都可以作为学习哈希算法的实现的例子.   1.RS  从Robert S ...

  8. PHP XML DOM

    PHP XML DOM 内建的 DOM 解析器使在 PHP 中处理 XML 文档成为可能. DOM 是什么? W3C DOM 提供了针对 HTML 和 XML 文档的标准对象集,以及用于访问和操作这些 ...

  9. css布局之三栏布局

    <!DOCTYPE html> <html> <head> <meta charset="utf-8"> <meta name ...

  10. php 数组操作类(整合 给意见)

    数组操作函数整理: /* 将一个二维数组按照指定字段的值分组 * * @param array $arr * @param string $keyField * * @return array */ ...