C#爬页面总结
1.2 fiddler工具
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Reflection;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace Business
{
/// <summary>
/// HttpCallHelper
/// </summary>
public class HttpCallHelper
{
/// <summary>
/// Sends <c>param.Data</c> to <c>param.Url</c> as an
/// <c>application/x-www-form-urlencoded</c> HTTP POST and reads the whole response body.
/// </summary>
/// <param name="param">
/// Request settings. <c>Url</c> is the target, <c>Data</c> the already-encoded form body
/// (null is sent as an empty body), <c>Cookie</c> and <c>Referer</c> are applied only when
/// non-empty, and <c>RequestEncoding</c> / <c>ResponseEncoding</c> control the text encodings.
/// </param>
/// <returns>
/// On success, a result carrying the body text, the <c>Set-Cookie</c> header, status, headers and
/// final URI. On any exception, a result whose only populated field is <c>ErrorMsg</c>
/// (the exception message is also written to the console).
/// </returns>
public static HttpResultInfo Post(HttpRequestParams param)
{
    try
    {
        // Honour the encoding the caller configured instead of overwriting it.
        byte[] body = param.RequestEncoding.GetBytes(param.Data ?? string.Empty);

        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(param.Url);
        req.Method = "POST";
        req.ContentType = "application/x-www-form-urlencoded";
        req.ContentLength = body.Length;

        if (!string.IsNullOrEmpty(param.Cookie))
        {
            req.Headers[HttpRequestHeader.Cookie] = param.Cookie;
        }

        // The Referer header comes from param.Referer (not from the cookie string).
        if (!string.IsNullOrEmpty(param.Referer))
        {
            req.Referer = param.Referer;
        }

        using (Stream reqStream = req.GetRequestStream())
        {
            reqStream.Write(body, 0, body.Length);
        }

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse httpResponse = (HttpWebResponse)req.GetResponse())
        {
            string html;
            using (Stream responseStream = httpResponse.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, param.ResponseEncoding))
            {
                html = reader.ReadToEnd();
            }

            // All values are read before the response is disposed.
            return new HttpResultInfo()
            {
                Cookie = httpResponse.Headers["Set-Cookie"],
                StatusCode = httpResponse.StatusCode,
                StatusDescription = httpResponse.StatusDescription,
                Headers = httpResponse.Headers,
                ErrorMsg = string.Empty,
                Html = html,
                ResponseUrl = httpResponse.ResponseUri,
            };
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract: failures are reported through ErrorMsg, never thrown.
        Console.WriteLine(ex.Message);
        return new HttpResultInfo()
        {
            ErrorMsg = ex.Message,
        };
    }
}
/// <summary>
/// Issues an HTTP GET whose query string is built from <paramref name="param"/>.
/// </summary>
/// <param name="httpParam">
/// Request settings. Its <c>Data</c> property is overwritten with the generated query string
/// when <paramref name="param"/> is not null.
/// </param>
/// <param name="param">
/// Query-string name/value pairs, joined as <c>key=value</c> with <c>&amp;</c> (no escaping is
/// applied). When null, <c>httpParam.Data</c> is left as it is.
/// </param>
/// <returns>The result of <c>Get(httpParam)</c>.</returns>
public static HttpResultInfo Get(HttpRequestParams httpParam, Dictionary<string, string> param)
{
    if (param != null)
    {
        // Reuse the shared builder so no trailing '&' ends up in the URL.
        httpParam.Data = GetQueryString(param);
    }

    return Get(httpParam);
}
/// <summary>
/// Issues an HTTP GET to <c>param.Url</c>, appending <c>param.Data</c> as the query string when
/// it is non-empty, and reads the whole response body.
/// </summary>
/// <param name="param">
/// Request settings. <c>Timeout</c> and <c>ContentType</c> are always applied; <c>Cookie</c> and
/// <c>Referer</c> are applied only when non-empty; <c>ResponseEncoding</c> decodes the body.
/// </param>
/// <returns>
/// On success, a result carrying the body text, the <c>Set-Cookie</c> header, status, headers and
/// final URI. On any exception, a result with empty <c>Html</c> and <c>Cookie</c> and the
/// exception message in <c>ErrorMsg</c> (the full exception is also written to the console).
/// </returns>
public static HttpResultInfo Get(HttpRequestParams param)
{
    try
    {
        string urlStr = param.Url;
        if (!string.IsNullOrEmpty(param.Data))
        {
            // Start a query with '?', or extend an existing one with '&'.
            string separator;
            if (param.Url.IndexOf('?') < 0)
            {
                separator = "?";
            }
            else if (param.Url.EndsWith("?", StringComparison.Ordinal)
                || param.Url.EndsWith("&", StringComparison.Ordinal))
            {
                separator = string.Empty;
            }
            else
            {
                separator = "&";
            }

            urlStr = param.Url + separator + param.Data;
        }

        HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(new Uri(urlStr));
        httpRequest.Timeout = param.Timeout;
        httpRequest.Method = "GET";
        httpRequest.ContentType = param.ContentType;

        if (!string.IsNullOrEmpty(param.Cookie))
        {
            httpRequest.Headers[HttpRequestHeader.Cookie] = param.Cookie;
        }

        if (!string.IsNullOrEmpty(param.Referer))
        {
            httpRequest.Referer = param.Referer;
        }

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse httpResponse = (HttpWebResponse)httpRequest.GetResponse())
        {
            string html;
            using (Stream responseStream = httpResponse.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, param.ResponseEncoding))
            {
                html = reader.ReadToEnd();
            }

            // All values are read before the response is disposed.
            return new HttpResultInfo()
            {
                Cookie = httpResponse.Headers["Set-Cookie"],
                StatusCode = httpResponse.StatusCode,
                StatusDescription = httpResponse.StatusDescription,
                Headers = httpResponse.Headers,
                ErrorMsg = string.Empty,
                Html = html,
                ResponseUrl = httpResponse.ResponseUri,
            };
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract: failures are reported through ErrorMsg, never thrown.
        Console.WriteLine(ex.ToString());
        return new HttpResultInfo()
        {
            Html = string.Empty,
            ErrorMsg = ex.Message,
            Cookie = string.Empty,
        };
    }
}
/// <summary>
/// Joins name/value pairs into a query string of the form <c>k1=v1&amp;k2=v2</c>.
/// </summary>
/// <param name="param">
/// Pairs to join, in the dictionary's enumeration order. Keys and values are written verbatim:
/// no URL escaping is applied, so callers must pre-encode reserved characters.
/// </param>
/// <returns>
/// The joined string without a leading <c>?</c> or trailing <c>&amp;</c>; an empty string when
/// <paramref name="param"/> is null or empty.
/// </returns>
public static string GetQueryString(Dictionary<string, string> param)
{
    if (param == null || param.Count == 0)
    {
        return string.Empty;
    }

    // A null value concatenates as an empty string, giving "key=".
    return string.Join("&", param.Select(item => item.Key + "=" + item.Value));
}
}
/// <summary>
/// Settings describing a single HTTP request: target, payload, headers, timeout and encodings.
/// </summary>
public class HttpRequestParams
{
    /// <summary>
    /// Encoding used to turn the request payload into bytes.
    /// </summary>
    private Encoding requestEncoding = Encoding.Default;

    /// <summary>
    /// Encoding used to decode the response body.
    /// </summary>
    private Encoding responseEncoding = Encoding.Default;

    /// <summary>
    /// Request timeout in milliseconds (default 180 seconds).
    /// </summary>
    private int timeout = 180000;

    /// <summary>
    /// Content type of the request (default text/html).
    /// </summary>
    private string contentType = "text/html";

    /// <summary>
    /// Creates a parameter set with the default encodings, timeout and content type.
    /// </summary>
    public HttpRequestParams()
    {
    }

    /// <summary>
    /// Request URL.
    /// </summary>
    public string Url { get; set; }

    /// <summary>
    /// Request payload: the form body for POST, or the query string for GET.
    /// </summary>
    public string Data { get; set; }

    /// <summary>
    /// Value for the Cookie request header.
    /// </summary>
    public string Cookie { get; set; }

    /// <summary>
    /// Value for the Content-Type request header; defaults to "text/html".
    /// </summary>
    public string ContentType
    {
        get { return this.contentType; }
        set { this.contentType = value; }
    }

    /// <summary>
    /// Value for the Referer request header.
    /// </summary>
    public string Referer { get; set; }

    /// <summary>
    /// Request timeout in milliseconds; defaults to 180000 (180 seconds).
    /// </summary>
    public int Timeout
    {
        get { return this.timeout; }
        set { this.timeout = value; }
    }

    /// <summary>
    /// Encoding used for the request payload. Never null.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public Encoding RequestEncoding
    {
        get
        {
            return this.requestEncoding;
        }

        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value", "请求编码格式不能设置为空!");
            }

            this.requestEncoding = value;
        }
    }

    /// <summary>
    /// Encoding used to decode the response body. Never null.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public Encoding ResponseEncoding
    {
        get
        {
            return this.responseEncoding;
        }

        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value", "响应编码格式不能设置为空!");
            }

            this.responseEncoding = value;
        }
    }
}
/// <summary>
/// Outcome of an HTTP call: the response data on success, or an error message on failure.
/// </summary>
public class HttpResultInfo
{
    /// <summary>
    /// Response body text.
    /// </summary>
    public string Html { get; set; }

    /// <summary>
    /// Cookie string associated with the response.
    /// </summary>
    public string Cookie { get; set; }

    /// <summary>
    /// HTTP status code of the response.
    /// </summary>
    public HttpStatusCode StatusCode { get; set; }

    /// <summary>
    /// Error message; expected to be empty when the call succeeded.
    /// </summary>
    public string ErrorMsg { get; set; }

    /// <summary>
    /// Status description accompanying the status code.
    /// </summary>
    public string StatusDescription { get; set; }

    /// <summary>
    /// Response headers.
    /// </summary>
    public WebHeaderCollection Headers { get; set; }

    /// <summary>
    /// URI that produced the response.
    /// </summary>
    public Uri ResponseUrl { get; set; }
}
}
C#爬页面总结的更多相关文章
- python爬取youtube视频 多线程 非中文自动翻译
声明:我写的所有文章都是发在博客园的,我看到其他复制粘贴过去的 连个出处也不写,直接打上自己的水印...真是没的说了. 前言:前段时间搞了一些爬视频的项目,代码都写好了,这里写文章那就在来重新分析一遍 ...
- 【nodejs 爬虫】使用 puppeteer 爬取链家房价信息
使用 puppeteer 爬取链家房价信息 目录 使用 puppeteer 爬取链家房价信息 页面结构 爬虫库 pupeteer 库 实现 打开待爬页面 遍历区级页面 方法一 方法二 遍历街道页面 遍 ...
- python 爬虫 汽车之家车辆参数反爬
水平有限,仅供参考. 如图所示,汽车之家的车辆详情里的数据做了反爬对策,数据被CSS伪类替换. 观察 Sources 发现数据就在当前页面. 发现若干条进行CSS替换的js 继续深入此JS 知道了数据 ...
- webmagic的设计机制及原理-如何开发一个Java爬虫
之前就有网友在博客里留言,觉得webmagic的实现比较有意思,想要借此研究一下爬虫.最近终于集中精力,花了三天时间,终于写完了这篇文章.之前垂直爬虫写了一年多,webmagic框架写了一个多月,这方 ...
- web magic 小结
缘起 写了多年的程序,鲜有产出物,于是最近打算做个不可说的东西来祭奠逝去的青春.数据,是一个程序的起点,我们没有数以亿计的用户,无法让活跃用户给我们产生数据,那就只能去别人的站点上借点数据了.这个功能 ...
- Python3学习笔记2:简易Web爬虫
开发环境 基础语法那章的内容我是在Docker容器中玩的,但是真正做项目的时候,没有IDE的强大辅助功能来协助的话是很累人的一件事.因此从本文中,我选择使用Jetbrain的Pycharm这个IDE来 ...
- selenium设置proxy、headers(phantomjs、Chrome、Firefox)
phantomjs 设置ip 方法1: service_args = [ '--proxy=%s' % ip_html, # 代理 IP:port (eg:192.168.0.28:808) '--p ...
- Python——day11 函数(对象、名称空间、作用域、嵌套、闭包)
一.函数对象 函数名就是存放了函数的内存地址,存放了内存地址的变量都是对象,即 函数名 就是 函数对象 函数对象的应用 1. 可以直接被引用 fn = cp_fn 2 .可以当作函数参数传递 c ...
- day 9~11 函数
今日内容 '''函数四个组成部分函数名:保存的是函数的地址,是调用函数的依据函数体:就是执行特定功能的代码块函数返回值:代码块执行的结果反馈函数参数:完成功能需要的条件信息1.函数的概念2.函数的定 ...
随机推荐
- 更改ubuntu下mysql的密码
1.首先,进入环境中去,即 mysql -u root -p ,然后输入原始密码 2.此时会出现 mysql > 3.开始修改密码: mysql > use mysql ; ...
- JavaScript中判断为整数的多种方式
之前记录过JavaScript中判断为数字类型的多种方式,这篇看看如何判断为整数类型(Integer). JavaScript中不区分整数和浮点数,所有数字内部都采用64位浮点格式表示,和Java的d ...
- matlab里.*和*的区别
*:矩阵相乘 (cross) .*:矩阵的元素一对一相乘 (dot) 例子: >> a=[2 3];>> b=[4 5];>> a*b' ans = 23 > ...
- linux 删除已输入的命令行
ctrl + w —往前删除一个单词,光标放在最末尾ctrl + k —删除到末尾,光标放在最前面(可以使用ctrl+a) —————————————————————-华丽的分割线ctl + u 删除 ...
- hdu1042 N!
/* N! Time Limit: 10000/5000 MS (Java/Others) Memory Limit: 262144/262144 K (Java/Others) Total Subm ...
- JAVA基础之对象的初始化
本文主要记录JAVA中对象的初始化过程,包括实例变量的初始化和类变量的初始化以及 final 关键字对初始化的影响.另外,还讨论了由于继承原因,探讨了引用变量的编译时类型和运行时类型 一,实例变量的初 ...
- HDU 4902 Nice boat --线段树(区间更新)
题意:给一个数字序列,第一类操作是将[l,r]内的数全赋为x ,第二类操作是将[l,r]中大于x的数赋为该数与x的gcd,若干操作后输出整个序列. 解法: 本题线段树要维护的最重要的东西就是一个区间内 ...
- POJ 2407 Relatives 【欧拉函数】
裸欧拉函数. #include<stdio.h> #include<string.h> ; int p[N],pr[N],cnt; void init(){ ;i<N;i ...
- JSP中文乱码问题《转》
之前总是碰到JSP页面乱码的问题,每次都是先在网上搜,然后胡乱改,改完也不明白原因. 这次正好作下总结,中文乱码就是因为编码不符,可能出现乱码有四个地方: 1 JSP编码乱码 2 HTML编码乱码 3 ...
- Java虚拟机详解04----GC算法和种类【重要】
[声明] 欢迎转载,但请保留文章原始出处→_→ 生命壹号:http://www.cnblogs.com/smyhvae/ 文章来源:http://www.cnblogs.com/smyhvae/p/4 ...