using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks; namespace AnfleCrawler.DataAnalyzer
{
/// <summary>
/// Analyzer for Mytophome listing pages. For each listing row it resolves the
/// owning housing estate (via <c>CheckHouses</c>) and saves a
/// <c>HouselistingEntity</c> through <c>Repository.SaveHouselisting</c>.
/// </summary>
/// <remarks>
/// NOTE(review): this copy of the file has lost its integer literals (the
/// <c>case</c> label, the array indexes and the length comparison are empty),
/// so it does not compile as-is. Restore them from the original source.
/// </remarks>
internal class Mytophome : AnalyzerBase
{
/// <summary>
/// Processes one crawled page: registers paging, then walks every
/// <c>.deD_ctt li</c> row and stores one house listing per row.
/// </summary>
/// <param name="current">The page being analyzed; its <c>Depth</c> selects the branch and its <c>Url</c> is used to resolve links.</param>
protected override void AnalyzeInternal(PageLandEntity current)
{
var lander = Crawler.Lander;
var pHandler = CreateContentHandler(current);
switch (current.Depth)
{
// NOTE(review): the case label value is missing in this copy — restore it from the original source.
case :
{
var dom = lander.GetDocument(pHandler);
// Tag the parent of the "nobr" node with id=PagingHack so it can be addressed by the "#<PagingHack>" selector below.
var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
nextNode.SetAttributeValue("id", PagingHack);
// The next line holds two statements: the DoPerPaging call using that selector, and the start of the loop over listing rows.
DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack)); foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
{
// Each row's <span> children are the listing's columns.
var Nset = QueryNodes(node, "span").ToArray();
// NOTE(review): the span index is missing here — which column carries the estate link cannot be told from this copy.
var hUrl = GetHref(QueryNode(Nset[], "a"), current.Url);
var query = System.Web.HttpUtility.ParseQueryString(hUrl.Query);
// NOTE(review): shid is null when the link has no "estateId" parameter; it is formatted into the URL unchecked.
string shid = query["estateId"];
// Rebuild the link as the estate detail page on the same authority.
hUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", hUrl.Authority, shid));
Guid housesID;
try
{
CheckHouses(hUrl, out housesID);
}
catch (HtmlNodeMissingException ex)
{
// Log and skip this row; the remaining rows are still processed.
// NOTE(review): the value logged as "OrgUrl" is the estateId string, not a URL — confirm this is intended.
App.LogError(ex, "OrgUrl={0} HousesUrl={1}", shid, hUrl);
continue;
// The next line closes the catch block and then collects the trimmed text of every column.
} var vals = Nset.Select(p => p.InnerText.HtmlTrim()).ToArray();
// The last column is parsed as the transaction date; it stays null when parsing fails.
DateTime? transactionDate = null;
DateTime dump;
if (DateTime.TryParse(vals.Last(), out dump))
{
transactionDate = dump;
}
// NOTE(review): the expected column count and all vals[] indexes below are missing in this copy.
// The first branch additionally fills BuildingName; the else branch does not.
if (vals.Length == )
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
BuildingName = vals[],
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
else
{
Repository.SaveHouselisting(new HouselistingEntity()
{
HousesID = housesID,
TransactionDate = transactionDate,
Area = string.Format("{0}平方", vals[]),
SoldPriceOrRent = string.Format("{0}万", vals[]),
UnitPriceOrLease = string.Format("{0}元/平方", vals[]),
});
}
Crawler.OutWrite("保存小区出售记录 {0}", housesID);
}
}
break;
}
// The next line closes AnalyzeInternal and declares CheckHouses.
// CheckHouses: loads the estate detail page at housesUrl, derives housesID from
// GenHashKey(housesUrl.OriginalString), and saves the estate record unless the
// loaded record already has a non-empty SiteID.
//   housesUrl - estate detail page URL; its OriginalString is the hash-key input.
//   housesID  - receives the generated key; assigned before the early return.
} private void CheckHouses(Uri housesUrl, out Guid housesID)
{
var pHandler = CreateContentHandler(new PageLandEntity()
{
Url = housesUrl,
Depth = DataDepth.Houses
});
pHandler.AjaxBlocks.Add(HACK);
var dom = Crawler.Lander.GetDocument(pHandler);
// The next line holds three statements: create the filler, append the texts of ".xxjs_rbar_ct li", and compute housesID.
var attrs = new AttributeFiller(); attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li")); housesID = GenHashKey(housesUrl.OriginalString);
var bo = Crawler.Repository.LoadHouses(housesID);
// A non-empty SiteID presumably means this estate was stored by an earlier run — verify against LoadHouses.
if (!string.IsNullOrEmpty(bo.SiteID))
{
return;
}
bo.SiteID = "Mytophome.com";
bo.PageUrl = housesUrl.OriginalString;
bo.CityName = Crawler.Config.CityName;
// Presumably maps the page's attribute labels (keys) to entity member names (values) — confirm against AttributeFiller.FillEntity.
attrs.FillEntity(bo, new Dictionary<string, string>()
{
{"楼盘名称", "小区名称"},
{"楼盘地址", "小区地址"},
{"发展商", "开发商"},
{"物管公司", "物业公司"},
{"物管电话", "物业办公电话"},
});
MapMark(bo);
Crawler.Repository.Save(bo);
Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
}
}
}

Mytophome Deal的更多相关文章

  1. zlhome.com Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  2. Dooioo Deal

    using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...

  3. XML节点名称中有小数点处理(deal with dot)导致使用xpath时报错解决方法

    <?xml version="1.0"?> <ModifyFiles> <_Layout.cshtml>123456</_Layout.c ...

  4. whu 1464 deal with numbers

    WHU 1464  deal with numbers 题意: 给你一串数字,对这串数字有三项操作: Minus a,b,c:对区间[a,b]中的每个数都减c. Division a,b,c:对区间[ ...

  5. OK335xS canutils deal with compile error

    /************************************************************************************** * OK335xS ca ...

  6. 能让你聪明的工作DEAL四法则,来自《每周工作四小时》书籍

    来自书籍<每周工作四小时>,作者蒂莫西·费里斯(Tim Ferriss,昵称:蒂姆)   能让你聪明的工作DEAL四法则: 第一步:D——定位(Definition) 第二步:E——精简( ...

  7. how to deal with EINTR fault

    [how to deal with EINTR fault] EINTR:interrupted error.是指一个调用被信号给中断,对于同步的耗时调用来说,这个操作常见,譬如select.read. ...

  8. Spoken English Practice( Believe it or not, I don't need to make believe its a big deal. (believe,deal, You don't say))

    音标复习                                                绿色:连读:红色:略读:蓝色:浊化:橙色:弱读 口语蜕变(2017/6/25) Sorry, t ...

  9. If you want the rainbow, you have to deal with the rain.

    If you want the rainbow, you have to deal with the rain.想要彩虹,就先忍受雨水.

随机推荐

  1. Why does pthread_cond_signal not work?【转】

    转自:http://stackoverflow.com/questions/16819169/why-does-pthread-cond-signal-not-work# 0 down vote fa ...

  2. 微信JS SDK PHP Demo

    一.JSSDK类定义 <?php class JSSDK { private $appId; private $appSecret; public function __construct($a ...

  3. laravel框架总结(三) -- 路径分析

    1.直接写绝对路径,这样会用在/goods/show前面加上域名 <a href="/goods/show?id=<?php echo $item['id']; ?>&qu ...

  4. 【Spring】对象后期处理,BeanPostProcessor

    当我们使用Spring容器管理对象时,需要对对象进行一些后期处理时,比如数据处理.数据预加载,可以使用BeanPostProcessor接口. 简单演示它的用法. 定义扫描包,显示定义BeanPost ...

  5. [问题2014S03] 复旦高等代数II(13级)每周一题(第三教学周)

    [问题2014S03]  设 \(A\in M_n(\mathbb R)\) 是非异阵并且 \(A\) 的 \(n\) 个特征值都是实数. 若 \(A\) 的所有 \(n-1\) 阶主子式之和等于零, ...

  6. Python3基础 reverse 将列表倒序排列

    镇场诗:---大梦谁觉,水月中建博客.百千磨难,才知世事无常.---今持佛语,技术无量愿学.愿尽所学,铸一良心博客.------------------------------------------ ...

  7. linux passwd文件解析

    #cat/etc/passwd root:x:::Superuser:/: daemon:x:::Systemdaemons:/etc: bin:x:::Ownerofsystemcommands:/ ...

  8. Java的final关键字

    使用final关键字做标识有“最终的”含义 final可以修饰类.方法.属性和变量: 修饰类,则该类不允许被继承(即不能有子类) 修饰方法,则该方法不允许被覆盖(重写) 修饰属性,则该属性不会进行隐形 ...

  9. centos配置163源

    1.参考Centos镜像帮助 (1.1)备份原始repo shell> sudo mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/Ce ...

  10. hdu4352 XHXJ's LIS

    链接 这个题最不好想到的是状态的保存,也没有几亿的数组让你开,怎么保存前面出现了哪些数字. 题意让你求最长上升子序列的长度为k的数字的数目,可以是不连续的,可以保留一个状态栈,栈顶部依次更新,再保证长 ...