Solr5 DataImport 处理1对多关系
“问题” 和“派发处理” 是2个对象。 每个问题可以被派发N次, 最后结束问题。
实际查询中数据量可能很大,同时既可以从问题查派发部门, 也可以从派发情况查询问题(如本部门处理的问题)
使用的技巧就是:增加doc_id, doc_type字段。 实现多Entity查询。
schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema "uum".
  Several kinds of documents live in the same index; doc_type tells them
  apart and doc_id (the uniqueKey) must be distinct across all of them.
-->
<schema name="uum" version="1.2">

  <types>
    <!-- Primitive, non-analyzed types -->
    <fieldType name="boolean" class="solr.BoolField"/>
    <fieldType name="date"    class="solr.TrieDateField"/>
    <fieldType name="float"   class="solr.TrieFloatField"/>
    <fieldType name="int"     class="solr.TrieIntField"/>
    <fieldType name="long"    class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="string"  class="solr.StrField"/>

    <!-- Stored-only string, used by the *_url / *_urls dynamic fields -->
    <fieldType name="url" class="solr.StrField" indexed="false" stored="true"/>

    <!-- Full-text type: standard tokenization followed by lower-casing -->
    <fieldType name="simpletext" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Neither indexed nor stored -->
    <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false"/>

    <!-- Long-backed type; no field in this schema references it -->
    <fieldType name="date_l" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  </types>

  <fields>
    <!-- ===== Fields present in documents of every type ===== -->

    <!-- uniqueKey: has to be distinct across all types of documents -->
    <field name="doc_id" type="string"/>
    <!-- the type (or domain) of the document -->
    <field name="doc_type" type="string"/>

    <!-- external URLs -->
    <dynamicField name="*_url"  type="url" multiValued="false"/>
    <dynamicField name="*_urls" type="url" multiValued="true"/>

    <!-- dates -->
    <dynamicField name="*_dt" type="date"/>

    <!-- numeric values that might come in handy for relevancy biasing
         (they all relate to popularity) -->
    <dynamicField name="*_count" type="int" multiValued="false"/>

    <field name="_version_" type="long"   indexed="true" stored="true"/>
    <field name="_root_"    type="string" indexed="true" stored="false"/>

    <!-- Field used by the Suggester for autocompletion -->
    <field name="autocomplete" type="simpletext" stored="false" multiValued="true"/>

    <!-- Quick-search field; populated by the copyField rules further down -->
    <field name="catchall" type="simpletext" stored="false" omitNorms="true" multiValued="true"/>

    <field name="ID" type="string" multiValued="false"/>

    <!-- ===== PETITION ===== -->
    <field name="TenantId"       type="string"     multiValued="false"/>
    <field name="PetitionId"     type="string"     multiValued="false"/>
    <field name="PetitionNumber" type="string"     multiValued="false"/>
    <field name="Title"          type="simpletext" multiValued="false"/>
    <field name="Content"        type="simpletext" multiValued="false"/>
    <field name="Tel"            type="string"     multiValued="false"/>
    <field name="EventAddress"   type="simpletext" multiValued="false"/>
    <field name="DutyGridName"   type="string"     multiValued="false"/>
    <field name="ComplaintType"  type="string"     multiValued="false"/>
    <field name="IsVoid"         type="boolean"    multiValued="false"/>
    <field name="IsEnd"          type="boolean"    multiValued="false"/>
    <field name="GridAddress"    type="simpletext" multiValued="false"/>
    <field name="CategoryName"   type="string"     multiValued="false"/>
    <field name="Category"       type="string"     multiValued="false"/>
    <field name="Status"         type="string"     multiValued="false"/>
    <field name="RegisterOn"     type="date"       multiValued="false"/>
    <field name="DeadLine"       type="date"       multiValued="false"/>
    <field name="ReportOn"       type="date"       multiValued="false"/>
    <field name="EndCaseOn"      type="date"       multiValued="false"/>
    <field name="CreatedBy"      type="string"     multiValued="false"/>
    <field name="SourceWay"      type="string"     multiValued="false"/>
    <field name="ISWGXTSB"       type="string"     multiValued="false"/>
    <field name="RegisterOffice" type="string"     multiValued="false"/>
    <field name="EventLevel"     type="string"     multiValued="false"/>
    <field name="ImportantLevel" type="string"     multiValued="false"/>

    <!-- ===== PETITION/DISPATCH (multi-valued) ===== -->
    <field name="DispatchOffices" type="string" multiValued="true"/>
    <!--<field name="DispatchOfficeNames" />-->
    <field name="ReceiveOffices"     type="string" multiValued="true"/>
    <field name="ReceiveOfficeNames" type="string" multiValued="true"/>

    <!-- ===== PETITION/PARTICIPANT (multi-valued) ===== -->
    <field name="OrgUnits"     type="string" multiValued="true"/>
    <field name="Participants" type="string" multiValued="true"/>

    <!-- ===== DISPATCH ===== -->
    <field name="Dispatcher"         type="string"     multiValued="false"/>
    <field name="DispatchOn"         type="date"       multiValued="false"/>
    <field name="DispatchOffice"     type="string"     multiValued="false"/>
    <field name="DispatchOfficeName" type="string"     multiValued="false"/>
    <field name="ReceiveOffice"      type="string"     multiValued="false"/>
    <field name="ReceiveOfficeName"  type="string"     multiValued="false"/>
    <field name="StartOn"            type="date"       multiValued="false"/>
    <field name="DealWay"            type="string"     multiValued="false"/>
    <field name="FeedBackType"       type="string"     multiValued="false"/>
    <field name="FeedBackPeople"     type="string"     multiValued="false"/>
    <field name="FeedBackOn"         type="date"       multiValued="false"/>
    <field name="FeedBackMsg"        type="simpletext" multiValued="false"/>
    <field name="NoPublicOpinion"    type="simpletext" multiValued="false"/>
    <field name="IsPublic"           type="boolean"    multiValued="false"/>
    <field name="IsAlreadyReply"     type="boolean"    multiValued="false"/>
    <field name="IsAlreadyContact"   type="boolean"    multiValued="false"/>
  </fields>

  <!-- copy author names and titles to a field to autocomplete
  <copyField source="canonical_name" dest="autocomplete"/>
  <copyField source="title" dest="autocomplete"/> -->

  <!-- copy the main searchable fields into one big field for easy searching -->
  <copyField source="PetitionNumber" dest="catchall"/>
  <copyField source="Title"          dest="catchall"/>
  <copyField source="Content"        dest="catchall"/>
  <copyField source="Tel"            dest="catchall"/>
  <copyField source="EventAddress"   dest="catchall"/>

  <!-- A unique key field isn't necessary, but it's the only way Solr
       can automatically replace docs when they change.
       DataImportHandler is also very unhappy if you don't have one. -->
  <uniqueKey>doc_id</uniqueKey>

  <!-- It's a *very* good idea to have a default search field.
       NOTE(review): defaultSearchField is presumably deprecated on newer Solr
       releases in favour of the "df" request parameter; confirm against the
       target Solr version before upgrading. -->
  <defaultSearchField>catchall</defaultSearchField>

</schema>
schema.xml
db-data-config.xml
<!--
  DataImportHandler configuration for the petition / dispatch index.

  Two root entities are imported into the same index:
    * petition : one document per row of VW_HIS_EventInfo    (doc_id = PE_<ID>,  doc_type = PE)
    * dispatch : one document per row of VW_HIS_DispatchInfo (doc_id = DIS_<ID>, doc_type = DIS)
  Each root entity pulls data from the other side of the relation through
  nested child entities.

  Delta-import notes:
    * A child entity's deltaQuery must return every column that its
      parentDeltaQuery references through ${childEntity.COLUMN}; otherwise
      the variable resolves to an empty string and the parent is never
      re-indexed.
    * NOTE(review): NEW_TIME(..., 'PST', 'GMT') presumably converts
      last_index_time from the indexing host's time zone to the zone stored
      in the views; confirm that 'PST' matches the Solr server.
-->
<dataConfig>
  <dataSource type="JdbcDataSource"
              driver="oracle.jdbc.driver.OracleDriver"
              url="jdbc:oracle:thin:@192.168.0.0:1521:test"
              user="user"
              password="pwd"/>

  <document>

    <!-- ===== Root entity: petition ===== -->
    <entity name="petition"
            pk="ID"
            transformer="TemplateTransformer"
            query="select * from VW_HIS_EventInfo "
            deltaImportQuery="select t.* from VW_HIS_EventInfo t where t.id='${dataimporter.delta.ID}'"
            deltaQuery="select t.* from VW_HIS_EventInfo t where
                        RegistorOn&gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or ReportOn&gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or endcaseon&gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT') ">
      <!-- Synthetic key and discriminator built by TemplateTransformer -->
      <field column="doc_id" template="PE_${petition.ID}" />
      <field column="doc_type" template="PE" />
      <!--<field column="LATLON" name="LatLon_p"/>-->
      <field column="TENANTID" name="TenantId" />
      <field column="ID" name="PetitionId" />
      <field column="PETITIONNUMBER" name="PetitionNumber" />
      <field column="TITLE" name="Title" />
      <field column="CONTENT" name="Content" />
      <field column="TEL" name="Tel" />
      <field column="EVENTADDRESS" name="EventAddress" />
      <!--<field column="AREANAME" name="AreaName" />-->
      <field column="DUTYGRIDNAME" name="DutyGridName" />
      <field column="GRIDADDRESS" name="GridAddress" />
      <field column="COMPLAINTQUALITYNAME" name="ComplaintType" />
      <field column="ISVOID" name="IsVoid" />
      <field column="ISEND" name="IsEnd" />
      <field column="CATEGORYNAME" name="CategoryName" />
      <field column="CATEGORYCODE" name="Category" />
      <field column="STATUS" name="Status" />
      <field column="REGISTORON" name="RegisterOn" />
      <field column="DEADLINE" name="DeadLine" />
      <field column="CREATEDBY" name="CreatedBy" />
      <field column="REPORTON" name="ReportOn" />
      <field column="SOURCEWAY" name="SourceWay" />
      <field column="ISWGXTSB" name="ISWGXTSB" />
      <field column="REGISTOROFFICE" name="RegisterOffice" />
      <!--<field column="TOOFFICENAME" name="ToOfficeName" />-->
      <field column="EVENTLEVEL" name="EventLevel" />
      <field column="IMPORTANTLEVEL" name="ImportantLevel" />
      <field column="ENDCASEON" name="EndCaseOn" />
      <!--<field column="ENDOPINION" name="EndOpinion" />-->

      <!-- Child: all dispatches of this petition, flattened into multi-valued fields.
           The deltaQuery also returns PETITIONID because parentDeltaQuery needs it. -->
      <entity name="petition_dispatch"
              pk="ID"
              query="select * from VW_HIS_DispatchInfo where petitionid='${petition.ID}'"
              deltaQuery="select ID, PETITIONID from VW_HIS_DispatchInfo where
                          DISPATCHON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                          or FEEDBACKON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petition_dispatch.PETITIONID}'">
        <!--<field column="DISPATCHER" name="Dispatchers" />-->
        <field column="DISPATCHOFFICE" name="DispatchOffices" />
        <!--<field column="DISPATCHOFFICENAME" name="DispatchOfficeNames" />-->
        <field column="RECEIVEOFFICE" name="ReceiveOffices" />
        <field column="RECEIVEOFFICENAME" name="ReceiveOfficeNames" />
      </entity>

      <!-- Child: participants of this petition, flattened into multi-valued fields.
           The entity name keeps its historical spelling so existing references stay valid.
           The deltaQuery also returns PETITIONID because parentDeltaQuery needs it. -->
      <entity name="petiton_participant"
              pk="PARTICIPANT"
              query="select distinct ORGUNIT,PARTICIPANT from vw_his_participant where petitionid='${petition.ID}'"
              deltaQuery="select PARTICIPANT, PETITIONID from vw_his_participant where HandleOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT') "
              parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petiton_participant.PETITIONID}'">
        <!--<field column="HANDLEON" name="HandleOns"/>-->
        <field column="ORGUNIT" name="OrgUnits"/>
        <!--<field column="PARTICIPANTNAME" name="ParticipantNames"/>-->
        <field column="PARTICIPANT" name="Participants"/>
        <!--<field column="PARTICIPANTTYPE" name="ParticipantTypes"/>-->
      </entity>
    </entity>

    <!-- ===== Root entity: dispatch ===== -->
    <entity name="dispatch"
            pk="ID"
            transformer="TemplateTransformer"
            query="select * from VW_HIS_DispatchInfo"
            deltaImportQuery="select t.* from VW_HIS_DispatchInfo t where t.id='${dataimporter.delta.ID}'"
            deltaQuery="select ID from VW_HIS_DispatchInfo where DISPATCHON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or FEEDBACKON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">
      <!-- Synthetic key and discriminator built by TemplateTransformer -->
      <field column="doc_id" template="DIS_${dispatch.ID}" />
      <field column="doc_type" template="DIS" />
      <field column="TENANTID" name="TenantId" />
      <field column="PETITIONID" name="PetitionId" />
      <field column="DISPATCHER" name="Dispatcher" />
      <field column="DISPATCHON" name="DispatchOn" />
      <field column="DISPATCHOFFICE" name="DispatchOffice" />
      <field column="DISPATCHOFFICENAME" name="DispatchOfficeName" />
      <field column="RECEIVEOFFICE" name="ReceiveOffice" />
      <field column="RECEIVEOFFICENAME" name="ReceiveOfficeName" />
      <field column="STARTON" name="StartOn" />
      <field column="DEADLINE" name="DeadLine" />
      <field column="DEALWAY" name="DealWay" />
      <field column="STATUS" name="Status" />
      <field column="FEEDBACKTYPE" name="FeedBackType" />
      <field column="FEEDBACKPEOPLE" name="FeedBackPeople" />
      <field column="FEEDBACKON" name="FeedBackOn" />
      <field column="FEEDBACKMSG" name="FeedBackMsg" />
      <field column="NOPUBLICOPINION" name="NoPublicOpinion" />
      <field column="ISPUBLIC" name="IsPublic" />
      <field column="ISALREADYREPLY" name="IsAlreadyReply" />
      <!-- NOTE(review): column is spelled ISALREADYCONCAT here; verify against the view definition -->
      <field column="ISALREADYCONCAT" name="IsAlreadyContact" />

      <!-- Child: the petition this dispatch belongs to, copied onto the dispatch document -->
      <entity name="dispatch_petition"
              pk="ID"
              query="select * from VW_HIS_EventInfo where id='${dispatch.PETITIONID}'"
              deltaQuery="select ID from VW_HIS_EventInfo where endcaseon&gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_DispatchInfo where PETITIONID='${dispatch_petition.ID}'">
        <field column="PETITIONNUMBER" name="PetitionNumber" />
        <field column="TITLE" name="Title" />
        <field column="CONTENT" name="Content" />
        <field column="TEL" name="Tel" />
        <field column="EVENTADDRESS" name="EventAddress" />
        <!--<field column="AREANAME" name="AreaName" />-->
        <field column="DUTYGRIDNAME" name="DutyGridName" />
        <field column="GRIDADDRESS" name="GridAddress" />
        <field column="COMPLAINTQUALITYNAME" name="ComplaintType" />
        <field column="CATEGORYNAME" name="CategoryName" />
        <field column="CATEGORYCODE" name="Category" />
      </entity>
    </entity>

  </document>
</dataConfig>
db-data-config.xml
Solr5 DataImport 处理1对多关系的更多相关文章
- Solr-5.3.1 dataimport 导入mysql数据
最近需要计算制造业领域大词表每个词的idf,词表里一共九十多万个词,语料一共三百七十多万篇分词后文献.最开始尝试用程序词表循环套语料循环得到每个词的idf,后来又尝试把语料存入mysql然后建立全文索 ...
- solr5.2 mysql 增量索引
前提:数据库里数据进行增删改操作时,相应的solr需要修改或者新建索引,之前从数据库中导入数据并创建索引的操作是全量创建,如果本身数据库数据量非常大,就需要增量创建索引 1./usr/local/sr ...
- solr5.3.1 集群服务搭建
转http://978538.blog.51cto.com/968538/1710442 一. 安装部署 zookeeper集群部署: 节点: 10.1.12.51:2181 node1 1 ...
- .Net程序员 Solr-5.3之旅 (三)Solr 从MSSQ导入索引数据
阅读目录 引言 准备工作 data-config.xml schema.xml 导入数据 结尾 附件下载 引言 Other men live to eat, while I eat to live.- ...
- .Net程序员 Solr-5.3之旅 (二)Solr 安装
阅读目录 引言 Solr5.3环境搭建 Solr5.3创建第一个Core 结尾 引言 一个糟糕的设计有好的表现形式,它会被判死缓,一个好的设计有糟糕的表现形式,它会被判死刑立即执行. 以上摘自一个设计 ...
- solr5.5索引mysql数据(新手总结)
一 solr5.5环境部署到Eclipse(luna版) solr部署参见:http://blog.csdn.net/csmnjk/article/details/64121765 二 Ik分词器设置 ...
- 使用solr批量导入mysql数据库,以及Unable to read: dataimport.properties等坑
折腾了一下午终于成功了!先放一张成功图: 成功把mysql的数据添加进去了,我这里是整合了tomcat9,整合步骤挺麻烦的,百度一大堆! 这里主要介绍批量导入数据,这里有些坑,所以记录一下: 步骤: ...
- Tomcat + solr5.2.1环境搭建
1. 下载solr并解压后的目录为:E:\solr-5.2.1 , http://lucene.apache.org/solr/downloads.html 2. 将solr部署到Tomcat中 ...
- solr 配置中文分析器/定义业务域/配置DataImport功能(测试用)
一.配置中文分析器 使用IKAnalyzer 配置方法: 1)把IK的jar包添加到solr工程中/WEB-INF/lib目录下 2)把IK的配置文件扩展词典, ...
随机推荐
- word页码上加横线&&word删除单页页眉
word(2010)页码上加横线 插入——>页脚(选择年刊型)——>如图 然后拖住“竖条条”将页码拖到正中间——>点中页脚右击——>选中“表格属性”——>“边框和底纹”— ...
- EntityFramework 连接字符串
1. Microsoft SQL Server 2016 LocalDB <connectionStrings> <add name="DefaultConnection& ...
- C# 关键字【转】
C#中的关键字 关键字是对编译器具有特殊意义的预定义保留标识符.它们不能在程序中用作标识符,除非它们有一个 @ 前缀.例如,@if 是有效的标识符,但 if 不是,因为 if 是关键字. 下面是列 ...
- OC的类别(分类)和拓展
一.分类: 1.适用范围 当你已经封装好了一个类(也可能是系统类.第三方库),不想在改动这个类了,可是随着程序功能的增加需要在类中增加一个方法,这时我们不必修改主类,只需要给你原来的类增加一 ...
- Hire Me, Microsoft China
为微软中国工作是一个愿望.对于其他的股票期权,令人难以置信的小吃店或很酷的工作室,引诱他们的可能性.很多人都想为微软中国工作,谁知道,也许你就是其中之一.这个博客是专门为在微软中国工作.做它的工作空缺 ...
- ios上position:fixed失效问题
手机端上的猫腻真是多啊~~~ 此起彼伏! 最近又遇到了 固定定位的底部导航在ios上被弹出去 此时内心1w+个草泥马奔过~~~~~~~~ 直接上解决方案: <div class="ma ...
- 基于Python的TestAgent实现
问题: 1.本人工作主要做自动化,经常要去Linux后台进行一些脚本操作,有时要去后台执行命令,如果逐个登陆比较费事,效率会大打折扣 2.虽然有可以直接去后台执行命令的AW,但是该AW存在很多问题,而 ...
- Python开发【前端】:Ajax
Ajax Ajax即"Asynchronous Javascript And XML"(异步JavaScript和XML),是指一种创建交互式网页应用的网页开发技术,AJAX = ...
- flex布局
一,啥是flex? 1.Flex是Flexible Box的缩写,意为"弹性布局",用来为盒状模型提供最大的灵活性.任何一个容器都可以指定为Flex布局. .box{ displa ...
- 手机QQ内置网页,微信内置网页中进行分享到QQ和微信的操作
微信内的网页分享: API内容详见微信开发文档 https://mp.weixin.qq.com/wiki 这里需要注意的是:调用微信API的时候修改的是微信内网页右上角三个点那里打开后,选择分享之 ...