Solr5 DataImport 处理1对多关系
“问题” 和“派发处理” 是2个对象。 每个问题可以被派发N次, 最后结束问题。
实际查询中数据量可能很大,同时既可以从问题查询派发部门,也可以从派发情况查询问题(如本部门处理的问题)。
使用的技巧就是:增加doc_id, doc_type字段。 实现多Entity查询。
schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="uum" version="1.2">

  <types>
    <!-- Primitive, non-analyzed types -->
    <fieldType name="boolean" class="solr.BoolField"/>
    <fieldType name="date" class="solr.TrieDateField"/>
    <fieldType name="float" class="solr.TrieFloatField"/>
    <fieldType name="int" class="solr.TrieIntField"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="string" class="solr.StrField"/>

    <!-- Stored but not indexed: returned with results, not searchable -->
    <fieldType name="url" class="solr.StrField" indexed="false" stored="true"/>

    <!-- Analyzed text: standard tokenizer followed by lower-casing -->
    <fieldType name="simpletext" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Neither indexed nor stored; no field in this schema uses it -->
    <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false"/>

    <!-- Long-based type; no field in this schema uses it -->
    <fieldType name="date_l" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
  </types>

  <fields>
    <!-- ===== Fields shared by every document type ===== -->

    <!-- Declared as the uniqueKey below, so values must be distinct across all document types -->
    <field name="doc_id" type="string"/>
    <!-- The type (or domain) of the document -->
    <field name="doc_type" type="string"/>

    <!-- External URLs -->
    <dynamicField name="*_url" type="url" multiValued="false"/>
    <dynamicField name="*_urls" type="url" multiValued="true"/>

    <!-- Dates -->
    <dynamicField name="*_dt" type="date"/>

    <!-- Numeric counters that may be useful for relevancy biasing (popularity-like values) -->
    <dynamicField name="*_count" type="int" multiValued="false"/>

    <field name="_version_" type="long" indexed="true" stored="true"/>
    <field name="_root_" type="string" indexed="true" stored="false"/>

    <!-- Field used by the Suggester for autocompletion -->
    <field name="autocomplete" type="simpletext" stored="false" multiValued="true"/>

    <!-- Quick-search field; it is the destination of the copyField rules below -->
    <field name="catchall" type="simpletext" stored="false" omitNorms="true" multiValued="true"/>

    <field name="ID" type="string" multiValued="false"/>

    <!-- ===== PETITION ===== -->
    <field name="TenantId" type="string" multiValued="false"/>
    <field name="PetitionId" type="string" multiValued="false"/>
    <field name="PetitionNumber" type="string" multiValued="false"/>
    <field name="Title" type="simpletext" multiValued="false"/>
    <field name="Content" type="simpletext" multiValued="false"/>
    <field name="Tel" type="string" multiValued="false"/>
    <field name="EventAddress" type="simpletext" multiValued="false"/>
    <field name="DutyGridName" type="string" multiValued="false"/>
    <field name="ComplaintType" type="string" multiValued="false"/>
    <field name="IsVoid" type="boolean" multiValued="false"/>
    <field name="IsEnd" type="boolean" multiValued="false"/>
    <field name="GridAddress" type="simpletext" multiValued="false"/>
    <field name="CategoryName" type="string" multiValued="false"/>
    <field name="Category" type="string" multiValued="false"/>
    <field name="Status" type="string" multiValued="false"/>
    <field name="RegisterOn" type="date" multiValued="false"/>
    <field name="DeadLine" type="date" multiValued="false"/>
    <field name="ReportOn" type="date" multiValued="false"/>
    <field name="EndCaseOn" type="date" multiValued="false"/>
    <field name="CreatedBy" type="string" multiValued="false"/>
    <field name="SourceWay" type="string" multiValued="false"/>
    <field name="ISWGXTSB" type="string" multiValued="false"/>
    <field name="RegisterOffice" type="string" multiValued="false"/>
    <field name="EventLevel" type="string" multiValued="false"/>
    <field name="ImportantLevel" type="string" multiValued="false"/>

    <!-- ===== PETITION / DISPATCH (multi-valued, one value per dispatch) ===== -->
    <field name="DispatchOffices" type="string" multiValued="true"/>
    <!--<field name="DispatchOfficeNames" />-->
    <field name="ReceiveOffices" type="string" multiValued="true"/>
    <field name="ReceiveOfficeNames" type="string" multiValued="true"/>

    <!-- ===== PETITION / PARTICIPANT (multi-valued) ===== -->
    <field name="OrgUnits" type="string" multiValued="true"/>
    <field name="Participants" type="string" multiValued="true"/>

    <!-- ===== DISPATCH ===== -->
    <field name="Dispatcher" type="string" multiValued="false"/>
    <field name="DispatchOn" type="date" multiValued="false"/>
    <field name="DispatchOffice" type="string" multiValued="false"/>
    <field name="DispatchOfficeName" type="string" multiValued="false"/>
    <field name="ReceiveOffice" type="string" multiValued="false"/>
    <field name="ReceiveOfficeName" type="string" multiValued="false"/>
    <field name="StartOn" type="date" multiValued="false"/>
    <field name="DealWay" type="string" multiValued="false"/>
    <field name="FeedBackType" type="string" multiValued="false"/>
    <field name="FeedBackPeople" type="string" multiValued="false"/>
    <field name="FeedBackOn" type="date" multiValued="false"/>
    <field name="FeedBackMsg" type="simpletext" multiValued="false"/>
    <field name="NoPublicOpinion" type="simpletext" multiValued="false"/>
    <field name="IsPublic" type="boolean" multiValued="false"/>
    <field name="IsAlreadyReply" type="boolean" multiValued="false"/>
    <field name="IsAlreadyContact" type="boolean" multiValued="false"/>
  </fields>

  <!-- Disabled: copy author names and titles into the autocomplete field
  <copyField source="canonical_name" dest="autocomplete"/>
  <copyField source="title" dest="autocomplete"/>
  -->

  <!-- Copy the main searchable values into one big field for easy searching -->
  <copyField source="PetitionNumber" dest="catchall"/>
  <copyField source="Title" dest="catchall"/>
  <copyField source="Content" dest="catchall"/>
  <copyField source="Tel" dest="catchall"/>
  <copyField source="EventAddress" dest="catchall"/>

  <!-- A unique key field is not strictly necessary, but it is the only way Solr
       can automatically replace documents when they change. DataImportHandler
       is also very unhappy without one. -->
  <uniqueKey>doc_id</uniqueKey>

  <!-- It is a very good idea to have a default search field -->
  <defaultSearchField>catchall</defaultSearchField>

</schema>
schema.xml
db-data-config.xml
<dataConfig>
  <!--
    DataImportHandler configuration that builds two kinds of Solr documents
    from the same Oracle views:
      * doc_type PE  : one document per row of VW_HIS_EventInfo, with its
                       dispatch and participant rows flattened into
                       multi-valued fields;
      * doc_type DIS : one document per row of VW_HIS_DispatchInfo, with
                       selected columns of its petition copied in.
    doc_id is built from a per-type prefix plus the row ID, so keys of the
    two types do not collide.

    NOTE(review): every delta query converts ${dataimporter.last_index_time}
    with NEW_TIME(..., 'PST', 'GMT'). This presumably assumes the Solr host
    clock is PST and the view timestamps are GMT; confirm for the deployment.
  -->

  <!-- Connection settings are placeholders -->
  <dataSource type="JdbcDataSource"
              driver="oracle.jdbc.driver.OracleDriver"
              url="jdbc:oracle:thin:@192.168.0.0:1521:test"
              user="user"
              password="pwd"/>

  <document>

    <!-- ===== Root entity 1: petition (doc_type = PE) ===== -->
    <!-- deltaQuery only needs to return the primary key; deltaImportQuery
         then reloads the full row through ${dataimporter.delta.ID}. -->
    <entity name="petition"
            pk="ID"
            transformer="TemplateTransformer"
            query="select * from VW_HIS_EventInfo"
            deltaImportQuery="select t.* from VW_HIS_EventInfo t where t.id='${dataimporter.delta.ID}'"
            deltaQuery="select t.ID from VW_HIS_EventInfo t where
                        RegistorOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or ReportOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or endcaseon &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">

      <!-- Synthetic key and type discriminator produced by TemplateTransformer -->
      <field column="doc_id" template="PE_${petition.ID}"/>
      <field column="doc_type" template="PE"/>

      <!--<field column="LATLON" name="LatLon_p"/>-->
      <field column="TENANTID" name="TenantId"/>
      <field column="ID" name="PetitionId"/>
      <field column="PETITIONNUMBER" name="PetitionNumber"/>
      <field column="TITLE" name="Title"/>
      <field column="CONTENT" name="Content"/>
      <field column="TEL" name="Tel"/>
      <field column="EVENTADDRESS" name="EventAddress"/>
      <!--<field column="AREANAME" name="AreaName" />-->
      <field column="DUTYGRIDNAME" name="DutyGridName"/>
      <field column="GRIDADDRESS" name="GridAddress"/>
      <field column="COMPLAINTQUALITYNAME" name="ComplaintType"/>
      <field column="ISVOID" name="IsVoid"/>
      <field column="ISEND" name="IsEnd"/>
      <field column="CATEGORYNAME" name="CategoryName"/>
      <field column="CATEGORYCODE" name="Category"/>
      <field column="STATUS" name="Status"/>
      <field column="REGISTORON" name="RegisterOn"/>
      <field column="DEADLINE" name="DeadLine"/>
      <field column="CREATEDBY" name="CreatedBy"/>
      <field column="REPORTON" name="ReportOn"/>
      <field column="SOURCEWAY" name="SourceWay"/>
      <field column="ISWGXTSB" name="ISWGXTSB"/>
      <field column="REGISTOROFFICE" name="RegisterOffice"/>
      <!--<field column="TOOFFICENAME" name="ToOfficeName" />-->
      <field column="EVENTLEVEL" name="EventLevel"/>
      <field column="IMPORTANTLEVEL" name="ImportantLevel"/>
      <field column="ENDCASEON" name="EndCaseOn"/>
      <!--<field column="ENDOPINION" name="EndOpinion" />-->

      <!-- Child: dispatches of this petition, flattened into multi-valued fields.
           deltaQuery must return PETITIONID as well as ID, because
           parentDeltaQuery resolves ${petition_dispatch.PETITIONID} from the
           rows this query returns. -->
      <entity name="petition_dispatch"
              pk="ID"
              query="select * from VW_HIS_DispatchInfo where petitionid='${petition.ID}'"
              deltaQuery="select ID, PETITIONID from VW_HIS_DispatchInfo where
                          DISPATCHON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                          or FEEDBACKON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petition_dispatch.PETITIONID}'">
        <!--<field column="DISPATCHER" name="Dispatchers" />-->
        <field column="DISPATCHOFFICE" name="DispatchOffices"/>
        <!--<field column="DISPATCHOFFICENAME" name="DispatchOfficeNames" />-->
        <field column="RECEIVEOFFICE" name="ReceiveOffices"/>
        <field column="RECEIVEOFFICENAME" name="ReceiveOfficeNames"/>
      </entity>

      <!-- Child: participants of this petition.
           The entity name keeps its original spelling ("petiton") so that it
           still matches the ${petiton_participant.PETITIONID} reference.
           As above, deltaQuery returns PETITIONID so parentDeltaQuery can
           locate the owning petition. -->
      <entity name="petiton_participant"
              pk="PARTICIPANT"
              query="select distinct ORGUNIT,PARTICIPANT from vw_his_participant where petitionid='${petition.ID}'"
              deltaQuery="select PARTICIPANT, PETITIONID from vw_his_participant where
                          HandleOn &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petiton_participant.PETITIONID}'">
        <!--<field column="HANDLEON" name="HandleOns"/>-->
        <field column="ORGUNIT" name="OrgUnits"/>
        <!--<field column="PARTICIPANTNAME" name="ParticipantNames"/>-->
        <field column="PARTICIPANT" name="Participants"/>
        <!--<field column="PARTICIPANTTYPE" name="ParticipantTypes"/>-->
      </entity>
    </entity>

    <!-- ===== Root entity 2: dispatch (doc_type = DIS) ===== -->
    <entity name="dispatch"
            pk="ID"
            transformer="TemplateTransformer"
            query="select * from VW_HIS_DispatchInfo"
            deltaImportQuery="select t.* from VW_HIS_DispatchInfo t where t.id='${dataimporter.delta.ID}'"
            deltaQuery="select ID from VW_HIS_DispatchInfo where
                        DISPATCHON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or FEEDBACKON &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">

      <!-- Synthetic key and type discriminator produced by TemplateTransformer -->
      <field column="doc_id" template="DIS_${dispatch.ID}"/>
      <field column="doc_type" template="DIS"/>

      <field column="TENANTID" name="TenantId"/>
      <field column="PETITIONID" name="PetitionId"/>
      <field column="DISPATCHER" name="Dispatcher"/>
      <field column="DISPATCHON" name="DispatchOn"/>
      <field column="DISPATCHOFFICE" name="DispatchOffice"/>
      <field column="DISPATCHOFFICENAME" name="DispatchOfficeName"/>
      <field column="RECEIVEOFFICE" name="ReceiveOffice"/>
      <field column="RECEIVEOFFICENAME" name="ReceiveOfficeName"/>
      <field column="STARTON" name="StartOn"/>
      <field column="DEADLINE" name="DeadLine"/>
      <field column="DEALWAY" name="DealWay"/>
      <field column="STATUS" name="Status"/>
      <field column="FEEDBACKTYPE" name="FeedBackType"/>
      <field column="FEEDBACKPEOPLE" name="FeedBackPeople"/>
      <field column="FEEDBACKON" name="FeedBackOn"/>
      <field column="FEEDBACKMSG" name="FeedBackMsg"/>
      <field column="NOPUBLICOPINION" name="NoPublicOpinion"/>
      <field column="ISPUBLIC" name="IsPublic"/>
      <field column="ISALREADYREPLY" name="IsAlreadyReply"/>
      <!-- NOTE(review): the column is spelled ISALREADYCONCAT (not CONTACT);
           confirm this matches the actual column name in the view. -->
      <field column="ISALREADYCONCAT" name="IsAlreadyContact"/>

      <!-- Child: the petition this dispatch belongs to; copies the searchable
           petition columns onto the dispatch document. -->
      <entity name="dispatch_petition"
              pk="ID"
              query="select * from VW_HIS_EventInfo where id='${dispatch.PETITIONID}'"
              deltaQuery="select ID from VW_HIS_EventInfo where
                          endcaseon &gt; NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_DispatchInfo where PETITIONID='${dispatch_petition.ID}'">
        <field column="PETITIONNUMBER" name="PetitionNumber"/>
        <field column="TITLE" name="Title"/>
        <field column="CONTENT" name="Content"/>
        <field column="TEL" name="Tel"/>
        <field column="EVENTADDRESS" name="EventAddress"/>
        <!--<field column="AREANAME" name="AreaName" />-->
        <field column="DUTYGRIDNAME" name="DutyGridName"/>
        <field column="GRIDADDRESS" name="GridAddress"/>
        <field column="COMPLAINTQUALITYNAME" name="ComplaintType"/>
        <field column="CATEGORYNAME" name="CategoryName"/>
        <field column="CATEGORYCODE" name="Category"/>
      </entity>
    </entity>

  </document>
</dataConfig>
db-data-config.xml
Solr5 DataImport 处理1对多关系的更多相关文章
- Solr-5.3.1 dataimport 导入mysql数据
最近需要计算制造业领域大词表每个词的idf,词表里一共九十多万个词,语料一共三百七十多万篇分词后文献.最开始尝试用程序词表循环套语料循环得到每个词的idf,后来又尝试把语料存入mysql然后建立全文索 ...
- solr5.2 mysql 增量索引
前提:数据库里数据进行增删改操作时,相应的solr需要修改或者新建索引,之前从数据库中导入数据并创建索引的操作是全量创建,如果本身数据库数据量非常大,就需要增量创建索引 1./usr/local/sr ...
- solr5.3.1 集群服务搭建
转http://978538.blog.51cto.com/968538/1710442 一. 安装部署 zookeeper集群部署: 节点: 10.1.12.51:2181 node1 1 ...
- .Net程序员 Solr-5.3之旅 (三)Solr 从MSSQ导入索引数据
阅读目录 引言 准备工作 data-config.xml schema.xml 导入数据 结尾 附件下载 引言 Other men live to eat, while I eat to live.- ...
- .Net程序员 Solr-5.3之旅 (二)Solr 安装
阅读目录 引言 Solr5.3环境搭建 Solr5.3创建第一个Core 结尾 引言 一个糟糕的设计有好的表现形式,它会被判死缓,一个好的设计有糟糕的表现形式,它会被判死刑立即执行. 以上摘自一个设计 ...
- solr5.5索引mysql数据(新手总结)
一 solr5.5环境部署到Eclipse(luna版) solr部署参见:http://blog.csdn.net/csmnjk/article/details/64121765 二 Ik分词器设置 ...
- 使用solr批量导入mysql数据库,以及Unable to read: dataimport.properties等坑
折腾了一下午终于成功了!先放一张成功图: 成功把mysql的数据添加进去了,我这里是整合了tomcat9,整合步骤挺麻烦的,百度一大堆! 这里主要介绍批量导入数据,这里有些坑,所以记录一下: 步骤: ...
- Tomcat + solr5.2.1环境搭建
1. 下载solr并解压后的目录为:E:\solr-5.2.1 , http://lucene.apache.org/solr/downloads.html 2. 将solr部署到Tomcat中 ...
- solr 配置中文分析器/定义业务域/配置DataImport功能(测试用)
一.配置中文分析器 使用IKAnalyzer 配置方法: 1)把IK的jar包添加到solr工程中/WEB-INF/lib目录下 2)把IK的配置文件扩展词典, ...
随机推荐
- mysql数据库日期,ip等处理
一.日期 1.select now(); 查询当前时间,格式为:年-月-日 时:分:秒,如2015-12-17 17:37:20 2.select unix_timestamp(); 将字符串类型的日 ...
- gong server
宫 server mac os 系统 vpn 202.39.176.66 funmobigtmvpn 密码 funmobi!@ 安装 eclipse 安装mysql 1 配置 ...
- centos 安装 opencv-3.1.0
官方安装教程 http://docs.opencv.org/3.1.0/d9/d52/tutorial_java_dev_intro.html#gsc.tab=0 注意要先安装jdk和apache a ...
- context上下文 php版解释
context翻译为上下文其实不是很好,只是翻译理解大概的作用,对于开发来说,context是对定义的使用的变量,常量或者说是配置, 部分的函数功能除了缺省值之外,往往需要手动设置一些定义量来配合当前 ...
- Windows 10家庭版共享打印机
原文地址:http://blog.csdn.net/Purpleendurer/article/details/50498788P.s. 原文太罗嗦,简化了一下~ 启用Guest账户 按Win+X,从 ...
- Android--数据存储
1. 文件存储指定的文件名不可以包含路径, 默认保存到 /data/data/<package name>/files/ 目录下 2. SharedPreferences存储使用键值对的方 ...
- 暗黑战神客户端(IOS和Android)打包教程
先说下遇到的严重问题: 1.暗黑战神的资源管理有2套流程,一套开发使用(Resources.Load),一套正式上线使用(AssetBundles, 流畅),而走AssetBundles流程的代码则有 ...
- Spark会把数据都载入到内存么
转载自:https://www.iteblog.com/archives/1648 前言: 很多初学者其实对于Spark的编程模式还是RDD这个概念理解不到位,就会产生一些误解.比如,很多时候我们常常 ...
- Wordpress基础:安装主题和插件
一:安装主题 1.下载主题 2.解压至wordpress目录下的/wp-content/themes 3.访问后台>外观>主题启用即可 二:安装插件 1.下载插件 2.解压至wordpre ...
- Repeater嵌套Repeater并取得嵌套Repeater里面的控件
前台代码: <asp:Repeater ID="RepeaterScene" runat="server" OnItemDataBound=&quo ...