flume install

  1. flume 安装

    [root@10 app]# tar -zxf apache-flume-1.7.0-bin.tar.gz
    [root@10 app]# mv apache-flume-1.7.0-bin /mnt/app/flume
    [root@10 app]# chown -R wisdom.wisdom /mnt/app/flume
    [root@10 app]# mkdir -p /mnt/{data,log}/flume
    [root@10 app]# chown -R wisdom.wisdom /mnt/{data,log}/flume
  2. flume 环境参数配置

    [root@10 app]# cp /mnt/app/flume/conf/{flume-env.sh,flume-env.sh.bak}
    [root@10 app]# cat > /mnt/app/flume/conf/flume-env.sh <<'EOF'
    > export JAVA_HOME=/mnt/app/java
    > export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote"
    > export JAVA_OPTS="$JAVA_OPTS -Dorg.apache.flume.log.rawdata=true -Dorg.apache.flume.log.printconfig=true "
    > FLUME_CLASSPATH="/home/flume/flume/lib"
    > EOF
  3. flume log配置
    ```bash
    [root@10 ~]# cat /mnt/app/flume/conf/log4j.properties |grep -v ^#|grep -v ^$
    flume.root.logger=INFO,LOGFILE
    flume.log.dir=/mnt/log/flume //主要是这里
    flume.log.file=flume.log
    log4j.logger.org.apache.flume.lifecycle = INFO
    log4j.logger.org.jboss = WARN
    log4j.logger.org.mortbay = INFO
    log4j.logger.org.apache.avro.ipc.NettyTransceiver = WARN
    log4j.logger.org.apache.hadoop = INFO
    log4j.logger.org.apache.hadoop.hive = ERROR
    log4j.rootLogger=${flume.root.logger}
    log4j.appender.LOGFILE=org.apache.log4j.RollingFileAppender
    log4j.appender.LOGFILE.MaxFileSize=100MB
    log4j.appender.LOGFILE.MaxBackupIndex=10
    log4j.appender.LOGFILE.File=${flume.log.dir}/${flume.log.file}
    log4j.appender.LOGFILE.layout=org.apache.log4j.PatternLayout
    log4j.appender.LOGFILE.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n
    log4j.appender.DAILY=org.apache.log4j.rolling.RollingFileAppender
    log4j.appender.DAILY.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy
    log4j.appender.DAILY.rollingPolicy.ActiveFileName=${flume.log.dir}/${flume.log.file}
    log4j.appender.DAILY.rollingPolicy.FileNamePattern=${flume.log.dir}/${flume.log.file}.%d{yyyy-MM-dd}
    log4j.appender.DAILY.layout=org.apache.log4j.PatternLayout
    log4j.appender.DAILY.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} %-5p [%t] (%C.%M:%L) %x - %m%n
    log4j.appender.console=org.apache.log4j.ConsoleAppender
    log4j.appender.console.target=System.err
    log4j.appender.console.layout=org.apache.log4j.PatternLayout
    log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
    ```
  4. flume 配置文件参数
    [root@10 app]# su - wisdom
    [wisdom@10 ~]$ vim /mnt/app/flume/conf/test.conf

    producer.sources = s_test
    producer.channels = c_test
    producer.sinks = r_test

# 定义通道

producer.channels.c_test.type = file
producer.channels.c_test.checkpointDir = /mnt/data/flume/test/filechannel/checkpointDir
producer.channels.c_test.dataDirs = /mnt/data/flume/test/filechannel/dataDirs
producer.channels.c_test.transactionCapacity = 40000
producer.channels.c_test.capacity = 2000000
producer.channels.c_test.maxFileSize = 2146435071
producer.channels.c_test.minimumRequiredSpace = 524288000
producer.channels.c_test.checkpointInterval = 20000

# 定义输出到kafka

producer.sinks.r_test.type = org.apache.flume.sink.kafka.KafkaSink
producer.sinks.r_test.kafka.bootstrap.servers = 10.0.3.40:9092,10.0.3.41:9092,10.0.3.42:9092
producer.sinks.r_test.kafka.topic = index-test
producer.sinks.r_test.kafka.flumeBatchSize = 100
producer.sinks.r_test.kafka.producer.acks = 1
producer.sinks.r_test.kafka.producer.compression.type = snappy
producer.sinks.r_test.kafka.producer.max.request.size = 10000000

# 定义数据源

producer.sources.s_test.type = TAILDIR
producer.sources.s_test.filegroups = f1
producer.sources.s_test.filegroups.f1 = /mnt/log/test/^test.log$
producer.sources.s_test.positionFile = /mnt/data/flume/test/filesource/test.json

# sources 和 sinks 绑定 channel,实现一条通道

producer.sinks.r_test.channel = c_test
producer.sources.s_test.channels = c_test

5. flume 执行任务
```bash
[wisdom@10 ~]$ /mnt/app/flume/bin/flume-ng agent -n producer --conf /mnt/app/flume/conf -f /mnt/app/flume/conf/service.properties &
```
  6. 补充
    /mnt/app/flume2es/bin/flume-ng agent -n producer -f /mnt/app/flume2es/conf/test2.properties  --conf /mnt/app/flume2es/conf -Dflume.root.logger=debug,console

通过flume 从kafka读取数据写入ES

使用flume经验:
* 使用flume(apache-flume-1.7.0-bin.tar.gz)可以将本地的日志文件读取写入到kafka(kafka_2.11-0.9.0.0.tgz)
* 使用flume(apache-flume-1.7.0-bin.tar.gz)从kafka(kafka_2.11-0.9.0.0.tgz)读取写入elasticsearch(elasticsearch-2.3.3.tar.gz)会发生报错.
解决方法:
1. 将flume(apache-flume-1.7.0-bin.tar.gz)解压
2. 将elasticsearch(elasticsearch-2.3.3.tar.gz)解压
3. 将zookeeper(zookeeper-3.4.6.tar.gz)解压
4. 将elasticsearch(elasticsearch-2.3.3.tar.gz)解压后的"elasticsearch-2.3.3/lib/" lib目录下的所有jar包拷贝到"apache-flume-1.7.0-bin/lib"目录下
5. 将zookeeper(zookeeper-3.4.6.tar.gz)解压后的"zookeeper-3.4.6/zookeeper-3.4.6.jar"拷贝到"apache-flume-1.7.0-bin/lib"下,并删除原有的"zookeeper-*jar"
6. 删除"elasticsearch-2.3.3/lib/"下的"guava-*.jar"和"jackson-core-*.jar"
7. 下载elasticsearch-sink2-1.0.jar(https://github.com/lucidfrontier45/ElasticsearchSink2/releases),并上传到"apache-flume-1.7.0-bin/lib"下
或者:
如果自己有能力,可以尝试重写elasticsearch-sink2.jar包

flume install的更多相关文章

  1. 大数据平台架构(flume+kafka+hbase+ELK+storm+redis+mysql)

    上次实现了flume+kafka+hbase+ELK:http://www.cnblogs.com/super-d2/p/5486739.html 这次我们可以加上storm: storm-0.9.5 ...

  2. Flume NG Getting Started(Flume NG 新手入门指南)

    Flume NG Getting Started(Flume NG 新手入门指南)翻译 新手入门 Flume NG是什么? 有什么改变? 获得Flume NG 从源码构建 配置 flume-ng全局选 ...

  3. Flume的安装与配置

    Flume的安装与配置 一.       资源下载 资源地址:http://flume.apache.org/download.html 程序地址:http://apache.fayea.com/fl ...

  4. 《OD大数据实战》Flume入门实例

    一.netcat source + memory channel + logger sink 1. 修改配置 1)修改$FLUME_HOME/conf下的flume-env.sh文件,修改内容如下 e ...

  5. 《OD学Flume》20160806Flume和Kafka

    一.Flume http://flume.apache.org/FlumeUserGuide.html Flume是一个分布式的,可靠的,可用的,非常有效率的对大数据量的日志数据进行收集.聚集.移动信 ...

  6. flume服务的搭建

    搭建前先统一时间,关闭防火墙,使用的jar包版本是1.6.0的 服务配置有两种方式 第一种:具体步骤如下: 1.将jar包传至node1上,解压至根目录 2.更改目录名,使用如下命令:mv apach ...

  7. flume 自己定义 hbase sink 类

    參考(向原作者致敬) http://ydt619.blog.51cto.com/316163/1230586 https://blogs.apache.org/flume/entry/streamin ...

  8. 日志采集框架Flume以及Flume的安装部署(一个分布式、可靠、和高可用的海量日志采集、聚合和传输的系统)

    Flume支持众多的source和sink类型,详细手册可参考官方文档,更多source和sink组件 http://flume.apache.org/FlumeUserGuide.html Flum ...

  9. 分布式日志收集系统:Flume

    Flume知识点: Event 是一行一行的数据 1.flume是分布式的日志收集系统,把收集来的数据传送到目的地去. 2.flume里面有个核心概念,叫做agent.agent是一个java进程,运 ...

随机推荐

  1. OpenCV On Android环境配置最新&最全指南(Android Studio篇)

    本文是从本人简书上搬运而来,属本人原创,如有转载,请注明出处:http://www.jianshu.com/p/6e16c0429044 简介 本文是<OpenCV On Android环境配置 ...

  2. linkage disequilibrium|linkage equilibrium

    I.9 Linkage INDEPENDENCE OF GENOTYPES AT TWO LOCI:若A,B是两个独立位点:PA是基因A的概率,PB是基因B的概率.因为基因A与基因B是相互独立的位点, ...

  3. python处理nii格式文件

    网上已经有很多代码了,但是注释的都不全,看起来很费解,我自己加了一些注释,重新发出来,尽可能的通俗易懂 读取前需要先安装库 pip install nibabel pip install matplo ...

  4. 【shell】概述

    功能简介 批量自动初始化系统(update,软件安装,时区设置,安全策略...) 批量自动部署软件(LAMP,LNMP,Nginx,LVS,Tomcat) 管理应用程序(KVM,集群管理扩容,MySQ ...

  5. Python_实战爬虫

    # -*- coding: utf-8 -*-__auther__ = "jiachaojun"__time__ = '2020/1/12 11:03'import request ...

  6. Tomcat下访问HTML页面乱码的解决方法

    问题:在 Tomcat 服务器中访问 HTML 静态页面出现中文乱码,html 页面即使 charset 设置成 UTF-8 也会是乱码,打开浏览器的开发者工具发现 response 的请求头中的 C ...

  7. Simple Random Sampling|representative sample|probability sampling|simple random sampling with replacement| simple random sampling without replacement|Random-Number Tables

    1.2 Simple Random Sampling Census, :全部信息 Sampling: 抽样方式: representative sample:有偏向,研究者选择自己觉得有代表性的sam ...

  8. 03-kubeadm初始化Kubernetes集群

    请求地址https://pc-shop.xiaoe-tech.com/appc7XFLu4K9234/video_details?id=v_5b615b9e432f5_DXDGopmB

  9. SpringBoot 系列 - 自己写starter

    原文地址: https://www.xncoding.com/2017/07/22/spring/sb-starter.html 前言: Spring Boot由众多Starter组成,随着版本的推移 ...

  10. 压缩感知与稀疏模型——Convex Methods for Sparse Signal Recovery

    第三节课的内容.这节课上课到半截困了睡着了,看着大家都很积极请教认真听讲,感觉很惭愧.周末不能熬太晚.这个博客就记录一下醒着时候听到的内容. Motivation 目前的时代需要处理的数据量维度可能很 ...