Logstash 配置文件解析
Logstash 是 ELK 架构中专门用于接收并处理数据的组件,并且可以很方便地扩展节点。
# Settings file in YAML
# Settings can be specified either in hierarchical form, e.g.:
#   pipeline:
#     batch:
#       size: 125
#       delay: 5
# Or as flat keys:
#   pipeline.batch.size: 125
#   pipeline.batch.delay: 5
# This file uses the flat-key form throughout.
# ------------ Node identity ------------
# Use a descriptive name for the node.
# If omitted, the node name defaults to the machine's host name.
node.name: dev211133
# If omitted the node name will default to the machine's host name
# ------------ Data path ------------------
# Which directory should be used by Logstash and its plugins
# for any persistent needs. Defaults to LOGSTASH_HOME/data
path.data: /data/logstash/data
# ------------ Pipeline Settings --------------
# Set the number of workers that will, in parallel, execute the filters+outputs
# stage of the pipeline.
# This defaults to the number of the host's CPU cores; it is pinned to 8 here.
pipeline.workers: 8
# How many workers should be used per output plugin instance
# pipeline.output.workers: 1
# How many events to retrieve from inputs before sending to filters+workers
# (set to 4000 events per batch here).
pipeline.batch.size: 4000
# How long to wait before dispatching an undersized batch to filters+workers
# Value is in milliseconds.
# pipeline.batch.delay: 5
# Force Logstash to exit during shutdown even if there are still in-flight
# events in memory. By default, Logstash will refuse to quit until all
# received events have been pushed to the outputs.
# WARNING: enabling this can lead to data loss during shutdown
# pipeline.unsafe_shutdown: false
# ------------ Pipeline Configuration Settings --------------
# Where to fetch the pipeline configuration for the main pipeline
path.config: /opt/logstash/config/conf.d
# Pipeline configuration string for the main pipeline
# config.string:
# At startup, test if the configuration is valid and exit (dry run)
# config.test_and_exit: false
# Periodically check if the configuration has changed and reload the pipeline
# This can also be triggered manually through the SIGHUP signal
# config.reload.automatic: false
# How often to check if the pipeline configuration has changed (in seconds)
# config.reload.interval: 3
# Show fully compiled configuration as debug log message
# NOTE: --log.level must be 'debug'
# config.debug: false
# ------------ Metrics Settings --------------
# Bind address for the metrics REST endpoint
# NOTE(review): the value below is an empty string - it looks redacted;
# confirm the intended bind address before using this file.
http.host: ""
# Bind port for the metrics REST endpoint. This option also accepts a range
# (e.g. 9600-9700), in which case Logstash picks the first available port.
# NOTE(review): the range configured below starts at 5000, which is wider
# than the 9600-9700 example - confirm this is intentional.
http.port: 5000-9700
# ------------ Debugging Settings --------------
# Options for log.level:
# * fatal
# * error
# * warn
# * info (default)
# * debug
# * trace
log.level: info
# Directory where Logstash writes its own log files
path.logs: /data/logstash/logs
# ------------ Other Settings --------------
# Where to find custom plugins
# path.plugins: []
# These settings are ONLY used by $LS_HOME/bin/system-install to create a custom
# startup script for Logstash. It should automagically use the init system
# (systemd, upstart, sysv, etc.) that your Linux distribution uses.
# After changing anything here, you need to re-run $LS_HOME/bin/system-install
# as root to push the changes to the init script.
# NOTE(review): in this excerpt most of the comments below no longer have the
# setting they describe next to them; LS_OPTS is the only assignment present.
# Override Java location
# Set a home directory
# logstash settings directory, the path which contains logstash.yml
# Arguments to pass to logstash
# Logstash start-up arguments: point Logstash at the settings directory.
# NOTE(review): LS_SETTINGS_DIR is not assigned anywhere in this excerpt;
# it must be defined before this line for the expansion to be non-empty.
LS_OPTS="--path.settings ${LS_SETTINGS_DIR}"
# Arguments to pass to java
# pidfiles aren't used the same way for upstart and systemd; this is for sysv users.
# user and group id to be invoked as
# Enable GC logging by uncommenting the appropriate lines in the GC logging
# section in jvm.options
# Path of the Logstash JVM GC log
# Open file limit
# Nice level
# Change these to have the init script named and described differently
# This is useful when running multiple instances of Logstash on the same
# physical box or vm
# If you need to run a command or script before launching Logstash, put it
# between the lines beginning with `read` and `EOM`, and uncomment those lines.
## read -r -d '' PRESTART << EOM
## EOM
# Input stage: consume JSON-encoded events from Kafka.
# The closing braces of add_field, kafka and input were missing, which made
# the block unparseable; they are restored here.
input {
  kafka {
    # NOTE(review): the broker list below contains only separators - it looks
    # redacted; fill in the real "host:port,host:port,..." list.
    bootstrap_servers => ",,"
    # Kafka consumer group
    group_id => "clio-consr-weba-go1"
    # Topics to subscribe to
    topics => ["test-webaccess"]
    # Session timeout (ms)
    session_timeout_ms => "60000"
    # Request timeout (ms)
    request_timeout_ms => "180000"
    max_poll_records => "500"
    check_crcs => "true"
    # Decode every Kafka message as JSON
    codec => "json"
    # Attach Kafka metadata to each event; the filters and outputs of this
    # pipeline read it as [kafka][topic]
    decorate_events => true
    # Number of consumer threads. Per the original note, size it so that
    # threads * workers * number of servers = partition count.
    consumer_threads => 3
    add_field => {
      # NOTE(review): empty value - looks redacted; presumably the name of the
      # host running this Logstash instance.
      "processor_host" => ""
    }
  }
}
# Filter stage 1: tag every event as "invalid", record timing fields, then
# clear the tag (or re-route the event) based on the Kafka topic.
# All closing braces were missing from this block; they are restored here.
filter {
  mutate {
    # Every event starts out tagged "invalid"; filters remove the tag once the
    # event has been handled.
    add_tag => [ "invalid" ]
    # Copy the current @timestamp into "receive" (per the original note, this
    # is the timestamp carried over from Filebeat).
    add_field => [ "receive" , "%{[@timestamp]}" ]
  }

  ruby {
    init => "require 'time'"
    # processor_timestamp = current time in Logstash;
    # lag = current time minus the event's @timestamp, in whole seconds.
    code => "event.set('processor_timestamp' , Time.now());event.set('lag' , Time.now().to_i-event.get('@timestamp').to_i)"
  }

  # The output stage builds the index name from [kafka][topic].
  if [kafka][topic] == "test-business" {
    # NOTE(review): the `else` is paired with the [app] check here, inferred
    # from the surviving lines - confirm this nesting matches the intent.
    if [app] == "audit" {
      # Re-route audit events so they are handled as topic "test-audit".
      mutate {
        update => { "[kafka][topic]" => "test-audit" }
      }
    } else {
      mutate {
        # Remove the "invalid" tag
        remove_tag => [ "invalid" ]
      }
    }
  }

  if [kafka][topic] == "system-logstash" {
    mutate {
      remove_tag => [ "invalid" ]
    }
  }
}
# Filter stage 2: parse web access-log lines from topic "test-webaccess".
# All closing braces were missing from this block; they are restored here.
filter {
  if [kafka][topic] == "test-webaccess" {
    # Split the raw line into fields: a quoted X-Forwarded-For value, the
    # combined Apache log fields, then duration, domain, protocol, URL path,
    # URL query, method, input/output byte counts and a final quoted value.
    grok {
      match => { "message" => "\"%{DATA:xforward}\" %{COMBINEDAPACHELOG} (?:%{NUMBER:duration:float}) (?:%{DATA:domain}) \"(?:%{DATA:protocol}|)\" \"(?:%{DATA:rawurlpath})\" \"(?:%{DATA:rawurlquery}|)\" (?:%{DATA:method}) (?:%{NUMBER:ibytes:int}) (?:%{NUMBER:obytes:int}) \"(?:%{DATA:uleck}|)\"" }
    }

    # Parse the "timestamp" field extracted by grok.
    date {
      match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ]
    }

    # Drop the raw timestamp field and clear the "invalid" tag.
    mutate {
      remove_field => [ "timestamp" ]
      remove_tag => [ "invalid" ]
    }
  }
}
# Filter stage 3: handle events whose topic is "test-audit".
# All closing braces were missing from this block; they are restored here.
filter {
  if [kafka][topic] == "test-audit" {
    # Extract an ISO8601-style timestamp from the message into "timestamp".
    grok {
      match => { "message" => "%{TIMESTAMP_ISO8601:timestamp}" }
    }

    # Parse "timestamp", trying the comma-millisecond layout first and
    # falling back to ISO8601.
    date {
      match => [ "timestamp" , "yyyy-MM-dd HH:mm:ss,SSS" , "ISO8601" ]
    }

    mutate {
      remove_tag => [ "invalid" ]
    }
  }
}
# Filter stage 4: expand the user-agent string and drop the scratch
# "timestamp" field.
# All closing braces were missing from this block; they are restored here.
filter {
  # Run only when an agent value is present and is not the "-" placeholder.
  if [agent] and [agent] != "-" and [agent] != "" {
    useragent {
      source => "agent"
      # Generated fields are prefixed with "UA-"
      prefix => "UA-"
    }
  }

  # Remove any "timestamp" field still left on the event.
  if [timestamp] {
    mutate {
      remove_field => ["timestamp"]
    }
  }
}
# Output stage: send events to Elasticsearch, with the index name derived from
# [kafka][topic]. Events still tagged "invalid" match no branch and are not
# sent to any output.
# All closing braces were missing from this block; they are restored here.
# NOTE(review): flush_size and document_type are not accepted by newer
# versions of the elasticsearch output plugin - verify against the installed
# plugin version.
output {
  if "invalid" not in [tags] {
    if [kafka][topic] == "system-logstash" {
      elasticsearch {
        # Elasticsearch servers
        # NOTE(review): host entries are empty - they look redacted.
        hosts => ["",""]
        # One index per month for this topic
        index => "%{[kafka][topic]}-%{+YYYY.MM}"
        # Document type taken from the event's "type" field
        document_type => "%{[type]}"
        # Number of events buffered before a flush
        flush_size => 5000
      }
    } else {
      elasticsearch {
        hosts => ["",""]
        # One index per day for every other topic
        index => "%{[kafka][topic]}-%{+YYYY.MM.dd}"
        document_type => "%{[type]}"
        flush_size => 5000
      }
    }
  }
}
