|
|
<h1><center>LogStash 数据过滤</center></h1>
|
|
|
|
|
|
作者:行癫(盗版必究)
|
|
|
|
|
|
------
|
|
|
|
|
|
## 一:grok插件
|
|
|
|
|
|
#### 1.简介
|
|
|
|
|
|
grok插件功能非常强大,它几乎可以匹配任意格式的数据,但它的性能开销和对资源的消耗同样让人诟病
|
|
|
|
|
|
filter的grok是目前logstash中解析非结构化日志数据最好的方式
|
|
|
|
|
|
grok构建在正则表达式之上,所以任何正则表达式在grok中都是有效的
|
|
|
|
|
|
#### 2.语法格式
|
|
|
|
|
|
```shell
|
|
|
%{语法:语义}
|
|
|
```
|
|
|
|
|
|
注意:
|
|
|
|
|
|
语法(SYNTAX)指的是用于匹配的模式名称;语义(SEMANTIC)指的是匹配到的内容在事件中保存时使用的字段名
|
|
|
|
|
|
例如使用NUMBER模式可以匹配出数字,IP模式则会匹配出127.0.0.1这样的IP地址;写成 %{IP:client} 时,匹配到的IP地址会保存到client字段中
|
|
|
|
|
|
#### 3.案例
|
|
|
|
|
|
实验数据:Nginx的访问日志
|
|
|
|
|
|
Logstash配置文件(包含输入、过滤、输出三部分):
|
|
|
|
|
|
```shell
|
|
|
input {
|
|
|
stdin {
|
|
|
}
|
|
|
}
|
|
|
filter{
|
|
|
grok{
|
|
|
match => {"message" => "%{IP:client}"}
|
|
|
}
|
|
|
}
|
|
|
output {
|
|
|
stdout {
|
|
|
}
|
|
|
}
|
|
|
```
|
|
|
|
|
|
注意:grok内置的常用模式定义如下(上面案例中使用的IP模式即为其中之一):
|
|
|
|
|
|
```shell
|
|
|
USERNAME [a-zA-Z0-9._-]+
|
|
|
USER %{USERNAME}
|
|
|
EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
|
|
|
EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
|
|
|
INT (?:[+-]?(?:[0-9]+))
|
|
|
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
|
|
|
NUMBER (?:%{BASE10NUM})
|
|
|
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
|
|
|
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
|
|
|
|
|
|
POSINT \b(?:[1-9][0-9]*)\b
|
|
|
NONNEGINT \b(?:[0-9]+)\b
|
|
|
WORD \b\w+\b
|
|
|
NOTSPACE \S+
|
|
|
SPACE \s*
|
|
|
DATA .*?
|
|
|
GREEDYDATA .*
|
|
|
QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
|
|
|
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
|
|
|
# URN, allowing use of RFC 2141 section 2.3 reserved characters
|
|
|
URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
|
|
|
# Networking
|
|
|
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
|
|
|
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
|
|
|
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
|
|
|
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
|
|
|
IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
|
|
|
IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
|
|
|
IP (?:%{IPV6}|%{IPV4})
|
|
|
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
|
|
|
IPORHOST (?:%{IP}|%{HOSTNAME})
|
|
|
HOSTPORT %{IPORHOST}:%{POSINT}
|
|
|
# paths
|
|
|
PATH (?:%{UNIXPATH}|%{WINPATH})
|
|
|
UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
|
|
|
TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
|
|
|
WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
|
|
|
URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
|
|
|
URIHOST %{IPORHOST}(?::%{POSINT:port})?
|
|
|
# uripath comes loosely from RFC1738, but mostly from what Firefox
|
|
|
# doesn't turn into %XX
|
|
|
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
|
|
|
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
|
|
|
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
|
|
|
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
|
|
|
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
|
|
|
# Months: January, Feb, 3, 03, 12, December
|
|
|
MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
|
|
|
MONTHNUM (?:0?[1-9]|1[0-2])
|
|
|
MONTHNUM2 (?:0[1-9]|1[0-2])
|
|
|
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
|
|
|
# Days: Monday, Tue, Thu, etc...
|
|
|
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
|
|
|
# Years?
|
|
|
YEAR (?>\d\d){1,2}
|
|
|
HOUR (?:2[0123]|[01]?[0-9])
|
|
|
MINUTE (?:[0-5][0-9])
|
|
|
# '60' is a leap second in most time standards and thus is valid.
|
|
|
SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
|
|
|
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
|
|
|
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
|
|
|
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
|
|
|
DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
|
|
|
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
|
|
|
ISO8601_SECOND (?:%{SECOND}|60)
|
|
|
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
|
|
|
DATE %{DATE_US}|%{DATE_EU}
|
|
|
DATESTAMP %{DATE}[- ]%{TIME}
|
|
|
TZ (?:[APMCE][SD]T|UTC)
|
|
|
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
|
|
|
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
|
|
|
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
|
|
|
DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
|
|
|
# Syslog Dates: Month Day HH:MM:SS
|
|
|
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
|
|
|
PROG [\x21-\x5a\x5c\x5e-\x7e]+
|
|
|
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
|
|
|
SYSLOGHOST %{IPORHOST}
|
|
|
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
|
|
|
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
|
|
|
# Shortcuts
|
|
|
QS %{QUOTEDSTRING}
|
|
|
# Log formats
|
|
|
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|