From 51c82f1835f180121443bf6a16aa0e5bf4cd7e9f Mon Sep 17 00:00:00 2001 From: Nikhil Sinha Date: Mon, 10 Mar 2025 14:43:42 -0400 Subject: [PATCH] init --- src/event/detect_schema.rs | 113 ++++++ src/event/format/formats.json | 554 ++++++++++++++++++++++++++++++ src/event/mod.rs | 1 + src/handlers/http/ingest.rs | 27 +- src/handlers/http/modal/server.rs | 3 + 5 files changed, 693 insertions(+), 5 deletions(-) create mode 100644 src/event/detect_schema.rs create mode 100644 src/event/format/formats.json diff --git a/src/event/detect_schema.rs b/src/event/detect_schema.rs new file mode 100644 index 000000000..07dc40690 --- /dev/null +++ b/src/event/detect_schema.rs @@ -0,0 +1,113 @@ +use std::collections::HashMap; + +use once_cell::sync::OnceCell; +use regex::Regex; +use serde::Deserialize; + +const FORMATS_JSON: &str = include_str!("format/formats.json"); +// Schema definition with pattern matching +pub static KNOWN_SCHEMA_LIST: OnceCell = OnceCell::new(); + +#[derive(Debug)] +struct SchemaDefinition { + name: String, + pattern: Option, + field_mappings: Vec, // Maps field names to regex capture groups +} +#[derive(Debug, Deserialize)] +struct Format { + name: String, + regex: Vec +} +#[derive(Debug, Deserialize)] +struct Pattern { + pattern: Option, + fields: Vec +} + +#[derive(Debug)] +pub struct EventProcessor { + schema_definitions: Vec, +} + +impl EventProcessor { + pub fn new() { + let mut processor = EventProcessor { + schema_definitions: Vec::new(), + }; + + // Register known schemas + processor.register_schema(); + KNOWN_SCHEMA_LIST.set(processor).expect("only set once"); + } + + fn register_schema(&mut self) { + let json_data: serde_json::Value = serde_json::from_str(FORMATS_JSON).unwrap(); + let formats: Vec = + serde_json::from_value(json_data).expect("Failed to parse formats.json"); + + for format in formats { + let name = format.name; + for pattern in format.regex { + if let Some(pattern_str) = &pattern.pattern { + // Compile the regex pattern + match Regex::new(pattern_str) { + Ok(reg) => { + let field_mappings = pattern.fields.iter() + .map(|field| field.to_string()) + .collect(); + + self.schema_definitions.push(SchemaDefinition { + name: name.clone(), + pattern: Some(reg), + field_mappings, + }); + }, + Err(e) => { + eprintln!("Error compiling regex pattern: {}", e); + eprintln!("Pattern: {}", pattern_str); + } + } + } else { + let field_mappings = pattern.fields.iter() + .map(|field| field.to_string()) + .collect(); + + self.schema_definitions.push(SchemaDefinition { + name: name.clone(), + pattern: None, + field_mappings, + }); + } + } + } + } + + +} + +pub fn detect_schema(event: &str, log_source: &str) -> Option<(String, Vec)> { + let processor = KNOWN_SCHEMA_LIST.get().expect("Schema processor not initialized"); + for schema in processor.schema_definitions.iter() { + if log_source != schema.name { + continue; + } + if let Some(pattern) = &schema.pattern{ + if let Some(captures) = pattern.captures(event) { + let mut extracted_fields = Vec::new(); + + // With named capture groups, you can iterate over the field names + for field_name in schema.field_mappings.iter() { + if let Some(value) = captures.name(field_name) { + extracted_fields.push(value.as_str().to_string()); + } + } + + return Some((schema.name.clone(), extracted_fields)); + } + } + + } + + None // No matching schema found +} \ No newline at end of file diff --git a/src/event/format/formats.json b/src/event/format/formats.json new file mode 100644 index 000000000..95c101cca --- /dev/null +++ b/src/event/format/formats.json @@ -0,0 +1,554 @@ +[ + { + "name": "access_log", + "regex": [ + { + "pattern": "^(?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?) (?[^ ]+) (?[^ ]+) (?[A-Z]+)(?[^ \\?]+)(?:\\?(?[^ ]*))? (?:-1|\\d+) (?\\d+) \\d+\\s*(?.*)", + "fields": ["timestamp", "c_ip", "cs_username", "cs_method", "cs_uri_stem", "cs_uri_query", "sc_status", "body"] + }, + { + "pattern": "^(?P[\\w\\.:\\-]+)\\s+[\\w\\.\\-]+\\s+(?:-|(?P\\S+))\\s+\\[(?P[^\\]]+)\\] \"(?:\\-|(?P\\w+) (?P[^ \\?]+)(?:\\?(?P[^ ]*))? (?P[\\w/\\.]+))\" (?P\\d+) (?P\\d+|-)(?: \"(?:-|(?P[^\"]*))\" \"(?:-|(?P[^\"]+))\")?\\s*(?P.*)", + "fields": ["c_ip", "cs_username", "timestamp", "cs_method", "cs_uri_stem", "cs_uri_query", "cs_version", "sc_status", "sc_bytes", "cs_referer", "cs_user_agent", "body"] + }, + { + "pattern": "^(?P[\\w\\-\\.]*)(?::\\d+)?\\s+(?P[\\w\\.:\\-]+)\\s+[\\w\\.\\-]+\\s+(?:-|(?P\\S+))\\s+\\[(?P[^\\]]+)\\] \"(?:\\-|(?P\\w+) (?P[^ \\?]+)(?:\\?(?P[^ ]*))? (?P[\\w/\\.]+))\" (?P\\d+) (?P\\d+|-)(?: \"(?:-|(?P[^\"]+))\" \"(?P[^\"]+)\")?\\s*(?P.*)", + "fields": ["cs_host", "c_ip", "cs_username", "timestamp", "cs_method", "cs_uri_stem", "cs_uri_query", "cs_version", "sc_status", "sc_bytes", "cs_referer", "cs_user_agent", "body"] + }, + { + "pattern": "^(?P[\\w\\.:\\-]+)\\s+[\\w\\.\\-]+\\s+(?P\\S+)\\s+\"(?:\\-|(?P\\w+) (?P[^ \\?]+)(?:\\?(?P[^ ]*))? (?P[\\w/\\.]+))\" (?P\\d+) (?P\\d+|-)(?: \"(?P[^\"]+)\" \"(?P[^\"]+)\")?\\s*(?P.*)", + "fields": ["c_ip", "cs_username", "cs_method", "cs_uri_stem", "cs_uri_query", "cs_version", "sc_status", "sc_bytes", "cs_referer", "cs_user_agent", "body"] + } + ] + }, + { + "name": "alb_log", + "regex": [ + { + "pattern": "^(?P(http)|(https)|(h2)|(ws)|(wss)) (?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z) (?P[^ ]+) (?P[\\w\\.:]+):(?P\\d+) (?P[\\w\\.:]+):(?P\\d+) (?P(-1)|(\\d+(\\.\\d+))?) (?P(-1)|(\\d+(\\.\\d+))?) (?P(-1)|(\\d+(\\.\\d+))?) (?P\\d+|-) (?P\\d+|-) (?P\\d+) (?P\\d+) \"(?:\\-|(?P\\w+|-) (?P(?P(?:(?Phttps|http)?://)?(?:(?P[^:]+):(?P\\d+)?)?(?P[^ \\?]+)?)(?:\\?(?P[^ ]*))?) (?P[\\w/\\.]+|-)\\s*)\" \"(?P[^\"]+)\" (?P[\\w-]+) (?P[\\w\\.-]+) (?P[^ ]+) \"(?P[^ ]+)\" (?P[^ ]+) (?P[^ ]+) ?(?P(-1)|\\b([0-9]|[1-8][0-9]|9[0-9]|[1-8][0-9]{2}|9[0-8][0-9]|99[0-9]|[1-8][0-9]{3}|9[0-8][0-9]{2}|99[0-8][0-9]|999[0-9]|[1-4][0-9]{4}|50000)\\b)?", + "fields": ["type", "timestamp", "elb", "client_ip", "client_port", "target_ip", "target_port", "request_processing_time", "target_processing_time", "response_processing_time", "elb_status_code", "target_status_code", "received_bytes", "sent_bytes", "cs_method", "cs_uri_whole", "cs_uri_stem", "cs_uri_scheme", "cs_uri_hostname", "cs_uri_port", "cs_uri_path", "cs_uri_query", "cs_version", "user_agent", "ssl_cipher", "ssl_protocol", "target_group_arn", "trace_id", "domain_name", "chosen_cert_arn", "matched_rule_priority"] + } + ] + }, + { + "name": "block_log", + "regex": [ + { + "pattern": "^(?P\\S{3,8} \\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2} \\w+ \\d{4})\\s*(?P.*)", + "fields": ["timestamp", "body"] + }, + { + "pattern": "^\\[(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3,6})?(?:Z|[-+]\\d{2}:?\\d{2})?)\\]\\s*(?P.*)", + "fields": ["timestamp", "body"] + } + ] + }, + { + "name": "candlepin_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}) \\[(req=(?P[0-9a-f-]+)|=), org=(?P\\w*)\\] (?P\\w+) (?P[\\w.]+) - (?P.*)", + "fields": ["timestamp", "req", "org", "alert_level", "module", "body"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}[+-]\\d{4}) (?P.*)", + "fields": ["timestamp", "body"] + } + ] + }, + { + "name": "choose_repo_log", + "regex": [ + { + "pattern": "^\\[(?P\\w+):[^\\]]+] [^:]+:\\d+ (?P\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}:\\d{2}(?:[\\.,]\\d{3})?):(?P.*)", + "fields": ["level", "timestamp", "body"] + } + ] + }, + { + "name": "cloudvm_ram_log", + "regex": [ + { + "pattern": "^========== Start of cloudvm ram size dump at (?P[^=]+) ==========(?P.*)", + "fields": ["timestamp", "body"] + } + ] + }, + { + "name": "cups_log", + "regex": [ + { + "pattern": "^(?P[IEW]) \\[(?P\\d{2}/\\S{3,8}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\] (?P
\\w+): (?P.*)", + "fields": ["level", "timestamp", "section", "body"] + }, + { + "pattern": "^(?P[IEW]) \\[(?P\\d{2}/\\S{3,8}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{2,4})\\](?P.*)", + "fields": ["level", "timestamp", "body"] + } + ] + }, + { + "name": "dpkg_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?) (?:(?:(?Pstartup|status|configure|install|upgrade|trigproc|remove|purge)(?: (?Pconfig-files|failed-config|half-configured|half-installed|installed|not-installed|post-inst-failed|removal-failed|triggers-awaited|triggers-pending|unpacked))? (?P[^ ]+) (?P[^ ]+)(?: (?P[^ ]+))?)|update-alternatives: (?P.*))", + "fields": ["timestamp", "action", "status", "package", "installed_version", "available_version", "body"] + } + ] + }, + { + "name": "elb_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z) (?P[^ ]+) (?P[\\w\\.:]+):(?P\\d+) (?P[\\w\\.:]+):(?P\\d+) (?P\\d+(\\.\\d+)?) (?P\\d+(\\.\\d+)?) (?P\\d+(\\.\\d+)?) (?P\\d+|-) (?P\\d+|-) (?P\\d+) (?P\\d+) \"(?:\\-|(?P\\w+|-) (?P[^ \\?]+)(?:\\?(?P[^ ]*))? (?P[\\w/\\.]+|-)\\s*)\" \"(?P[^\"]+)\" (?P[\\w-]+) (?P[\\w\\.-]+)(?P.*)", + "fields": ["timestamp", "elb", "client_ip", "client_port", "backend_ip", "backend_port", "request_processing_time", "backend_processing_time", "response_processing_time", "elb_status_code", "backend_status_code", "received_bytes", "sent_bytes", "cs_method", "cs_uri_stem", "cs_uri_query", "cs_version", "user_agent", "ssl_cipher", "ssl_protocol", "body"] + } + ] + }, + { + "name": "engine_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}+)\\s+(?P\\w+)\\s+\\[(?P[^\\]]+)\\]\\s+\\((?P[^\\)]+)\\)\\s+(?P.*)", + "fields": ["timestamp", "level", "logger", "tid", "body"] + } + ] + }, + { + "name": "env_logger_log", + "regex": [ + { + "pattern": "^\\[(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}[^ ]+) (?P\\w+) (?P[^\\]]+)\\]\\s+(?P.*)", + "fields": ["timestamp", "level", "module", "body"] + } + ] + }, + { + "name": "error_log", + "regex": [ + { + "pattern": "^(?P\\w) \\[(?P[^\\]]+)\\] (?P.*)", + "fields": ["level", "timestamp", "body"] + }, + { + "pattern": "^\\[(?P[^\\]]+)\\] \\[(?:(?P[^:]+):)?(?P\\w+)\\](?: \\[pid (?P\\d+)(:tid (?P\\d+))?\\])?(?: \\[client (?P[\\w\\.:\\-]+):(?P\\d+)\\])? (?P.*)", + "fields": ["timestamp", "module", "level", "pid", "tid", "c_ip", "c_port", "body"] + } + ] + }, + { + "name": "esx_syslog_log", + "regex": [ + { + "pattern": "^(?P(?:\\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2}|\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?Z))\\s+(?P\\w+\\((?P\\d+)\\))(?:\\[\\+\\]|\\+)?(?:(?: (?P(?P(?:[^\\[:]+|[^:]+))(?:\\[(?P\\d+)\\])?):\\s*(?:\\w+ \\[(?P[^ ]+)(?: op[iI][dD]=(?P[^ \\]]+))?\\]\\s*)?(?P.*))$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))", + "fields": ["timestamp", "level", "syslog_pri", "log_syslog_tag", "log_procname", "log_pid", "logger", "opid", "body"] + }, + { + "pattern": "^(?P(?:\\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2}|\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?Z))\\s+(?P\\w+\\((?P\\d+)\\))(?:\\[\\+\\]|\\+)?(?:(?: (?P(?:host-(?P\\d+))?)\\s+(?P.*))$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))", + "fields": ["timestamp", "level", "syslog_pri", "log_syslog_tag", "log_pid", "body"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2})\\s+(?P\\w+\\((?P\\d+)\\))\\s+(?P[^\\[]+)\\[(?P\\d+)\\]:\\s(?P\\d{2}:\\d{2}:\\d{2}\\.\\d+)\\s+(?P.*)", + "fields": ["timestamp", "level", "syslog_pri", "log_procname", "log_pid", "new_time", "body"] + } + ] + }, + { + "name": "haproxy_log", + "regex": [ + { + "pattern": "^(?P\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?P[^ ]+) (?P\\w+)\\[(?P\\d+)\\]: Proxy (?P[^ ]+) started.", + "fields": ["timestamp", "logging_host", "process_name", "pid", "frontend_name"] + }, + { + "pattern": "^(?P\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?P[^ ]+) (?P\\w+)\\[(?P\\d+)\\]: Stopping frontend (?P[^ ]+) in (?P\\d+) ms.", + "fields": ["timestamp", "logging_host", "process_name", "pid", "frontend_name", "stopping_timeout"] + }, + { + "pattern": "^(?P\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?P[^ ]+) (?P\\w+)\\[(?P\\d+)\\]: Proxy (?P[^ ]+) stopped \\(FE: (?P\\d+) conns, BE: (?P\\d+) conns\\).", + "fields": ["timestamp", "logging_host", "process_name", "pid", "frontend_name", "frontend_connections", "backend_connections"] + }, + { + "pattern": "^(?P\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?P[^ ]+) (?P\\w+)\\[(?P\\d+)\\]: (?P[^:]+):(?P\\d+) \\[(?P\\d{2}\\/\\w{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2}.\\d{3})\\] (?P[^ ]+) (?P[^ ]+)\\/(?P[^ ]+) (?P\\d+)\\/(?P\\d+)\\/(?P\\d+) (?P\\d+) (?P..) (?P\\d+)\\/(?P\\d+)\\/(?P\\d+)\\/(?P\\d+)\\/(?P\\d+) (?P\\d+)\\/(?P\\d+)", + "fields": ["timestamp", "logging_host", "process_name", "pid", "client_ip", "client_port", "accept_date", "frontend_name", "backend_name", "server_name", "tw", "tc", "tt", "bytes_read", "termination_state", "actconn", "feconn", "beconn", "srv_conn", "retries", "srv_queue", "backend_queue"] + }, + { + "pattern": "^(?P\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?P[^ ]+) (?P\\w+)\\[(?P\\d+)\\]: (?P[^:]+):(?P\\d+) \\[(?P\\d{2}\\/\\w{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2}.\\d{3})\\] (?P[^ ]+)(?P~)? (?P[^ ]+)\\/(?P[^ ]+) (?P-?\\d+)\\/(?P-?\\d+)\\/(?P-?\\d+)\\/(?P-?\\d+)\\/(?P\\d+) (?P\\d{3}|-1) (?P\\d+) (?P.*) (?P.*) (?P....) (?P\\d+)\\/(?P\\d+)\\/(?P\\d+)\\/(?P\\d+)\\/(?P\\d+) (?P\\d+)\\/(?P\\d+) (?:\\{(?P.*)\\} \\{(?P.*)\\} )?\"(?P[A-Z<>]+)(?: (?P.*?))?(?: (?PHTTP\\/\\d+.\\d+))?\"?", + "fields": ["timestamp", "logging_host", "process_name", "pid", "client_ip", "client_port", "accept_date", "frontend_name", "ssl", "backend_name", "server_name", "tq", "tw", "tc", "tr", "tt", "status_code", "bytes_read", "captured_request_cookie", "captured_response_cookie", "termination_state", "actconn", "feconn", "beconn", "srv_conn", "retries", "srv_queue", "backend_queue", "captured_request_headers", "captured_response_headers", "http_method", "http_url", "http_version"] + }, + { + "pattern": "^(?P\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?P[^ ]+) (?P\\w+)\\[(?P\\d+)\\]: (?P[^:]+):(?P\\d+) \\[(?P\\d{2}\\/\\w{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2}.\\d{3})\\] (?P[^ ]+)\\/(?P[^ ]+): (?P.+)", + "fields": ["timestamp", "logging_host", "process_name", "pid", "client_ip", "client_port", "accept_date", "backend_name", "server_name", "ssl_error"] + } + ] + }, + { + "name": "katello_log", + "regex": [ + { + "pattern": "^\\[\\s?(?P\\w+)\\s(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})\\s(?P\\w+)\\]\\s+(?P.*)", + "fields": ["alert_level", "timestamp", "module", "message"] + } + ] + }, + { + "name": "lnav_debug_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(-|\\+)\\d{2}:\\d{2}) (?P\\w) (?P\\w+) (?P[^:]+):(?P\\d+) (?P.*)", + "fields": ["timestamp", "level", "thread", "srcfile", "srcline", "body"] + } + ] + }, + { + "name": "nextflow_log", + "regex": [ + { + "pattern": "^(?P\\w{3}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) \\[(?P[^\\]]+)\\] (?P[^ ]+)\\s+(?P[^ ]+) - (?P.*)", + "fields": ["timestamp", "thread", "level", "module", "body"] + } + ] + }, + { + "name": "openam_log", + "regex": [ + { + "pattern": "^\"(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})\"\\s+(?P[^ \"]+|\"(?:[^\"]*|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]*|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")\\s+(?P[^ \"]+|\"(?:[^\"]|\"\")*\")(?P.*)", + "fields": ["timestamp", "data", "loginid", "contextid", "ipaddr", "level", "domain", "loggedby", "messageid", "modulename", "nameid", "hostname", "body"] + } + ] + }, + { + "name": "openamdb_log", + "regex": [ + { + "pattern": "^(?P[\\w]+):(?P\\d{2}/\\d{2}/\\d{4} \\d{2}:\\d{2}:\\d{2}:\\d{3} [AP]M \\w+): Thread\\[(?P[^,]+,\\d+,[^,]+)\\]\\n?(?:\\*+|(?P.*))", + "fields": ["module", "timestamp", "thread", "body"] + } + ] + }, + { + "name": "openstack_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}.\\d{3}) (?P\\d+) (?P\\w+) (?P\\S+) \\[(?P[^\\]]+)\\] (?P.*)", + "fields": ["timestamp", "pid", "level", "logger", "tid", "body"] + }, + { + "pattern": "^(?P\\w+) (?P\\S+) \\[(?P[^\\]]+)\\] (?P.*)", + "fields": ["level", "logger", "tid", "body"] + }, + { + "pattern": "^[(](?P[^)]+)[)]: (?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}) (?P\\w+)(?P.*)", + "fields": ["logger", "timestamp", "level", "body"] + }, + { + "pattern": "^[(](?P[^)]+)[)]: (?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}) (?P\\w+) [(](?P[^)]+)[)] (?P.*)", + "fields": ["logger", "timestamp", "level", "user", "body"] + } + ] + }, + { + "name": "page_log", + "regex": [ + { + "pattern": "^(?P[\\w_\\-\\.]+) (?P[\\w\\.\\-]+) (?P\\d+) \\[(?P[^\\]]+)\\] (?Ptotal|\\d+) (?P\\d+) (?P[^ ]+) (?P[\\w\\.:\\-]+)", + "fields": ["printer", "username", "job_id", "timestamp", "page_number", "num_copies", "job_billing", "job_originating_hostname"] + }, + { + "pattern": "^(?P[\\w_\\-\\.]+) (?P[\\w\\.\\-]+) (?P\\d+) \\[(?P[^\\]]+)\\] (?Ptotal|\\d+) (?P\\d+) (?P[^ ]+) (?P[\\w\\.:\\-]+) (?P.+) (?P[^ ]+) (?P.+)(?P.*)", + "fields": ["printer", "username", "job_id", "timestamp", "page_number", "num_copies", "job_billing", "job_originating_hostname", "job_name", "media", "sides", "body"] + } + ] + }, + { + "name": "procstate_log", + "regex": [ + { + "pattern": "^========== Start of system state dump at (?P[^=]+) ==========(?P.*)", + "fields": ["timestamp", "body"] + } + ] + }, + { + "name": "proxifier_log", + "regex": [ + { + "pattern": "\\[(?P\\d{2}\\.\\d{2} \\d{2}:\\d{2}:\\d{2})\\]\\s+(?P[^ ]+(?: \\*64)?)(?:\\s+(?:-|(?P\\d+)))\\s+(?P[^:]+):(?P\\d+)\\s+(?P(?:open|close).*)", + "fields": ["timestamp", "app_name", "app_pid", "target_host", "target_port", "body"] + }, + { + "pattern": "\\[(?P\\d{2}\\.\\d{2} \\d{2}:\\d{2}:\\d{2})\\]\\s+(?P[^ ]+(?: \\*64)?)(?:\\s+(?:-|(?P\\d+)))\\s+(?P[^:]+):(?P\\d+)\\s+(?Perror) : (?P.*)", + "fields": ["timestamp", "app_name", "app_pid", "target_host", "target_port", "level", "body"] + } + ] + }, + { + "name": "rails_log", + "regex": [ + { + "pattern": "^(?P[A-Z]),\\s\\[(?P\\d{4}-\\d{2}-\\d{2}(?:T| )\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{6})?) #(?P\\d+)\\]\\s+(?P\\w+) --\\s(?P[^:]+)?:\\s(?:\\[(?P\\w{8}-\\w{4}-\\w{4}-\\w{4}-\\w{12})\\]\\s)?(?P.*)", + "fields": ["level_char", "timestamp", "pid", "level", "module", "reqid", "body"] + } + ] + }, + { + "name": "redis_log", + "regex": [ + { + "pattern": "^\\[(?P\\d+)\\]\\s+(?P\\d{1,2} [a-zA-Z]{3} \\d{2}:\\d{2}:\\d{2}\\.\\d{3})\\s+(?P[\\.\\-\\*\\#])\\s+(?P.*)", + "fields": ["pid", "timestamp", "level", "body"] + }, + { + "pattern": "^(?P\\d+):(?P[XCSM])\\s+(?P\\d{1,2} [a-zA-Z]{3} \\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{3})\\s+(?P[\\.\\*\\#\\-])\\s+(?P.*)", + "fields": ["pid", "role", "timestamp", "level", "body"] + }, + { + "pattern": "^(?P\\d+):(?Psignal-handler) \\((?P\\d+)\\) (?P.*)", + "fields": ["pid", "role", "timestamp", "body"] + } + ] + }, + { + "name": "s3_log", + "regex": [ + { + "pattern": "^(?P\\S+)\\s+(?P\\S+)\\s+\\[(?P[^\\]]+)\\]\\s+(?P[\\w*.:-]+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+\"(?P\\S+)\\s+(?P[^ \\?]+)(?:\\?(?P[^ ]*))?\\s+(?P\\S+)\"\\s+(?P\\d+|-)\\s+(?P\\S+)\\s+(?P\\d+|-)\\s+(?P\\d+|-)\\s+(?P\\d+|-)\\s+(?P\\d+|-)\\s+\"(?P.*?)\"\\s+\"(?P.*?)\"", + "fields": ["owner", "bucket", "timestamp", "c_ip", "cs_userid", "req_id", "op", "cs_key", "cs_method", "cs_uri_stem", "cs_uri_query", "cs_version", "sc_status", "sc_error_code", "sc_bytes", "obj_size", "total_time", "turn_around_time", "cs_referer", "cs_user_agent"] + }, + { + "pattern": "^(?P\\S+)\\s+(?P\\S+)\\s+\\[(?P[^\\]]+)\\]\\s+(?P[\\w*.:-]+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+\"(?P\\S+)\\s+(?P[^ \\?]+)(?:\\?(?P[^ ]*))?\\s+(?P\\S+)\"\\s+(?P\\d+|-)\\s+(?P\\S+)\\s+(?P\\d+|-)\\s+(?P\\d+|-)\\s+(?P\\d+|-)\\s+(?P\\d+|-)\\s+\"(?P.*?)\"\\s+\"(?P.*?)\"\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)\\s+(?P\\S+)", + "fields": ["owner", "bucket", "timestamp", "c_ip", "cs_userid", "req_id", "op", "cs_key", "cs_method", "cs_uri_stem", "cs_uri_query", "cs_version", "sc_status", "sc_error_code", "sc_bytes", "obj_size", "total_time", "turn_around_time", "cs_referer", "cs_user_agent", "version_id", "host_id", "sig_version", "cipher_suite", "auth_type", "cs_host", "tls_version"] + } + ] + }, + { + "name": "simple_rs_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{9}[^\\s]+)\\s+(?P\\w+)\\s+\\[(?P\\w+)\\]\\s+(?P.*)", + "fields": ["timestamp", "level", "module", "body"] + } + ] + }, + { + "name": "snaplogic_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?) (?:(?:(?P\\w{4,}) (?P[^ ]+) (?P[^ ]+) (?P[^ ]+) (?P-|\\d+)(?:\\.(?P[^ ]+))? (?P[^ ]+) (?P[^ ]+))|(?:(?:stdout|stderr): ))(?P.*)", + "fields": ["timestamp", "level", "logger", "facility", "msgid", "pipe_rid", "comp_rid", "resource_name", "invoker", "body"] + } + ] + }, + { + "name": "sssd_log", + "regex": [ + { + "pattern": "^\\((?P\\S{3,8} \\S{3,8} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2}(?:(?:\\.|:)\\d{6})? \\d{4})\\) \\[(?P\\w+)\\] \\[(?P\\w+)\\] \\((?P0x[0-9a-fA-F]{4})\\): (?P.*)", + "fields": ["timestamp", "service", "function", "debug_level", "body"] + }, + { + "pattern": "^\\((?P\\S{3,8} \\S{3,8} ( \\d|\\d{2}) \\d{2}:\\d{2}:\\d{2}(?:(?:\\.|:)\\d{6})? \\d{4})\\) \\[(?P\\w+)(?P\\[.*?\\])\\] \\[(?P\\w+)\\] \\((?P0x[0-9a-fA-F]{4})\\): (?P.*)", + "fields": ["timestamp", "service", "module", "function", "debug_level", "body"] + }, + { + "pattern": "^\\((?P\\d{4}-\\d{2}-\\d{2} [ 0-9]{2}:\\d{2}:\\d{2}(?:(?:\\.|:)\\d{6})?)\\): \\[(?P\\w+)(?P\\[.*?\\])?\\] \\[(?P\\w+)\\] \\((?P0x[0-9a-fA-F]{4})\\): (?P.*)", + "fields": ["timestamp", "service", "module", "function", "debug_level", "body"] + } + ] + }, + { + "name": "strace_log", + "regex": [ + { + "pattern": "^(?P\\d{2}:\\d{2}:\\d{2}\\.\\d{6}|\\d+\\.\\d{6}) (?P\\w+)\\((?P.*)\\)\\s+=\\s+(?P[-\\w]+)(?: (?P\\w+) \\([^\\)]+\\))?(?: <(?P\\d+\\.\\d+)>)?", + "fields": ["timestamp", "syscall", "body", "rc", "errno", "duration"] + } + ] + }, + { + "name": "sudo_log", + "regex": [ + { + "pattern": "^(?P\\S+)\\s*: (?:(?P[^;]+);)?\\s*TTY=(?P[^;]+)\\s+;\\s*PWD=(?P[^;]+)\\s+;\\s*USER=(?P[^;]+)\\s+;\\s*COMMAND=(?P.*)", + "fields": ["login", "error_msg", "tty", "pwd", "user", "command"] + } + ] + }, + { + "name": "syslog_log", + "regex": [ + { + "pattern": "^(?P(?:\\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2}|\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3,6})?(?:Z|(?:\\+|-)\\d{2}:\\d{2})))(?: (?P[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))?(?: \\[CLOUDINIT\\])?(?:(?: syslogd [\\d\\.]+|(?: (?P(?P(?:[^\\[:]+|[^ :]+))(?:\\[(?P\\d+)\\](?: \\([^\\)]+\\))?)?))):\\s*(?P.*)$|:?(?:(?: ---)? last message repeated \\d+ times?(?: ---)?))", + "fields": ["timestamp", "log_hostname", "log_syslog_tag", "log_procname", "log_pid", "body"] + }, + { + "pattern": "^<(?P\\d+)>(?P\\d+) (?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{6})?(?:[^ ]+)?) (?P[^ ]+|-) (?P(?P[^ ]+|-) (?P[^ ]+|-) (?P[^ ]+|-)) (?P\\[(?:[^\\]\"]|\"(?:\\.|[^\"])+\")*\\]|-|)\\s+(?P.*)", + "fields": ["log_pri", "syslog_version", "timestamp", "log_hostname", "log_syslog_tag", "log_procname", "log_pid", "log_msgid", "log_struct", "body"] + } + ] + }, + { + "name": "tcf_log", + "regex": [ + { + "pattern": "^TCF (?P\\d{2}:\\d{2}.\\d{3,6}): (?:Server-Properties: (?:.*)|channel server|\\w+: (?P--->|<---) (?P\\w)(?: (?P\\w+))?(?: (?P\\w+))?(?: (?P\\w+))?(?: (?P.*))?(?: ))(?P.*)", + "fields": ["timestamp", "dir", "type", "token", "service", "name", "msg", "body"] + } + ] + }, + { + "name": "tcsh_history", + "regex": [ + { + "pattern": "^#(?P\\+\\d+)\\n?(?P.*)?", + "fields": ["timestamp", "body"] + } + ] + }, + { + "name": "uwsgi_log", + "regex": [ + { + "pattern": "^\\[pid: (?P\\d+)\\|app: (?P[\\-\\d]+)\\|req: (?P[\\-\\d]+)/(?P\\d+)\\] (?P[^ ]+) \\((?P[^\\)]*)\\) \\{(?P\\d+) vars in (?P\\d+) bytes\\} \\[(?P[^\\]]+)\\] (?P[A-Z]+) (?P[^ \\?]+)(?:\\?(?P[^ ]*))? => generated (?P\\d+) bytes in (?P\\d+) (?P\\w+) \\((?P[^ ]+) (?P\\d+)\\) (?P\\d+) headers in (?P\\d+) bytes \\((?P\\d+) switches on core (?P\\d+)\\)(?P.*)", + "fields": ["s_pid", "s_app", "s_req", "s_worker_reqs", "c_ip", "cs_username", "cs_vars", "cs_bytes", "timestamp", "cs_method", "cs_uri_stem", "cs_uri_query", "sc_bytes", "s_runtime", "rt_unit", "cs_version", "sc_status", "sc_headers", "sc_header_bytes", "s_switches", "s_core", "body"] + } + ] + }, + { + "name": "vmk_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z) cpu(?P\\d+):(?P\\d+)(?: opID=(?P[^\\)]+))?\\)((?:(?PWARNING|ALERT)|(?P[^:]+)): )?(?P.*)", + "fields": ["timestamp", "cpu", "world_id", "opid", "level", "subsystem", "body"] + }, + { + "pattern": "^(?P(?:\\S{3,8}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2}|\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?Z))\\s+(?P\\w+)\\((?P\\d+)\\)(?:\\[\\+\\]|\\+)? (?:vmkernel|vmkwarning):\\s* (?:cpu(?P\\d+):(?P\\d+)(?: opID=(?P[^\\)]+))?\\))?((?:(?:WARNING|ALERT)|(?P[^:]+)): )?(?P.*)", + "fields": ["timestamp", "level", "syslog_pri", "cpu", "world_id", "opid", "subsystem", "body"] + } + ] + }, + { + "name": "vmw_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2})) (?P\\w+)(?:\\(\\d+\\)+)? (?P[\\w\\-]+)\\[(?P\\w+)\\]:? \\[(?:opI(?:D|d)=(?P[^\\]]+))\\]\\s*(?P.*)", + "fields": ["timestamp", "level", "prc", "tid", "opid", "body"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2})) (?:- last log rotation time, \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2}))?\\s*(ESX KMX Agent started.|(?:- time the service was last started(?: \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{3}(?:Z|[-+]\\d{2}:\\d{2}))?, )?Section for (?:[^,]+), pid=(?P\\w+).*)", + "fields": ["timestamp", "tid"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2})) (?P\\w+)(?:\\(\\d+\\)+) (?P[\\w\\-]+)\\[(?P\\w+)\\]: (?:Logs rotated. \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2}))?(?:- last log rotation time, \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2}))?\\s*(ESX KMX Agent started.|(?:- time the service was last started(?: \\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.\\d{3}Z)?, )?Section for (?:[^,]+), pid=(?:\\w+).*)", + "fields": ["timestamp", "level", "prc", "tid"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}(?:Z|[-+]\\d{2}:\\d{2})) \\[(?P\\w+) (?P\\w+) '(?P[^']+)'(?: opID=(?P[^ \\]]+))?(?: user=(?P[^ \\]]+))?\\](?P.*)(?:\\n.*)?", + "fields": ["timestamp", "tid", "level", "comp", "opid", "user", "body"] + }, + { + "pattern": "^\\[(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\w+) (?P\\w+) '(?P[^']+)'(?: opID=(?P[^ \\]]+))?(?: user=(?P[^ \\]]+))?\\](?P.*)(?:\\n.*)?", + "fields": ["timestamp", "tid", "level", "comp", "opid", "user", "body"] + }, + { + "pattern": "^\\[(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}) (?P[\\w\\-]+)\\s+(?P\\w+)\\s+(?P[^\\]]+)\\]\\s+(?P.*)", + "fields": ["timestamp", "tid", "level", "comp", "body"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}(T| )\\d{2}:\\d{2}:\\d{2}(?:.|,)\\d{3}(?:Z|[-+]\\d{2}:\\d{2})) \\[(?P[^\\[]+)\\[(?P\\w+)\\]:\\s+(?P.*)\\]", + "fields": ["timestamp", "prc", "tid", "body"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}(T| )\\d{2}:\\d{2}:\\d{2}(?:.|,)\\d{3}(?:Z|[-+]\\d{2}:\\d{2})?) (?P\\w+) (?P[^\\[]+)\\[(?P\\d+)\\]\\s+\\[(?P[^ ]+) (?P\\d+)\\]\\s+(?P.*)", + "fields": ["timestamp", "level", "prc", "tid", "file", "line", "body"] + }, + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{3})?(?:Z|[-+]\\d{2}:\\d{2})) (?P[^:]+):\\s+(?P\\d+):\\s+(?P[^:]+):(?P\\d+)?\\s+(?P\\w+):?\\s+(?P.*)(?:\\n.*)?", + "fields": ["timestamp", "prc", "tid", "comp", "line", "level", "body"] + }, + { + "pattern": "^(?P[^:]+):(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3})\\[(?P\\w+)\\](?P[^:]+):(?P\\d+) \\[(?P[a-zA-Z]+)\\]\\s+(?P.*)", + "fields": ["prc", "timestamp", "tid", "file", "line", "level", "body"] + }, + { + "pattern": "^(?P[^:]+): (?P\\d+): (?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}) (?P[^:]+):(?P\\d+) (?P[a-zA-Z]+)\\s+(?P.*)", + "fields": ["prc", "tid", "timestamp", "file", "line", "level", "body"] + } + ] + }, + { + "name": "vmw_py_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{1,3})?(?: (?:AM|PM) UTC)?) \\[(?P\\d+)\\](?PERROR|WARNING|INFO|DEBUG):(?P[\\w\\-\\.]+):(?P.*$)", + "fields": ["timestamp", "pid", "level", "module", "body"] + } + ] + }, + { + "name": "vmw_vc_svc_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{1,3}Z)\\s+(?P\\w+)\\s+(?P\\w+)\\s\\[(?P[^:]+):(?P\\d+)\\](\\s+\\[opID=(?P[^\\]]+)\\])?\\s+(?P.*)", + "fields": ["timestamp", "level", "module", "srcfile", "srcline", "opid", "body"] + } + ] + }, + { + "name": "vpostgres_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} \\S+) (?P[^ ]*) (?P[^ ]*) (?P[^ ]*) (?P[^ ]*) (?P[^ ]*) (?P[^ ]+) (?P\\d+)(?P[^:]+):\\s+(?P.*)", + "fields": ["timestamp", "session_id", "transaction_id", "db_name", "user", "remote_pair", "pid", "num_line", "level", "body"] + } + ] + }, + { + "name": "web_robot_log", + "regex": [ + { + "fields": ["ip", "timestamp", "method", "resource", "response", "bytes", "referrer", "request", "request-id", "useragent"] + } + ] + }, + { + "name": "xmlrpc_log", + "regex": [ + { + "pattern": "^(?P\\d{4}/\\d{2}/\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]?\\d{2}:\\d{2}) (?P\\d+) (?P\\S+): (?P\\w+)/(?P.*)(?P\\(.*?\\))?(?P.*)", + "fields": ["timestamp", "pid", "client_ip", "module", "function", "arguments", "body"] + } + ] + }, + { + "name": "zookeeper_log", + "regex": [ + { + "pattern": "^(?P\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}) \\[myid:(?P\\d+)?\\] - (?P\\w+)\\s+\\[(?P.*):(?P[\\w\\.\\$]+)@(?P\\d+)\\] - (?P.*)", + "fields": ["timestamp", "myid", "level", "thread", "logger", "line_number", "body"] + }, + { + "pattern": "^(<(?\\d+)>)?(?\\d)\\s+(?[^\\s]+)\\s+(?[^\\s]+)\\s+(?[^\\s]+)\\s+(?[^\\s]+)\\s+(?[^\\s]+)?\\s*(\\[(?[^\\]]*)\\])?\\s*(?.*)", + "fields": ["pri", "version", "timestamp", "hostname", "appname", "procid", "msgid", "structureddata", "message"] + } + ] + }, + { + "name": "kubernetes_log", + "regex": [ + { + "pattern": "^(?[IWEF])(?\\d{2})(?\\d{2})\\s+(?