I am writing a cronjob that ingests logs in lumberjack/beats format and converts the incoming log to JSON.
The input is a string containing a (possibly nested) list of key/value pairs, where each key is separated from its value by =
I want to parse it and map it to JSON using JavaScript.
I have written the snippet below to do the conversion, but it only works partially.
The problems with my approach are that it maps nested objects onto the first level, and that if a value contains an =
sign, it splits that value as well.
const parsedLog = {};
// Cut the record into "key=value" chunks at every ", " separator.
for (const item of log.split(", ")) {
  // Cut each chunk at every "=" sign.
  const pieces = item.split("=");
  if (pieces.length === 2) {
    // Two pieces: a plain key and its value (single-level entry).
    // Special characters are removed from the key, curly braces from the value.
    const [rawKey, rawValue] = pieces;
    parsedLog[rawKey.replace(/[^\w\s]/gi, '')] = rawValue.replace(/[{}]/g, "");
  } else if (pieces.length === 3) {
    // Three pieces: the first (the enclosing key) is dropped, the second is
    // the key and the third is the value (second-level entry).
    const [, rawKey, rawValue] = pieces;
    parsedLog[rawKey.replace(/[^\w\s]/gi, '')] = rawValue.replace(/[{}]/g, "");
  }
}
Input string:
"{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=An account failed to log on. Subject: Security ID: S-1-0-0 Account Name: - Account Domain: - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID: S-1-0-0 Account Name: test Account Domain: test Failure Information: Failure Reason: Unknown user name or bad password. Status: 0xC000006D Sub Status: 0xC000006A Process Information: Caller Process ID: 0x0 Caller Process Name: - Network Information: Workstation Name: test Source Network Address: 0.0.0.0 Source Port: 0 Detailed Authentication Information: Logon Process: NtLmSsp Authentication Package: NTLM Transited Services: - Package Name (NTLM only): - Key Length: 0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. 
This will be 0 if no session key was requested., event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}"
Expected output:
{
"@timestamp":"2019-07-12T12:19:03.547Z",
"@metadata":{
"beat":"winlogbeat",
"type":"doc",
"version":"6.1.3"
},
"level":"Information",
"brand":"test",
"opcode":"Info",
"activity_id":"{00-0000-00000-0000-00000}",
"provider_guid":"{54849625-5478-4994-A5BA-3E3B0328C30D}",
"index_type":"Test",
"type":"AD",
"message":"An account failed to log on. Subject: Security ID: S-1-0-0 Account Name: - Account Domain: - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID: S-1-0-0 Account Name: test Account Domain: test Failure Information: Failure Reason: Unknown user name or bad password. Status: 0xC000006D Sub Status: 0xC000006A Process Information: Caller Process ID: 0x0 Caller Process Name: - Network Information: Workstation Name: test Source Network Address: 0.0.0.0 Source Port: 0 Detailed Authentication Information: Logon Process: NtLmSsp Authentication Package: NTLM Transited Services: - Package Name (NTLM only): - Key Length: 0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. This will be 0 if no session key was requested.",
"event_data":{
"ProcessId":"0x0",
"IpAddress":"0.0.0.0",
"LogonProcessName":"NtLmSsp",
"KeyLength":"0",
"SubjectUserSid":"S-1-0-0",
"SubjectUserName":"-",
"SubjectLogonId":"0x0",
"LmPackageName":"-",
"FailureReason":"%%2313",
"TargetUserName":"test",
"TargetDomainName":"test",
"SubStatus":"0xc000006a",
"IpPort":"0",
"ProcessName":"-",
"LogonType":"3",
"WorkstationName":"test",
"TransmittedServices":"-",
"SubjectDomainName":"-",
"TargetUserSid":"S-1-0-0",
"Status":"0xc000006d",
"AuthenticationPackageName":"NTLM"
},
"task":"Logon",
"company":"Test",
"tags":"[windows, workstations]",
"beat":{
"name":"test",
"hostname":"test",
"version":"6.1.3"
},
"source_name":"Microsoft-Windows-Security-Auditing",
"thread_id":"4128",
"event_id":"4625",
"log_name":"Security",
"record_number":"367542159",
"process_id":"596",
"computer_name":"test",
"keywords":"[Audit Failure]"
}
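Another version: it checks every key against a list of known keys (optionally in order, in which case all keys are mandatory) and, when a candidate key is not found, appends that text to the last value read. Structural (nested) key lists are not implemented at the moment.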
var LogParser = (function () {
  // Outcomes of a single readKey() attempt.
  const KEY_FOUND = 0;
  const KEY_RETRY = 1;
  const KEY_EXHAUSTED = 2;

  // Outcomes of readValue().
  const VALUE_PLAIN = 0;
  const VALUE_CLOSES_BLOCK = 1;
  const VALUE_OPENS_BLOCK = 2;

  /**
   * Parser for records of the form `{key=value, nested={key=value}, ...}`.
   *
   * Text in front of an `=` only counts as a key when it appears in the list
   * of known key names; anything else is treated as a continuation of the
   * previously stored value. This is what lets values contain commas.
   *
   * @constructor
   * @param {string[]} keys - Known key names. In ordered mode they must be
   *     listed in the exact order in which they occur in the record.
   * @param {boolean} [rOrdered] - When truthy, keys are matched strictly in
   *     list order; otherwise any listed key is accepted at any position.
   */
  function LogParser(keys, rOrdered) {
    // State is kept per instance so that several parsers can coexist.
    this.keyNames = keys || [];
    this.ordered = Boolean(rOrdered);
    this.json = undefined;
  }

  /**
   * Parses one record; the result is then available through JSON().
   *
   * @param {string} record - Record text, starting with its opening `{`.
   * @throws {TypeError} If `record` is not a string.
   * @throws {SyntaxError} If text that is not a known key appears before any
   *     value has been stored (there is nothing it could be appended to).
   */
  LogParser.prototype.readRecord = function (record) {
    if (typeof record !== 'string') {
      throw new TypeError('LogParser.readRecord expects a string record');
    }
    this.json = parseRecord(record, this.keyNames, this.ordered);
  };

  /**
   * @returns {Object|undefined} The object built by the most recent
   *     readRecord() call, or undefined if nothing has been parsed yet.
   */
  LogParser.prototype.JSON = function () {
    return this.json;
  };

  /**
   * Builds a nested object from one record.
   *
   * Known limitations kept from the original design: a value whose text ends
   * with `}` closes the current block, and a `{...}` value is kept as a string
   * (not descended into) only for keys that contain `_` and end with "id".
   *
   * @param {string} str - Record text.
   * @param {string[]} keyNames - Known key names.
   * @param {boolean} ordered - Whether keys must match in list order.
   * @returns {Object} The parsed object.
   */
  function parseRecord(str, keyNames, ordered) {
    const json = {};
    // Enclosing objects of the block being filled. An explicit stack is used
    // instead of a temporary "parent" property so that a real key named
    // "parent" cannot corrupt the traversal.
    const parents = [];
    let jsonIterator = json; // object currently being filled
    let pos = 1; // skip the record's opening '{'
    let nextKey = 0; // index of the next expected key (ordered mode only)
    let key;
    let value = '';
    let lastStored = null; // { holder, name } of the most recently stored value

    function isKnownKey(candidate) {
      if (!ordered) {
        return keyNames.indexOf(candidate) !== -1;
      }
      if (keyNames[nextKey] !== candidate) {
        return false;
      }
      nextKey++;
      return true;
    }

    function store(text) {
      jsonIterator[key] = text;
      lastStored = { holder: jsonIterator, name: key };
    }

    function extendLast(text, offset) {
      if (lastStored === null) {
        throw new SyntaxError(
          'Unexpected text at position ' + offset + ': no known key precedes "' + text + '"'
        );
      }
      lastStored.holder[lastStored.name] += text;
    }

    // Tries to read the next key. Text that turns out not to be a known key
    // is appended to the last stored value and KEY_RETRY is returned.
    function readKey() {
      const rawStart = pos;
      while (pos < str.length && ' \t,'.indexOf(str[pos]) !== -1) {
        pos++;
      }
      const eq = str.indexOf('=', pos);
      if (eq === -1) {
        // No further key: whatever is left (minus one closing brace) belongs
        // to the last value, unless it consists of separators only.
        let rest = str.slice(rawStart);
        if (rest[rest.length - 1] === '}') {
          rest = rest.slice(0, -1);
        }
        if (/[^ \t,]/.test(rest)) {
          extendLast(rest, rawStart);
        }
        pos = str.length;
        return KEY_EXHAUSTED;
      }
      const candidate = str.slice(pos, eq).replace(/\s+$/, '');
      pos = eq + 1;
      if (isKnownKey(candidate)) {
        key = candidate;
        return KEY_FOUND;
      }
      // Not a key: the text up to the last comma in front of this '=' is the
      // continuation of the previous value; scanning resumes at that comma.
      const end = str.lastIndexOf(',', eq - 1);
      if (end > rawStart) {
        extendLast(str.slice(rawStart, end), rawStart);
        pos = end;
      } else {
        // No comma to resume from: consume through the '=' so that the scan
        // always moves forward instead of re-reading the same text forever.
        extendLast(str.slice(rawStart, pos), rawStart);
      }
      return KEY_RETRY;
    }

    // Reads the value at `pos` up to the next comma (or the end of input).
    function readValue() {
      if (str[pos] === '{') {
        return VALUE_OPENS_BLOCK;
      }
      let end = str.indexOf(',', pos);
      if (end === -1) {
        end = str.length;
      }
      value = str.slice(pos, end);
      pos = end;
      return value[value.length - 1] === '}' ? VALUE_CLOSES_BLOCK : VALUE_PLAIN;
    }

    while (pos < str.length) {
      const keyStatus = readKey();
      if (keyStatus === KEY_EXHAUSTED) {
        break;
      }
      if (keyStatus === KEY_RETRY) {
        continue;
      }
      const valueStatus = readValue();
      if (valueStatus === VALUE_OPENS_BLOCK) {
        if (key.indexOf('_') > 0 && key.slice(-2) === 'id') {
          // x_id={y}: the braces belong to the value itself.
          pos++;
          readValue(); // re-read after '{'
          store('{' + value);
          continue;
        }
        const child = {};
        jsonIterator[key] = child; // create the next level
        parents.push(jsonIterator);
        jsonIterator = child; // and move into it
        pos++;
      } else if (valueStatus === VALUE_CLOSES_BLOCK) {
        store(value.slice(0, -1)); // drop the closing '}'
        jsonIterator = parents.pop();
        if (jsonIterator === undefined) {
          break; // the top-level block has been closed
        }
      } else {
        store(value);
      }
    }
    return json;
  }

  return LogParser;
})();
// Sample record handed to the parser below. The text is written as two
// concatenated literals because a double-quoted string literal must not
// contain a raw line break.
var x = "{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=An account failed to log on. Subject: Security ID: S-1-0-0 Account Name: - Account Domain: - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID: S-1-0-0 Account Name: test Account Domain: test Failure Information: Failure Reason: Unknown user name or bad password. Status: 0xC000006D Sub Status: 0xC000006A Process Information: Caller Process ID: 0x0 Caller Process Name: - Network Information: Workstation Name: test Source Network Address: 0.0.0.0 Source Port: 0 Detailed Authentication Information: Logon Process: NtLmSsp Authentication Package: NTLM Transited Services: - Package Name (NTLM only): - Key Length: 0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. " +
  "This will be 0 if no session key was requested., event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}";
// Key names handed to the parser, listed in the order in which they occur
// in the sample record (nested keys included, so some names repeat).
var keys = [
  "@timestamp",
  "@metadata", "beat", "type", "version",
  "level", "brand", "opcode", "activity_id", "provider_guid", "index_type", "type", "message",
  "event_data", "ProcessId", "IpAddress", "LogonProcessName", "KeyLength", "SubjectUserSid",
  "SubjectUserName", "SubjectLogonId", "LmPackageName", "FailureReason", "TargetUserName",
  "TargetDomainName", "SubStatus", "IpPort", "ProcessName", "LogonType", "WorkstationName",
  "TransmittedServices", "SubjectDomainName", "TargetUserSid", "Status", "AuthenticationPackageName",
  "task", "company", "tags",
  "beat", "name", "hostname", "version",
  "source_name", "thread_id", "event_id", "log_name", "record_number", "process_id",
  "computer_name", "keywords"
];

// The second argument switches on in-order key checking.
var parser = new LogParser(keys, true);
parser.readRecord(x);

// Fetch the parsed object and print it as JSON indented by 4 spaces.
var resultObj = parser.JSON();
console.log(JSON.stringify(resultObj, null, 4));
As beat, type and version are not unique, it would be better to structure the source keys as well (for example like this) and to walk them with a separate iterator similar to jsonIterator:
[
"@timestamp",
"@metadata", [
"beat",
"type",
"version"
],
"level",
"brand",
"opcode",
"activity_id",
"provider_guid",
"index_type",
"type",
"message",
"event_data", [
"ProcessId",
"IpAddress",
"LogonProcessName",
"KeyLength",
"SubjectUserSid",
"SubjectUserName",
"SubjectLogonId",
"LmPackageName",
"FailureReason",
"TargetUserName",
"TargetDomainName",
"SubStatus",
"IpPort",
"ProcessName",
"LogonType",
"WorkstationName",
"TransmittedServices",
"SubjectDomainName",
"TargetUserSid",
"Status",
"AuthenticationPackageName"
],
"task",
"company",
"tags",
"beat", [
"name",
"hostname",
"version"
],
"source_name",
"thread_id",
"event_id",
"log_name",
"record_number",
"process_id",
"computer_name",
"keywords"
]
Sorry for the bad design pattern and the few hacks, but the result is almost OK, isn't it?
Only this line still looks ugly:
"LogonProcessName": "NtLmSsp ",
But I am not sure why there is that space at the end in the source - is it a typo or intentional?
Another hack, or a small code improvement that skips whitespace before the comma, would probably help.
// Sample record handed to convert() below. The text is written as two
// concatenated literals because a double-quoted string literal must not
// contain a raw line break.
var x = "{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=An account failed to log on. Subject: Security ID: S-1-0-0 Account Name: - Account Domain: - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID: S-1-0-0 Account Name: test Account Domain: test Failure Information: Failure Reason: Unknown user name or bad password. Status: 0xC000006D Sub Status: 0xC000006A Process Information: Caller Process ID: 0x0 Caller Process Name: - Network Information: Workstation Name: test Source Network Address: 0.0.0.0 Source Port: 0 Detailed Authentication Information: Logon Process: NtLmSsp Authentication Package: NTLM Transited Services: - Package Name (NTLM only): - Key Length: 0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. " +
  "This will be 0 if no session key was requested., event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}";
/**
 * Converts a record of the form `{key=value, nested={key=value}, ...}` into
 * a nested object, prints it as JSON indented by 4 spaces and returns it.
 *
 * Rules applied while scanning:
 * - a key runs up to the next `=`;
 * - a value starting with `{` opens a nested object, except for keys that
 *   contain `_` and end with "id", whose `{...}` value is kept as a string;
 * - any other value runs up to the next comma that is not inside `{}` / `[]`,
 *   or up to an unmatched closing bracket; trailing whitespace is dropped;
 * - the value of the key "message" may itself contain commas, so it is taken
 *   up to the first ".," (a sentence-ending dot followed by the separating
 *   comma) when such a sequence exists.
 *
 * @param {string} str - Record text, starting with its opening `{`.
 * @returns {Object} The parsed object (every leaf value is a string).
 */
function convert(str) {
  const json = {}; // result object
  // Enclosing objects of the block being filled. An explicit stack avoids
  // both recursion and a temporary "parent" property that could collide
  // with a real key of that name.
  const ancestors = [];
  let current = json;
  let pos = 1; // skip the record's opening '{'

  // Index of the character that ends the plain value starting at `from`:
  // a top-level ',' or an unmatched '}' / ']'; str.length if there is none.
  const findValueEnd = (from) => {
    let braces = 0;
    let squares = 0;
    for (let i = from; i < str.length; i++) {
      const ch = str[i];
      if (ch === ',') {
        if (braces + squares === 0) return i;
      } else if (ch === '{') {
        braces++;
      } else if (ch === '[') {
        squares++;
      } else if (ch === '}') {
        if (braces === 0) return i;
        braces--;
      } else if (ch === ']') {
        if (squares === 0) return i;
        squares--;
      }
    }
    return str.length;
  };

  while (pos < str.length) {
    if (str[pos] === '}') {
      // Block end: leave one level per closing brace, then skip separators.
      while (pos < str.length && '}, '.includes(str[pos])) {
        if (str[pos] === '}') {
          current = ancestors.pop();
        }
        pos++;
      }
      if (!current || pos >= str.length) break;
    }

    // Key: everything up to the next '='.
    const equals = str.indexOf('=', pos);
    if (equals === -1) break; // trailing text without any key/value pair
    const key = str.slice(pos, equals);
    pos = equals + 1;

    if (str[pos] === '{') {
      if (key.indexOf('_') > 0 && key.endsWith('id')) {
        // IDs carry their own curly brackets: keep them in the value.
        const close = str.indexOf('}', pos);
        const idEnd = close === -1 ? str.length : close + 1;
        current[key] = str.slice(pos, idEnd);
        pos = idEnd;
        // Skip the separator, but leave a '}' for the block-end handling.
        if (str[pos] !== '}') pos++;
        while (pos < str.length && str[pos] === ' ') pos++;
        continue;
      }
      const child = {};
      current[key] = child; // create the next level
      ancestors.push(current);
      current = child; // and move into it
      pos++;
      continue;
    }

    // Plain value.
    let end = -1;
    if (key === 'message') {
      const sentenceEnd = str.indexOf('.,', pos);
      if (sentenceEnd !== -1) end = sentenceEnd + 1; // keep the dot
    }
    if (end === -1) end = findValueEnd(pos);
    current[key] = str.slice(pos, end).replace(/\s+$/, '');
    pos = end;
    if (str[pos] === '}') continue; // handled as block end on the next pass
    pos++; // skip the terminating character
    while (pos < str.length && str[pos] === ' ') pos++;
  }

  console.log(JSON.stringify(json, null, 4)); // common indent 4
  return json;
}
// Run the converter on the sample record; convert() prints the resulting
// object as indented JSON.
convert(x);
I've managed to prepare some code using lodash. It is not a ready-to-go solution, but I hope it gives you a hint on how to process the input. Things still to do:
property: [value1, value2]
// Sample record processed by the lodash code below; its message field holds
// only the short text "shorter message".
const str = "{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=shorter message, event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}";
/**
 * Turns a flat "k1=v1, k2=v2" list into an object: the input is cut at
 * every comma, each piece is trimmed and cut at "=", and the resulting
 * pairs are assembled into an object.
 */
let mapSimpleString = (pass) => {
  const entries = _.split(pass, ',');
  const pairs = _.map(entries, (entry) => _.split(_.trim(entry), '='));
  return _.fromPairs(pairs);
};
/**
 * Maps one chunk of a record to objects, descending into the first nested
 * `{...}` block it finds.
 *
 * - An empty chunk yields ''.
 * - A chunk without a nested block yields the object built by
 *   mapSimpleString.
 * - Otherwise the result is a two-element array: the object built from the
 *   pairs in front of the block, and an object mapping the block's key to
 *   either the mapped block content or, when the block contains no comma,
 *   the raw `{...}` text.
 *
 * Enclosing braces are removed only where they are actually present, so a
 * chunk whose closing brace was already consumed by an earlier split on
 * "}," does not lose its last character.
 *
 * @param {string} str - Chunk of the record.
 * @returns {Object|Array|string} See above.
 */
let mapRecursive = (str) => {
  if (!str.length) {
    return '';
  }

  // Strip the chunk's own enclosing braces, if any.
  let pass = str;
  if (str.indexOf('{') === 0) {
    pass = str.endsWith('}') ? str.slice(1, -1) : str.slice(1);
  }

  const passIndex = pass.indexOf('{');
  if (passIndex <= 0) {
    return mapSimpleString(pass);
  }

  const pref = pass.substring(0, passIndex); // "k1=v1, ..., nestedKey="
  const post = pass.substring(passIndex); // "{...", closing brace optional
  const splitted_pref = _.split(pref, ',');

  // The last piece in front of the block is its key, followed by "=".
  const nestedKey = _.chain(splitted_pref).takeRight().trim().trimEnd('=').value();

  let nestedValue;
  if (post.indexOf(',') < 0) {
    // A block without commas is a braced scalar: keep it as text.
    nestedValue = post.endsWith('}') ? post : post + '}';
  } else {
    nestedValue = mapRecursive(post);
  }

  // slice(0, -1) drops the key piece; the original read an undeclared
  // `length` identifier here.
  return [mapSimpleString(splitted_pref.slice(0, -1)), { [nestedKey]: nestedValue }];
};
// Cut the record after every "}," (the end of a nested block), map each
// chunk on its own and flatten the per-chunk results into a single list.
const mapped = _.flatten(
  _.map(_.split(str, '},'), mapRecursive)
);
console.log(mapped);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.14/lodash.js"></script>
User contributions licensed under CC BY-SA 3.0