How to map a string that contains key values separated by "=" to JSON?

0

I am writing a cronjob that ingests logs in lumberjack/beats format and converts the incoming log to JSON.

The input is a string containing a (possibly nested) list of key/value pairs separated by "=". I want to parse/map it to a JSON object using JavaScript.

I have written this snippet to do the conversion, but it only works partially.

The only problem with my approach is that it maps nested objects onto the first level, and if a value contains an "=" sign, that value gets split as well.

const parsedLog = {};

        // Walk over every ", "-separated chunk of the raw log string.
        for (const chunk of log.split(", ")) {
            // Break the chunk apart at every "=" sign.
            const parts = chunk.split("=");
            // Two parts  -> "key=value" (single-level entry).
            // Three parts -> "outer={key=value": the outer name is dropped.
            // In both cases the key is the second-to-last part and the value is the last one;
            // special characters are stripped from the key and curly braces from the value.
            if (parts.length === 2 || parts.length === 3) {
                const rawKey = parts[parts.length - 2];
                const rawValue = parts[parts.length - 1];
                parsedLog[rawKey.replace(/[^\w\s]/gi, '')] = rawValue.replace(/[{}]/g, "");
            }
        }

Input string:

"{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=An account failed to log on. Subject: Security ID: S-1-0-0 Account Name:   - Account Domain:   - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID:  S-1-0-0 Account Name:   test Account Domain:    test Failure Information: Failure Reason:   Unknown user name or bad password. Status: 0xC000006D Sub Status:   0xC000006A Process Information: Caller Process ID:  0x0 Caller Process Name:    - Network Information: Workstation Name: test Source Network Address:   0.0.0.0 Source Port:    0 Detailed Authentication Information: Logon Process:   NtLmSsp Authentication Package: NTLM Transited Services:    - Package Name (NTLM only): - Key Length:   0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. 
This will be 0 if no session key was requested., event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}"

Expected output:

{
    "@timestamp":"2019-07-12T12:19:03.547Z", 
    "@metadata":{
        "beat":"winlogbeat", 
        "type":"doc", 
        "version":"6.1.3"
    }, 
    "level":"Information", 
    "brand":"test", 
    "opcode":"Info", 
    "activity_id":"{00-0000-00000-0000-00000}", 
    "provider_guid":"{54849625-5478-4994-A5BA-3E3B0328C30D}", 
    "index_type":"Test", 
    "type":"AD", 
    "message":"An account failed to log on. Subject: Security ID:   S-1-0-0 Account Name:   - Account Domain:   - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID:  S-1-0-0 Account Name:   test Account Domain:    test Failure Information: Failure Reason:   Unknown user name or bad password. Status: 0xC000006D Sub Status:   0xC000006A Process Information: Caller Process ID:  0x0 Caller Process Name:    - Network Information: Workstation Name: test Source Network Address:   0.0.0.0 Source Port:    0 Detailed Authentication Information: Logon Process:   NtLmSsp Authentication Package: NTLM Transited Services:    - Package Name (NTLM only): - Key Length:   0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. This will be 0 if no session key was requested.", 
    "event_data":{
        "ProcessId":"0x0", 
        "IpAddress":"0.0.0.0", 
        "LogonProcessName":"NtLmSsp", 
        "KeyLength":"0", 
        "SubjectUserSid":"S-1-0-0", 
        "SubjectUserName":"-", 
        "SubjectLogonId":"0x0", 
        "LmPackageName":"-", 
        "FailureReason":"%%2313", 
        "TargetUserName":"test", 
        "TargetDomainName":"test", 
        "SubStatus":"0xc000006a", 
        "IpPort":"0", 
        "ProcessName":"-", 
        "LogonType":"3", 
        "WorkstationName":"test", 
        "TransmittedServices":"-", 
        "SubjectDomainName":"-", 
        "TargetUserSid":"S-1-0-0", 
        "Status":"0xc000006d", 
        "AuthenticationPackageName":"NTLM"
    }, 
    "task":"Logon", 
    "company":"Test", 
    "tags":"[windows, workstations]", 
    "beat":{
        "name":"test", 
        "hostname":"test", 
        "version":"6.1.3"
    }, 
    "source_name":"Microsoft-Windows-Security-Auditing", 
    "thread_id":"4128", 
    "event_id":"4625", 
    "log_name":"Security", 
    "record_number":"367542159", 
    "process_id":"596", 
    "computer_name":"test", 
    "keywords":"[Audit Failure]"
}
javascript
json
asked on Stack Overflow Jul 12, 2019 by Terence Nero

3 Answers

1

Another version: it checks the keys against a known list (in order, with all keys mandatory) and, when the text in front of an "=" is not an expected key, appends that text to the last value read. Structured (nested) key lists are not implemented at the moment.

/**
 * Parser for "{key=value, key={nested=value}, ...}" log records that is driven
 * by a list of known key names.
 *
 * Text in front of an "=" that is not a known key is treated as the remainder
 * of the previous value, which is how values containing commas (free-text
 * messages, "[a, b]" lists) are kept in one piece. Values are stored as
 * strings exactly as they appear in the record (no trimming).
 *
 * Usage:
 *   var parser = new LogParser(keys, true);
 *   parser.readRecord(record);
 *   var obj = parser.JSON();
 *
 * Known limitations: a value continuation must not contain a block-closing
 * "}", and the last field of a record must not contain a comma.
 */
var LogParser = (function () {
    'use strict';

    // Outcomes reported by readValue().
    const PLAIN_VALUE = 0;
    const BLOCK_END = 1;   // the value closes the current "{...}" block
    const BLOCK_START = 2; // the value opens a nested "{...}" block

    // Settings and last result of every parser, keyed by instance, so that
    // several parsers can coexist without sharing state.
    const privateState = new WeakMap();

    /**
     * @param {string[]} keys - All key names that may occur in a record. In
     *     ordered mode they must be listed in the exact order in which they
     *     appear in the record, nested keys included.
     * @param {boolean} [rOrdered] - Truthy: keys are matched strictly in list
     *     order. Falsy: a key matches if it occurs anywhere in the list.
     */
    function LogParser(keys, rOrdered) {
        privateState.set(this, {
            keyNames: keys,
            ordered: Boolean(rOrdered),
            json: undefined,
        });
    }

    /**
     * Parses one record; the result is afterwards available through JSON().
     *
     * @param {string} record - Record text starting with "{".
     * @throws {Error} If a key cannot be located or an unknown key cannot be
     *     attached to a previous value.
     */
    LogParser.prototype.readRecord = function (record) {
        const self = privateState.get(this);
        const root = {};
        self.json = root;

        const cursor = {
            str: record,
            pos: 1, // skip the record's opening "{"
            keyNames: self.keyNames,
            // Ordered mode: 1-based index of the next expected key; 0 means unordered.
            nextKey: self.ordered ? 1 : 0,
            node: root, // object currently being filled
        };
        // Enclosing objects of cursor.node. An explicit stack keeps the
        // bookkeeping out of the result, so a data key may be called "parent".
        const parents = [];
        const pair = [undefined, ""]; // [last key read, last value read]

        while (cursor.pos < record.length) {
            while (!readKey(cursor, pair)) {
                // A value continuation was consumed; look for the key again.
            }
            const key = pair[0];
            const outcome = readValue(cursor, pair);

            if (outcome === BLOCK_START) {
                if (key.indexOf('_') > 0 && key.endsWith('id')) {
                    // x_id={y}: a brace-wrapped identifier, kept as a string.
                    cursor.pos++;
                    readValue(cursor, pair); // re-read after "{"
                    cursor.node[key] = '{' + pair[1];
                    continue;
                }
                const child = {};
                cursor.node[key] = child;
                parents.push(cursor.node);
                cursor.node = child;
                cursor.pos++; // step over "{"
            } else if (outcome === BLOCK_END) {
                cursor.node[key] = pair[1].slice(0, -1); // drop the closing "}"
                if (parents.length === 0) {
                    break; // that was the record's own closing brace
                }
                cursor.node = parents.pop();
            } else {
                cursor.node[key] = pair[1];
            }
        }
    };

    /**
     * @returns {Object|undefined} Result of this parser's last readRecord()
     *     call, or undefined if nothing has been parsed yet.
     */
    LogParser.prototype.JSON = function () {
        return privateState.get(this).json;
    };

    // private helpers

    /**
     * Reads the next "key=" at the cursor. On a known key, stores it in
     * pair[0] and returns true. Otherwise the text up to the last comma in
     * front of the "=" is appended to the previous value, the cursor is moved
     * to that comma and false is returned so the caller retries.
     */
    function readKey(cursor, pair) {
        const str = cursor.str;
        const rawStart = cursor.pos;

        let start = rawStart;
        while (start < str.length && " \t,".includes(str[start])) {
            start++;
        }
        const eq = str.indexOf('=', start);
        if (eq === -1) {
            throw new Error('Malformed record: no "key=" found after position ' + rawStart);
        }
        const key = str.slice(start, eq).replace(/\s+$/, '');
        cursor.pos = eq + 1;

        const isKnown = cursor.nextKey
            ? cursor.keyNames[cursor.nextKey - 1] === key
            : cursor.keyNames.includes(key);
        if (isKnown) {
            pair[0] = key;
            if (cursor.nextKey) {
                cursor.nextKey++;
            }
            return true;
        }

        // Key not found => the text belongs to the previous value.
        let end = eq - 1;
        while (end > rawStart && str[end] !== ',') {
            end--;
        }
        // Without a later comma nothing would be consumed (endless retry), and
        // without a previous string value there is nothing to append to.
        if (end <= rawStart || typeof cursor.node[pair[0]] !== 'string') {
            throw new Error('Unexpected key "' + key + '" at position ' + start);
        }
        cursor.node[pair[0]] += str.slice(rawStart, end);
        cursor.pos = end;
        return false;
    }

    /**
     * Reads the value at the cursor up to the next comma (or the end of the
     * record) into pair[1]. Returns BLOCK_START without consuming anything
     * when the value begins with "{", BLOCK_END when the value read ends with
     * "}", and PLAIN_VALUE otherwise.
     */
    function readValue(cursor, pair) {
        const str = cursor.str;
        if (str[cursor.pos] === '{') {
            return BLOCK_START;
        }
        let end = str.indexOf(',', cursor.pos);
        if (end === -1) {
            end = str.length;
        }
        const value = str.slice(cursor.pos, end);
        pair[1] = value;
        cursor.pos = end;
        return value.endsWith('}') ? BLOCK_END : PLAIN_VALUE;
    }

    return LogParser;
})();

// Sample record from the question. The literal must stay on ONE physical line:
// a raw line break inside a double-quoted string is a syntax error.
var x="{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=An account failed to log on. Subject: Security ID: S-1-0-0 Account Name:   - Account Domain:   - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID:  S-1-0-0 Account Name:   test Account Domain:    test Failure Information: Failure Reason:   Unknown user name or bad password. Status: 0xC000006D Sub Status:   0xC000006A Process Information: Caller Process ID:  0x0 Caller Process Name:    - Network Information: Workstation Name: test Source Network Address:   0.0.0.0 Source Port:    0 Detailed Authentication Information: Logon Process:   NtLmSsp Authentication Package: NTLM Transited Services:    - Package Name (NTLM only): - Key Length:   0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. This will be 0 if no session key was requested., event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}";
// Every key of the sample record, in the exact order in which it appears
// (nested keys follow the key of the block that contains them).
var keys = [
    "@timestamp",
    "@metadata", "beat", "type", "version",
    "level", "brand", "opcode", "activity_id", "provider_guid", "index_type", "type", "message",
    "event_data", "ProcessId", "IpAddress", "LogonProcessName", "KeyLength", "SubjectUserSid",
    "SubjectUserName", "SubjectLogonId", "LmPackageName", "FailureReason", "TargetUserName",
    "TargetDomainName", "SubStatus", "IpPort", "ProcessName", "LogonType", "WorkstationName",
    "TransmittedServices", "SubjectDomainName", "TargetUserSid", "Status", "AuthenticationPackageName",
    "task", "company", "tags",
    "beat", "name", "hostname", "version",
    "source_name", "thread_id", "event_id", "log_name", "record_number", "process_id",
    "computer_name", "keywords"
];

// Second argument true: keys are checked in list order.
var parser = new LogParser(keys, true);
parser.readRecord(x);

// Fetch the parsed object and print it as indented JSON.
var resultObj = parser.JSON();
console.log(JSON.stringify(resultObj, null, 4)); // common indent 4

As beat, type and version are not unique, it would be better to structure the source keys as well, for example like this, and to walk them with a separate iterator similar to jsonIterator:

[
    "@timestamp",
    "@metadata", [
        "beat",
        "type",
        "version"
    ],
    "level",
    "brand",
    "opcode",
    "activity_id",
    "provider_guid",
    "index_type",
    "type",
    "message",
    "event_data", [
        "ProcessId",
        "IpAddress",
        "LogonProcessName",
        "KeyLength",
        "SubjectUserSid",
        "SubjectUserName",
        "SubjectLogonId",
        "LmPackageName",
        "FailureReason",
        "TargetUserName",
        "TargetDomainName",
        "SubStatus",
        "IpPort",
        "ProcessName",
        "LogonType",
        "WorkstationName",
        "TransmittedServices",
        "SubjectDomainName",
        "TargetUserSid",
        "Status",
        "AuthenticationPackageName"
    ],
    "task",
    "company",
    "tags",
    "beat", [
        "name",
        "hostname",
        "version"
    ],
    "source_name",
    "thread_id",
    "event_id",
    "log_name",
    "record_number",
    "process_id",
    "computer_name",
    "keywords"
]
answered on Stack Overflow Jul 17, 2019 by Tom • edited Jul 17, 2019 by Tom
0

Sorry for the poor design and the few hacks, but the result is almost right, isn't it?
The only ugly line in the output now is this one:
"LogonProcessName": "NtLmSsp ",
I am not sure why there is a trailing space in the source: is it a typo or intentional?
Another hack, or a small code change that skips whitespace before a comma, would probably fix it.

// Sample record from the question. The literal must stay on ONE physical line:
// a raw line break inside a double-quoted string is a syntax error.
var x="{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=An account failed to log on. Subject: Security ID: S-1-0-0 Account Name:   - Account Domain:   - Logon ID: 0x0 Logon Type: 3 Account For Which Logon Failed: Security ID:  S-1-0-0 Account Name:   test Account Domain:    test Failure Information: Failure Reason:   Unknown user name or bad password. Status: 0xC000006D Sub Status:   0xC000006A Process Information: Caller Process ID:  0x0 Caller Process Name:    - Network Information: Workstation Name: test Source Network Address:   0.0.0.0 Source Port:    0 Detailed Authentication Information: Logon Process:   NtLmSsp Authentication Package: NTLM Transited Services:    - Package Name (NTLM only): - Key Length:   0 This event is generated when a logon request fails. It is generated on the computer where access was attempted. The Subject fields indicate the account on the local system which requested the logon. This is most commonly a service such as the Server service, or a local process such as Winlogon.exe or Services.exe. The Logon Type field indicates the kind of logon that was requested. The most common types are 2 (interactive) and 3 (network). The Process Information fields indicate which account and process on the system requested the logon. The Network Information fields indicate where a remote logon request originated. Workstation name is not always available and may be left blank in some cases. The authentication information fields provide detailed information about this specific logon request. - Transited services indicate which intermediate services have participated in this logon request. - Package name indicates which sub-protocol was used among the NTLM protocols. - Key length indicates the length of the generated session key. This will be 0 if no session key was requested., event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}";
/**
 * Converts a "{key=value, key={nested=value}, ...}" record into a plain
 * object, prints it as indented JSON and returns it.
 *
 * Rules applied to each "key=value" pair:
 *  - a value starting with "{" opens a nested object, unless the key contains
 *    "_" and ends in "id": such brace-wrapped identifiers are kept verbatim
 *    as strings (e.g. activity_id={...});
 *  - the value of the key "message" is free text that may contain commas; it
 *    runs up to the first "." that is directly followed by "," (if there is
 *    none, it is read like any other value);
 *  - any other value runs up to the next comma or closing "}" that is not
 *    enclosed in "{...}" / "[...]" inside the value itself.
 * Values are stored as strings exactly as they appear (no trimming).
 *
 * @param {string} str - Record text starting with "{".
 * @returns {Object} The parsed record.
 * @throws {Error} If text that should be a key contains no "=", or a
 *     brace-wrapped identifier is never closed.
 */
function convert(str) {
    // Returns the index at which the plain value starting at `from` ends.
    function findValueEnd(from) {
        let curly = 0;  // "{" opened inside the value and not yet closed
        let square = 0; // "[" opened inside the value and not yet closed
        let i = from;
        for (; i < str.length; i++) {
            const c = str[i];
            if (c === '{') {
                curly++;
            } else if (c === '[') {
                square++;
            } else if (c === '}') {
                if (curly === 0) break; // closes the enclosing block
                curly--;
            } else if (c === ']') {
                if (square > 0) square--;
            } else if (c === ',' && curly === 0 && square === 0) {
                break;
            }
        }
        return i;
    }

    const json = {};    // result object
    const parents = []; // enclosing objects of `node`; avoids recursion
    let node = json;    // object currently being filled
    let pos = 1;        // skip the record's opening "{"

    while (pos < str.length) {
        const ch = str[pos];
        if (ch === '}') { // block end
            if (parents.length === 0) break; // the record's own closing brace
            node = parents.pop();
            pos++;
            continue;
        }
        if (ch === ',' || ch === ' ') { // separators between pairs
            pos++;
            continue;
        }

        // key
        const eq = str.indexOf('=', pos);
        if (eq === -1) {
            throw new Error('Malformed record: expected "key=" at position ' + pos);
        }
        const key = str.slice(pos, eq);
        pos = eq + 1;

        // value
        if (str[pos] === '{') {
            if (key.indexOf('_') > 0 && key.endsWith('id')) {
                // IDs carry their own curly brackets; keep them as text.
                const close = str.indexOf('}', pos);
                if (close === -1) {
                    throw new Error('Malformed record: unterminated "{" at position ' + pos);
                }
                node[key] = str.slice(pos, close + 1);
                pos = close + 1;
                continue;
            }
            const child = {};
            node[key] = child; // create next level
            parents.push(node);
            node = child;
            pos++; // step over "{"
            continue;
        }

        let end = -1;
        if (key === 'message') {
            // Ends as a sentence: a dot followed by the separating comma.
            const stop = str.indexOf('.,', pos);
            if (stop !== -1) end = stop + 1;
        }
        if (end === -1) {
            end = findValueEnd(pos);
        }
        node[key] = str.slice(pos, end);
        pos = end;
    }

    console.log(JSON.stringify(json, null, 4)); // common indent 4
    return json;
}
// Run the converter on the sample record x; convert() prints the result as indented JSON.
convert(x);

answered on Stack Overflow Jul 15, 2019 by Tom • edited Jul 17, 2019 by Tom
0

I've managed to prepare some code using lodash. It is not a ready-to-use solution, but I hope it gives you a hint on how to process the input. Things still to do:

  • handle value as property: [value1, value2]
  • I've sidestepped the commas in the message field by using a shortened message. (You can probably extend my code to check whether the field is the message field and handle it differently, or pre-process the input string.)

// Sample record used by this answer; its message field holds a short, comma-free text ("shorter message").
const str = "{@timestamp=2019-07-12T12:19:03.547Z, @metadata={beat=winlogbeat, type=doc, version=6.1.3}, level=Information, brand=test, opcode=Info, activity_id={B49D73AE-01D7-0001-C273-9DB4D701D501}, provider_guid={54849625-5478-4994-A5BA-3E3B0328C30D}, index_type=Test, type=AD, message=shorter message, event_data={ProcessId=0x0, IpAddress=0.0.0.0, LogonProcessName=NtLmSsp , KeyLength=0, SubjectUserSid=S-1-0-0, SubjectUserName=-, SubjectLogonId=0x0, LmPackageName=-, FailureReason=%%2313, TargetUserName=test, TargetDomainName=test, SubStatus=0xc000006a, IpPort=0, ProcessName=-, LogonType=3, WorkstationName=test, TransmittedServices=-, SubjectDomainName=-, TargetUserSid=S-1-0-0, Status=0xc000006d, AuthenticationPackageName=NTLM}, task=Logon, company=Test, tags=[windows, workstations], beat={name=test, hostname=test, version=6.1.3}, source_name=Microsoft-Windows-Security-Auditing, thread_id=4128, event_id=4625, log_name=Security, record_number=367542159, process_id=596, computer_name=test, keywords=[Audit Failure]}";

// Builds a flat object from comma-separated "key=value" text: the input is
// split at every comma, each piece is trimmed and split at "=", and the
// resulting [key, value] pairs are collected into one object.
let mapSimpleString = (pass) => {
    const pieces = _.split(pass, ',');
    const pairs = _.map(pieces, (piece) => _.split(_.trim(piece), '='));
    return _.fromPairs(pairs);
};

// Maps one fragment of the record to objects.
//  - Empty fragment: returns ''.
//  - Fragment without a nested "{": returns the flat object built by
//    mapSimpleString (a leading "{" and the fragment's last character are
//    stripped first when the fragment starts with "{").
//  - Otherwise returns a two-element array: the flat object built from the
//    pairs in front of the nested block, and an object holding the nested
//    block under the key that precedes it.
let mapRecursive = (str) => {
    if (!str.length) {
        return '';
    }

    const index = str.indexOf('{');
    const pass = index === 0 ? str.substring(1, str.length - 1) : str;
    const passIndex = pass.indexOf('{');
    if (passIndex <= 0) {
        return mapSimpleString(pass);
    }

    const pref = pass.substring(0, passIndex);               // text in front of the nested "{"
    const post = pass.substring(passIndex, pass.length - 1); // nested block without the last character
    const splitted_pref = _.split(pref, ',');

    // The last piece of the prefix is "key=" of the nested block.
    const nestedKey = _.chain(splitted_pref).takeRight().trim().trimEnd('=').value();
    // A block without commas is kept as text (the stripped last character is
    // put back as "}"); anything else is mapped recursively.
    const tail = (post.indexOf(',') < 0)
        ? { [nestedKey]: post + '}' }
        : { [nestedKey]: mapRecursive(post) };

    // All prefix pieces except the last one are ordinary pairs. The original
    // wrote slice(0, length - 1) with an undeclared `length`, which throws a
    // ReferenceError outside a browser and only works in one by accident.
    return [mapSimpleString(splitted_pref.slice(0, -1)), tail];
};

// Cut the record at every "}," boundary, map each fragment, and flatten the
// per-fragment results (objects or [object, object] arrays) into one list.
const fragments = _.split(str, '},');
const mapped = _.flatten(_.map(fragments, mapRecursive));

console.log(mapped)
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.14/lodash.js"></script>

answered on Stack Overflow Jul 15, 2019 by Andreew4x4

User contributions licensed under CC BY-SA 3.0