The `.toString()` output of Java collections (`Map`, `List`, etc.) is lossy because it does not escape or otherwise disambiguate delimiters. As a result, there is no way to reconstruct the data structure from the output of `Map.toString()` with 100% reliability. However, if some constraints are applied to the problem:
- the keys and values do not contain certain characters (approximately `{}=[],"`)
- arrays do not contain a mixture of primitive values and objects/arrays
then we can somewhat reliably transform the output of `toString()` to JSON, and then parse the JSON into a Python data structure. I wouldn't use this code in production, but as long as you know it can break, it could be useful in certain cases:
# Sample input: a nested Java Map/List rendered with toString(), i.e. `key=value`
# pairs inside `{...}` and list elements inside `[...]`, all separated by ", ".
TEST_VALUE = "{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}"
def quote_value_array_values(match) -> str:
    """Wrap every element of a matched Java list body in double quotes.

    Meant to be used as a ``re.sub`` replacement callback. The full text of
    ``match`` is treated as a list of elements separated by ``", "``.

    Args:
        match: Regex match object whose whole matched text is the
            comma-separated element list (without the surrounding brackets).

    Returns:
        The elements, each surrounded by ``"`` and re-joined with ``", "``.
        Elements are not escaped, so an element that itself contains ``"``
        or a backslash will not yield valid JSON.
    """
    elements = match.group().split(", ")
    return ", ".join(f'"{element}"' for element in elements)
def javastr_to_jsonstr(s: str) -> str:
    """Best-effort conversion of a Java ``Map``/``List`` ``toString()`` dump to JSON text.

    The conversion is purely regex based and therefore only works when keys
    and values do not contain the structural characters ``{}=[],"`` in
    ambiguous positions, and when lists do not mix primitive values with
    nested maps/lists. Every primitive value is emitted as a JSON string.

    Args:
        s: Text such as ``{a=1, b={c=2}, d=[x, y]}``.

    Returns:
        The rewritten text, e.g. ``{"a":"1", "b":{"c":"2"}, "d":["x", "y"]}``.
        It is not validated here; pass it to ``json.loads`` to parse it.
    """
    import re

    # 1. Lists of primitives: quote each element found between "=[" and "]".
    #    The body may not contain "{", "[" or "]", so lists of maps are skipped.
    s = re.sub(r"(?<==\[)[^{\[\]]+(?=\])", quote_value_array_values, s)

    # 2. key=value pairs whose value is primitive (not starting with "{" or
    #    "["), located right after "{" or after ", ": quote both key and value.
    s = re.sub(r'(?<={)([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)
    s = re.sub(r'(?<=, )([^"=]+)[=:](?!{|\[)([^,}]+)', r'"\1":"\2"', s)

    # 3. Remaining keys (those whose value is a nested map or list): quote the
    #    key and turn "=" into ":".
    s = re.sub(r'(?<={)([^"=]+)=(?!")', r'"\1":', s)
    s = re.sub(r'(?<=, )([^"=]+)=(?!")', r'"\1":', s)
    return s
import json
# Rewrite the Java toString() dump as JSON text, then decode it with the
# standard JSON parser.
json_str = javastr_to_jsonstr(TEST_VALUE)
json_obj = json.loads(json_str)

# Show the decoded structure, pretty-printed with one space per nesting level.
pretty = json.dumps(json_obj, indent=1)
print(pretty)
Output:
{
"0": {
"_shards": {
"total": "1",
"failed": "0",
"successful": "1",
"skipped": "0"
},
"hits": {
"hits": [
{
"_index": "filebeat-7.10.0-2021.02.02-000001",
"_type": "_doc",
"_source": {
"input": {
"type": "log"
},
"agent": {
"hostname": "ubuntu_fresh",
"name": "ubuntu_fresh",
"id": "879f36f2-4ade-47b6-a7b9-7972634c7b8c",
"type": "filebeat",
"ephemeral_id": "5676523f-bc61-4c12-b319-8b463348ba63",
"version": "7.10.0"
},
"@timestamp": "2021-02-04T12:36:33.475Z",
"ecs": {
"version": "1.6.0"
},
"log": {
"file": {
"path": "/var/log/auth.log"
},
"offset": "46607"
},
"service": {
"type": "system"
},
"host": {
"hostname": "ubuntu_fresh",
"os": {
"kernel": "4.15.0-135-generic",
"codename": "bionic",
"name": "Ubuntu",
"family": "debian",
"version": "18.04.1 LTS (Bionic Beaver)",
"platform": "ubuntu"
},
"containerized": "false",
"ip": [
"10.0.2.15",
"fe80::a00:27ff:fe82:f598",
"192.168.56.22",
"fe80::a00:27ff:fe32:fab0"
],
"name": "ubuntu_fresh",
"id": "cdfcdf6a39d44b98b2aa51700134f415",
"mac": [
"08:00:27:82:f5:98",
"08:00:27:32:fa:b0"
],
"architecture": "x86_64"
},
"fileset": {
"name": "auth"
},
"message": "Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2",
"error": {
"message": "Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]"
},
"event": {
"ingested": "2021-02-04T12:36:39.482598548Z",
"timezone": "+00:00",
"module": "system",
"dataset": "system.auth"
}
},
"_id": "nNALbXcBbfKg8Fh6Zci7",
"_score": "25.188179"
}
],
"total": {
"value": "1",
"relation": "eq"
},
"max_score": "25.188179"
},
"took": "1",
"timed_out": "false"
}
}