(Already answered — at least three solutions were left there instead of the original question.)
I was trying to parse and split a big JSON file, but did not want to modify its content.
Floating-point conversions kept changing the numbers until I changed FloatParseHandling.
A loop like the one below can split a 0.25 GB JSON file on my machine in about 40 s using only 14 MB of RAM, compared to 30 s and 5–7 GB of RAM with the common Stream.ReadToEnd approach, which exhausts free RAM and then crashes or stalls.
I also wanted to verify the results by binary comparison afterwards, but a lot of the numbers had changed.
jsonReader.FloatParseHandling = FloatParseHandling.Decimal;
// Splits "bigOne.json" into PartNo<start>.json files, each holding up to batchSize
// objects taken from the array whose JSON path begins with "BigArrayPathStart".
// Tokens are streamed and copied through one at a time, so memory use stays flat
// and the content is not reformatted.
using Newtonsoft.Json; // intentionally ugly - complete working code
// batchSize: objects per output part; [start, end]: inclusive index window of the
// current part; pos: index of the current array object within this pass.
long batchSize = 500000, start = 0, end = 0, pos = 0;
bool neverEnd = true; // cleared once the reader runs out of tokens
while (neverEnd) {
end = start + batchSize - 1; // last (inclusive) object index for this part
// NOTE(review): the input file is reopened and re-scanned from the top for every
// output part, so total work is O(parts * fileSize); earlier objects are skipped below.
var sr = new StreamReader(File.Open("bigOne.json", FileMode.Open, FileAccess.Read));
var sw = new StreamWriter(new FileStream(@"PartNo" + start + ".json", FileMode.Create));
// NOTE(review): disposing writer/jsonR is relied on to also close sw/sr
// (Newtonsoft's CloseOutput/CloseInput defaults) - confirm.
using (JsonWriter writer = new JsonTextWriter(sw))
using (var jsonR = new JsonTextReader(sr)) {
// Parse floats as decimal so numbers round-trip without floating-point drift.
jsonR.FloatParseHandling = FloatParseHandling.Decimal;
while (neverEnd) {
neverEnd &= jsonR.Read(); // false at end of input -> ends both loops
// Count only objects that are entries of the target array (path prefix match).
if (jsonR.TokenType == JsonToken.StartObject
&& jsonR.Path.IndexOf("BigArrayPathStart") == 0) { // batters[0] ... batters[3]
if (pos > end) break; // past this part's window: close files, start next part
if (pos++ < start) {
// Object belongs to an already-written part: consume its tokens without writing.
// NOTE(review): this stops at the FIRST EndObject, so an object containing
// nested objects would be only partially skipped - verify against the data.
do { jsonR.Read(); } while (jsonR.TokenType != JsonToken.EndObject);
continue;
}
}
// Copy the current token through unchanged. WriteToken(reader) writes the token
// and all of its children, so a PropertyName carries its whole value along.
// (TokenType >= PropertyName covers PropertyName and every value token.)
if (jsonR.TokenType >= JsonToken.PropertyName){ writer.WriteToken(jsonR); }
else if (jsonR.TokenType == JsonToken.StartObject) { writer.WriteStartObject(); }
else if (jsonR.TokenType == JsonToken.StartArray) { writer.WriteStartArray(); }
else if (jsonR.TokenType == JsonToken.StartConstructor) {
writer.WriteStartConstructor(jsonR.Value.ToString());
}
}
start = pos; pos = 0; // next part resumes at the first object this pass rejected
}
}
See the Question & Answers for more detail.