|
@@ -1,6 +1,7 @@
|
|
|
import codecs
|
|
|
import hashlib
|
|
|
import json
|
|
|
+import json.decoder
|
|
|
import logging
|
|
|
import sys
|
|
|
from threading import Thread
|
|
@@ -13,6 +14,8 @@ from six.moves.queue import Queue
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
+json_decoder = json.JSONDecoder()
|
|
|
+
|
|
|
|
|
|
def parallel_execute(objects, obj_callable, msg_index, msg):
|
|
|
"""
|
|
@@ -96,29 +99,56 @@ def stream_as_text(stream):
|
|
|
yield data
|
|
|
|
|
|
|
|
|
-def split_buffer(reader, separator=u'\n'):
|
|
|
- """
|
|
|
- Given a generator which yields strings and a separator string,
|
|
|
+def line_splitter(buffer, separator=u'\n'):
|
|
|
+ index = buffer.find(six.text_type(separator))
|
|
|
+ if index == -1:
|
|
|
+ return None, None
|
|
|
+ return buffer[:index + 1], buffer[index + 1:]
|
|
|
+
|
|
|
+
|
|
|
+def split_buffer(stream, splitter=None, decoder=lambda a: a):
|
|
|
+ """Given a generator which yields strings and a splitter function,
|
|
|
joins all input, splits on the separator and yields each chunk.
|
|
|
|
|
|
Unlike string.split(), each chunk includes the trailing
|
|
|
separator, except for the last one if none was found on the end
|
|
|
of the input.
|
|
|
"""
|
|
|
+ splitter = splitter or line_splitter
|
|
|
buffered = six.text_type('')
|
|
|
- separator = six.text_type(separator)
|
|
|
|
|
|
- for data in stream_as_text(reader):
|
|
|
+ for data in stream_as_text(stream):
|
|
|
buffered += data
|
|
|
while True:
|
|
|
- index = buffered.find(separator)
|
|
|
- if index == -1:
|
|
|
+ item, rest = splitter(buffered)
|
|
|
+ if not item:
|
|
|
break
|
|
|
- yield buffered[:index + 1]
|
|
|
- buffered = buffered[index + 1:]
|
|
|
|
|
|
- if len(buffered) > 0:
|
|
|
- yield buffered
|
|
|
+ buffered = rest
|
|
|
+ yield item
|
|
|
+
|
|
|
+ if buffered:
|
|
|
+ yield decoder(buffered)
|
|
|
+
|
|
|
+
|
|
|
+def json_splitter(buffer):
|
|
|
+ """Attempt to parse a json object from a buffer. If there is at least one
|
|
|
+ object, return it and the rest of the buffer, otherwise return None.
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ obj, index = json_decoder.raw_decode(buffer)
|
|
|
+ rest = buffer[json.decoder.WHITESPACE.match(buffer, index).end():]
|
|
|
+ return obj, rest
|
|
|
+ except ValueError:
|
|
|
+ return None, None
|
|
|
+
|
|
|
+
|
|
|
+def json_stream(stream):
|
|
|
+ """Given a stream of text, return a stream of json objects.
|
|
|
+ This handles streams which are inconsistently buffered (some entries may
|
|
|
+ be newline delimited, and others are not).
|
|
|
+ """
|
|
|
+ return split_buffer(stream_as_text(stream), json_splitter, json_decoder.decode)
|
|
|
|
|
|
|
|
|
def write_out_msg(stream, lines, msg_index, msg, status="done"):
|