Squashed 'third_party/jsont/' content from commit 1536152d7 Change-Id: I51a80190772b74ca0d45fd3fadc130e872b57cc0 git-subtree-dir: third_party/jsont git-subtree-split: 1536152d7c1926448d42e4a691acd9a15940b20c

commit: f417eaf93c086c0695adeb0e9cacd44e7e537b6a [log] [tgz]
author: Austin Schuh <austin.linux@gmail.com> Mon Sep 16 21:58:36 2019 -0700
committer: Austin Schuh <austin.linux@gmail.com> Mon Sep 16 21:58:36 2019 -0700
tree: 1b9c3b952bf1501aacc99fbfd40e7a2c730c1b2d
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ecee4c9
--- /dev/null
+++ b/.gitignore

@@ -0,0 +1,6 @@
+example1
+example2
+*.d
+.objs
+x*
+test/build

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..03c7813
--- /dev/null
+++ b/LICENSE

@@ -0,0 +1,19 @@
+Copyright (c) 2012 Rasmus Andersson <http://rsms.me/>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..848cdc2
--- /dev/null
+++ b/Makefile

@@ -0,0 +1,57 @@
+# Library sources that are linked into every example and test program.
+c_sources :=	jsont.c
+
+# Default goal: build both examples, then build and run the tests.
+all: example1 example2 test
+
+# Library objects are placed under $(object_dir).
+object_dir = .objs
+objects = $(patsubst %,$(object_dir)/%,${c_sources:.c=.o})
+object_dirs = $(sort $(foreach fn,$(objects),$(dir $(fn))))
+# Pull in the .d dependency files written by -MMD, if they exist yet.
+-include ${objects:.o=.d}
+
+# Tests: every test/test*.c becomes one program under $(test_build_dir).
+test_dir = test
+test_sources  := $(wildcard test/test*.c)
+test_object_dir = $(test_dir)/.objs
+test_build_dir  = $(test_dir)/build
+test_objects    = $(patsubst test/%,$(test_object_dir)/%,${test_sources:.c=.o})
+test_programs   = $(patsubst test/%.c,$(test_build_dir)/%,$(test_sources))
+test_object_dirs = $(sort $(foreach fn,$(test_objects),$(dir $(fn))))
+
+CC = clang
+LD = clang
+
+CFLAGS 	+= -Wall -g -MMD -std=c99 -I.
+# ":=" expands CFLAGS right here, so TEST_CFLAGS does not receive the
+# DEBUG-dependent flags appended below.
+TEST_CFLAGS := $(CFLAGS) -O0
+#LDFLAGS +=
+# A non-empty DEBUG selects an unoptimized build with DEBUG=1 defined;
+# otherwise build optimized with NDEBUG defined.
+ifneq ($(DEBUG),)
+	CFLAGS += -O0 -DDEBUG=1
+else
+	CFLAGS += -O3 -DNDEBUG
+endif
+
+# Remove the built binaries and every generated object/test directory.
+clean:
+	rm -f jsont example1 example2
+	rm -rf $(object_dir)
+	rm -rf $(test_object_dir)
+	rm -rf $(test_build_dir)
+
+# Each example is linked from the library objects plus its own object file.
+example1: $(objects) $(object_dir)/example1.o
+	$(LD) $(LDFLAGS) -o $@ $^
+
+example2: $(objects) $(object_dir)/example2.o
+	$(LD) $(LDFLAGS) -o $@ $^
+
+# Build every test program, then run them one at a time, stopping at the
+# first one that fails. (Expanding $(test_programs) directly as the recipe
+# would run only the first program, with the others passed as arguments.)
+test: $(objects) $(test_programs)
+	@for t in $(test_programs); do echo "$$t"; "$$t" || exit 1; done
+
+# Link one test program from the library objects and its own test object.
+$(test_build_dir)/%: $(objects) $(test_object_dir)/%.o
+	@mkdir -p `dirname $@`
+	$(LD) $(LDFLAGS) -o $@ $^
+
+# Compile a test source using TEST_CFLAGS.
+$(test_object_dir)/%.o: $(test_dir)/%.c
+	@mkdir -p `dirname $@`
+	$(CC) $(TEST_CFLAGS) -c -o $@ $<
+
+# Compile a library or example source using CFLAGS.
+$(object_dir)/%.o: %.c
+	@mkdir -p `dirname $@`
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+.PHONY: clean all test

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..34ec56e
--- /dev/null
+++ b/README.md

@@ -0,0 +1,209 @@
+# JSON Tokenizer (jsont)
+
+A minimal and portable JSON tokenizer written in standard C and C++ (two separate versions). Performs validating and highly efficient parsing suitable for reading JSON directly into custom data structures. There are no code dependencies — simply include `jsont.{h,hh,c,cc}` in your project.
+
+Build and run unit tests:
+
+    make
+
+## Synopsis
+
+C API:
+
+```c
+jsont_ctx_t* S = jsont_create(0);
+jsont_reset(S, uint8_t* inbuf, size_t inbuf_len);
+tok = jsont_next(S)
+// branch on `tok` ...
+V = jsont_*_value(S[, ...]);
+jsont_destroy(S);
+```
+
+New C++ API:
+
+```cc
+jsont::Tokenizer S(const char* inbuf, size_t length);
+jsont::Token token;
+while ((token = S.next())) {
+  if (token == jsont::Float) {
+    printf("%g\n", S.floatValue());
+  } ... else if (token == jsont::Error) {
+    // handle error
+    break;
+  }
+}
+```
+
+```cc
+jsont::Builder json;
+json.startObject()
+    .fieldName("foo").value(123.45)
+    .fieldName("bar").startArray()
+      .value(678)
+      .value("nine \"ten\"")
+    .endArray()
+  .endObject();
+std::cout << json.toString() << std::endl;
+// {"foo":123.45,"bar":[678,"nine \"ten\""]}
+```
+
+# API overview
+
+See `jsont.h` and `jsont.hh` for a complete overview of the API, including more detailed documentation. Here's an overview:
+
+## C++ API `namespace jsont`
+
+- `Builder build()` — convenience builder factory
+
+### class Tokenizer
+
+Reads a sequence of bytes and produces tokens and values while doing so.
+
+- `Tokenizer(const char* bytes, size_t length, TextEncoding encoding)` — initialize a new Tokenizer to read `bytes` of `length` in `encoding`
+- `void reset(const char* bytes, size_t length, TextEncoding encoding)` — Reset the tokenizer, making it possible to reuse this parser so to avoid unnecessary memory allocation and deallocation.
+
+#### Reading tokens
+
+- `const Token& next() throw(Error)` — Read next token, possibly throwing an `Error`
+- `const Token& current() const` — Access current token
+
+#### Reading values
+
+- `bool hasValue() const` — True if the current token has a value
+- `size_t dataValue(const char const** bytes)` — Returns a slice of the input which represents the current value, or nothing (returns 0) if the current token has no value (e.g. start of an object).
+- `std::string stringValue() const` — Returns a *copy* of the current string value.
+- `double floatValue() const` — Returns the current value as a double-precision floating-point number.
+- `int64_t intValue() const` — Returns the current value as a signed 64-bit integer.
+
+#### Handling errors
+
+- `ErrorCode error() const` — Returns the error code of the last error
+- `const char* errorMessage() const` — Returns a human-readable message for the last error. Never returns NULL.
+
+#### Accessing underlying input buffer
+
+- `const char* inputBytes() const` — A pointer to the input data as passed to `reset` or the constructor.
+- `size_t inputSize() const` — Total number of input bytes
+- `size_t inputOffset() const` — The byte offset into input where the tokenizer is currently at. In the event of an error, this will point to the source of the error.
+
+### enum Token
+
+- `End` —           Input ended
+- `ObjectStart` —   {
+- `ObjectEnd` —     }
+- `ArrayStart` —    [
+- `ArrayEnd` —      ]
+- `True` —          true
+- `False` —         false
+- `Null` —          null
+- `Integer` —       number value without a fraction part (access as int64 through `Tokenizer::intValue()`)
+- `Float` —         number value with a fraction part (access as double through `Tokenizer::floatValue()`)
+- `String` —        string value (access value through `Tokenizer::stringValue()` et al)
+- `FieldName` —     field name (access value through `Tokenizer::stringValue()` et al)
+- `Error` —         an error occurred (access error code through `Tokenizer::error()` et al)
+
+### enum TextEncoding
+
+- `UTF8TextEncoding` — Unicode UTF-8 text encoding
+
+### enum Tokenizer::ErrorCode
+
+- `UnspecifiedError` — Unspecified error
+- `UnexpectedComma` — Unexpected comma
+- `UnexpectedTrailingComma` — Unexpected trailing comma
+- `InvalidByte` — Invalid input byte
+- `PrematureEndOfInput` — Premature end of input
+- `MalformedUnicodeEscapeSequence` — Malformed Unicode escape sequence
+- `MalformedNumberLiteral` — Malformed number literal
+- `UnterminatedString` — Unterminated string
+- `SyntaxError` — Illegal JSON (syntax error)
+
+### class Builder
+
+Aids in building JSON, providing a final sequential byte buffer.
+
+- `Builder()` — initialize a new builder with an empty backing buffer
+- `Builder& startObject()` — Start an object (appends a `'{'` character to the backing buffer)
+- `Builder& endObject()` — End an object (a `'}'` character)
+- `Builder& startArray()` — Start an array (`'['`)
+- `Builder& endArray()` — End an array (`']'`)
+- `const void reset()` — Reset the builder to its neutral state. Note that the backing buffer is reused in this case.
+
+#### Building
+
+- `Builder& fieldName(const char* v, size_t length, TextEncoding encoding=UTF8TextEncoding)` — Adds a field name by copying `length` bytes from `v`.
+- `Builder& fieldName(const std::string& name, TextEncoding encoding=UTF8TextEncoding)` — Adds a field name by copying `name`.
+- `Builder& value(const char* v, size_t length, TextEncoding encoding=UTF8TextEncoding)` — Adds a string value by copying `length` bytes from `v` which content is encoded according to `encoding`.
+- `Builder& value(const char* v)` — Adds a string value by copying `strlen(v)` bytes from c-string `v`. Uses the default encoding of `value(const char*,size_t,TextEncoding)`.
+- `Builder& value(const std::string& v)`  — Adds a string value by copying `v`. Uses the default encoding of `value(const char*,size_t,TextEncoding)`.
+- `Builder& value(double v)` — Adds a possibly fractional number
+- `Builder& value(int64_t v)`, `void value(int v)`, `void value(unsigned int v)`, `void value(long v)` — Adds an integer number
+- `Builder& value(bool v)` — Adds the "true" or "false" atom, depending on `v`
+- `Builder& nullValue()` — Adds the "null" atom
+
+#### Managing the result
+
+- `size_t size() const` — Number of readable bytes at the pointer returned by `bytes()`
+- `const char* bytes() const` — Pointer to the backing buffer, holding the resulting JSON.
+- `std::string toString() const` — Return a `std::string` object holding a copy of the backing buffer, representing the JSON.
+- `const char* seizeBytes(size_t& size_out)` — "Steal" the backing buffer. After this call, the caller is responsible for calling `free()` on the returned pointer. Returns NULL on failure. Sets the value of `size_out` to the number of readable bytes at the returned pointer. The builder will be reset and ready to use (which will act on a new backing buffer).
+
+----
+
+## C API
+
+### Types
+
+- `jsont_ctx_t` — A tokenizer context ("instance" in OOP lingo.)
+- `jsont_tok_t` — A token type (see "Token types".)
+- `jsont_err_t` — A user-configurable error type, which defaults to `const char*`.
+
+### Managing a tokenizer context
+
+- `jsont_ctx_t* jsont_create(void* user_data)` — Create a new JSON tokenizer context.
+- `void jsont_destroy(jsont_ctx_t* ctx)` — Destroy a JSON tokenizer context.
+- `void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length)` — Reset the tokenizer to parse the data pointed to by `bytes`.
+
+### Dealing with tokens
+
+- `jsont_tok_t jsont_next(jsont_ctx_t* ctx)` — Read and return the next token.
+- `jsont_tok_t jsont_current(const jsont_ctx_t* ctx)` — Returns the current token (last token read by `jsont_next`).
+
+### Accessing and comparing values
+
+- `int64_t jsont_int_value(jsont_ctx_t* ctx)` — Returns the current integer value.
+- `double jsont_float_value(jsont_ctx_t* ctx)` — Returns the current floating-point number value.
+- `size_t jsont_data_value(jsont_ctx_t* ctx, const uint8_t** bytes)` — Returns a slice of the input which represents the current value.
+- `char* jsont_strcpy_value(jsont_ctx_t* ctx)` — Retrieve a newly allocated c-string.
+- `bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length)` — Returns true if the current data value is equal to `bytes` of `length`
+- `bool jsont_str_equals(jsont_ctx_t* ctx, const char* str)` — Returns true if the current data value is equal to c string `str`.
+
+Note that the data is not parsed until you call one of these functions. This means that if you know that a value transferred as a string will fit in a 64-bit signed integer, it's completely valid to call `jsont_int_value` to parse the string as an integer.
+
+### Miscellaneous
+
+- `uint8_t jsont_current_byte(jsont_ctx_t* ctx)` — Get the last byte read.
+- `size_t jsont_current_offset(jsont_ctx_t* ctx)` — Get the current offset of the last byte read.
+- `jsont_err_t jsont_error_info(jsont_ctx_t* ctx)` — Get information on the last error.
+- `void* jsont_user_data(const jsont_ctx_t* ctx)` — Returns the value passed to `jsont_create`
+
+### Token types
+
+- `JSONT_END` —            Input ended.
+- `JSONT_ERR` —            Error. Retrieve details through `jsont_error_info`
+- `JSONT_OBJECT_START` —   {
+- `JSONT_OBJECT_END` —     }
+- `JSONT_ARRAY_START` —    [
+- `JSONT_ARRAY_END` —      ]
+- `JSONT_TRUE` —           true
+- `JSONT_FALSE` —          false
+- `JSONT_NULL` —           null
+- `JSONT_NUMBER_INT` —     number value without a fraction part (access through `jsont_int_value` or `jsont_float_value`)
+- `JSONT_NUMBER_FLOAT` —   number value with a fraction part (access through `jsont_float_value`)
+- `JSONT_STRING` —         string value (access through `jsont_data_value` or `jsont_strcpy_value`)
+- `JSONT_FIELD_NAME` —     field name (access through `jsont_data_value` or `jsont_strcpy_value`)
+
+## Further reading
+
+- See `example*.c` for working sample programs.
+- See `LICENSE` for the MIT-style license under which this project is licensed.

diff --git a/example1.c b/example1.c
new file mode 100644
index 0000000..c36559c
--- /dev/null
+++ b/example1.c

@@ -0,0 +1,76 @@
+//
+// This is a simple example of running the tokenizer, outputting information
+// to stdout about what tokens we get and their values.
+//
+#include <jsont.h>
+#include <stdio.h>
+#include <string.h>
+
+static const char* _tok_name(jsont_tok_t tok);
+
+// Tokenizes a fixed JSON sample and prints one table row per token, including
+// the value of string, field-name and number tokens. Returns 0 on success, or
+// 1 after reporting the error if the tokenizer fails.
+int main(int argc, const char** argv) {
+  // Create a new reusable tokenizer
+  jsont_ctx_t* S = jsont_create(0);
+
+  // Sample input
+  const char* inbuf = "{\"Ape\":123,\"Bro\":[400192,\"51\",true, false, null,"
+                      " -67,\r\n\t 6.123]}";
+
+  // Reset the parser with a pointer to our sample input
+  jsont_reset(S, (const uint8_t*)inbuf, strlen(inbuf));
+
+  // Read each token
+  jsont_tok_t tok;
+  printf("Token        | Value\n"
+         "-------------|----------------------------------------\n");
+  while ( (tok = jsont_next(S)) != JSONT_END && tok != JSONT_ERR) {
+    printf("%-12s |", _tok_name(tok));
+
+    // If the token has a value, also print its value
+    if (tok == JSONT_STRING || tok == JSONT_FIELD_NAME) {
+      const uint8_t* bytes = 0;
+      size_t len = jsont_data_value(S, &bytes);
+      if (len != 0)
+        printf(" '%.*s'", (int)len, (const char*)bytes);
+    } else if (tok == JSONT_NUMBER_INT) {
+      // int64_t is not necessarily `long long`; cast so the argument matches
+      // the %lld conversion on every platform.
+      printf(" %lld", (long long)jsont_int_value(S));
+    } else if (tok == JSONT_NUMBER_FLOAT) {
+      printf(" %f", jsont_float_value(S));
+    }
+
+    printf("\n");
+  }
+
+  // If we got an error, print some useful information and exit with 1
+  int status = 0;
+  if (tok == JSONT_ERR) {
+    fprintf(stderr, "Error: %s ('%c' at offset %lu)\n",
+            jsont_error_info(S),
+            (char)jsont_current_byte(S),
+            (unsigned long)jsont_current_offset(S));
+    status = 1;
+  }
+
+  // Destroy our reusable tokenizer (on both the success and the error path)
+  jsont_destroy(S);
+  return status;
+}
+
+// Maps a token to a short printable label; unrecognized tokens yield "?".
+static const char* _tok_name(jsont_tok_t tok) {
+  const char* label = "?";
+  switch (tok) {
+    case JSONT_END:          label = "END";          break;
+    case JSONT_ERR:          label = "ERR";          break;
+    case JSONT_OBJECT_START: label = "OBJECT_START"; break;
+    case JSONT_OBJECT_END:   label = "OBJECT_END";   break;
+    case JSONT_ARRAY_START:  label = "ARRAY_START";  break;
+    case JSONT_ARRAY_END:    label = "ARRAY_END";    break;
+    case JSONT_TRUE:         label = "TRUE";         break;
+    case JSONT_FALSE:        label = "FALSE";        break;
+    case JSONT_NULL:         label = "NULL";         break;
+    case JSONT_NUMBER_INT:   label = "NUMBER_INT";   break;
+    case JSONT_NUMBER_FLOAT: label = "NUMBER_FLOAT"; break;
+    case JSONT_STRING:       label = "STRING";       break;
+    case JSONT_FIELD_NAME:   label = "FIELD_NAME";   break;
+    default:                                         break;
+  }
+  return label;
+}

diff --git a/example2.c b/example2.c
new file mode 100644
index 0000000..5077f85
--- /dev/null
+++ b/example2.c

@@ -0,0 +1,183 @@
+//
+// This is an example of parsing and building strict documents into C structs.
+//
+// The general approach is that each object type has a struct type and a
+// builder function. The struct type has members which represents its
+// properties. The builder function is more interesting: It takes a tokenizer
+// state and a struct instance. The builder function then reads each field
+// name from the tokenizer and calls other builder functions (this is how this
+// parser does flow control), and eventually stores the values into the struct
+// instance.
+//
+#include <jsont.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+
+// A simple array type: a growable list of untyped pointers, managed through
+// the MY_ARRAY_* macros.
+typedef struct my_array {
+  size_t size;   // number of slots allocated in `items`
+  size_t count;  // number of slots currently in use
+  void** items;  // slot storage obtained from malloc/realloc
+} my_array_t;
+
+// Represents a user object. Both strings are filled in by my_user_build with
+// copies returned from jsont_strcpy_value.
+typedef struct my_user {
+  const char* id;    // value of the "id" field
+  const char* name;  // value of the "name" field
+} my_user_t;
+
+// Represents a response from our imaginary service; filled in by
+// my_response_build.
+typedef struct my_response {
+  int64_t timestamp;      // value of the "timestamp" field
+  const char* viewer_id;  // copy of the "viewer_id" string
+  my_array_t users;       // my_user_t* entries read from the "users" array
+} my_response_t;
+
+// A helper macro for allocating a new struct instance. The memory is NOT
+// initialized and the result is NULL if the allocation fails.
+#define MY_NEW(T) ((T*)malloc(sizeof(T)))
+
+// Some helper macros for dealing with growing arrays. Macro arguments are
+// parenthesized so that arbitrary expressions can be passed safely.
+
+// Gives array A fresh storage for `_size` items and marks it as empty.
+#define MY_ARRAY_ALLOC(A, _size) do {\
+    (A).items = (void*)malloc(sizeof(void*)*(_size)); \
+    (A).count = 0; \
+    (A).size = (_size); \
+  } while(0)
+// Changes the capacity of array A to `_size` items, keeping its contents.
+#define MY_ARRAY_RESIZE(A, _size) do {\
+    (A).items = (void*)realloc((A).items, sizeof(void*)*(_size)); \
+    (A).size = (_size); \
+  } while(0)
+// Stores `item` in the next free slot of A. The caller must make sure that
+// there is room (count < size) first.
+#define MY_ARRAY_APPEND(A, item) ((A).items[(A).count++] = (void*)(item))
+// Reads the next token into the caller's local `tok`; if it is not TOKTYPE,
+// prints a message and makes the *calling function* return false.
+#define MY_NEXT_EXPECT(S, TOKTYPE) do { \
+  if ((tok = jsont_next(S)) != (TOKTYPE)) { \
+    printf("Error: Builder expected token " #TOKTYPE " (%d)\n", __LINE__); \
+    return false; \
+  }} while (0)
+
+// Builder function for user objects.
+//
+// Expects the current token of S to be JSONT_OBJECT_START and consumes field
+// names until a token that is not a field name is read. The string values of
+// "id" and "name" are copied into `obj`. Returns false when the current token
+// is not an object start, when a field has an unexpected value type, or when
+// an unknown field is encountered.
+bool my_user_build(jsont_ctx_t* S, my_user_t* obj) {
+  jsont_tok_t tok = jsont_current(S);
+  if (tok != JSONT_OBJECT_START) return false;
+
+  // for each field
+  while ((tok = jsont_next(S)) == JSONT_FIELD_NAME) {
+    // Compare the complete field name. A memcmp limited to the length of the
+    // field name would accept prefixes such as "i" and would read beyond the
+    // string literal for longer names.
+    if (jsont_str_equals(S, "id")) {
+      MY_NEXT_EXPECT(S, JSONT_STRING);
+      obj->id = jsont_strcpy_value(S);
+
+    } else if (jsont_str_equals(S, "name")) {
+      MY_NEXT_EXPECT(S, JSONT_STRING);
+      obj->name = jsont_strcpy_value(S);
+
+    } else {
+      const uint8_t* fieldname = 0;
+      size_t len = jsont_data_value(S, &fieldname);
+      printf("%s: Unexpected field: \"%.*s\"\n", __FUNCTION__,
+        (int)len, (const char*)fieldname);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Builder function for response objects.
+//
+// Expects the current token of S to be JSONT_OBJECT_START and consumes field
+// names until a token that is not a field name is read. Fills in the
+// "timestamp", "viewer_id" and "users" members of `obj`. Returns false when
+// the current token is not an object start, a field has an unexpected value
+// type, an unknown field is encountered, or a user cannot be built.
+bool my_response_build(jsont_ctx_t* S, my_response_t* obj) {
+  jsont_tok_t tok = jsont_current(S);
+  if (tok != JSONT_OBJECT_START) return false;
+
+  // for each field
+  while ((tok = jsont_next(S)) == JSONT_FIELD_NAME) {
+    // Compare complete field names (see jsont_str_equals); a length-limited
+    // memcmp would accept prefixes and could read beyond the literal.
+    if (jsont_str_equals(S, "timestamp")) {
+      MY_NEXT_EXPECT(S, JSONT_NUMBER_INT);
+      obj->timestamp = jsont_int_value(S);
+
+    } else if (jsont_str_equals(S, "viewer_id")) {
+      MY_NEXT_EXPECT(S, JSONT_STRING);
+      obj->viewer_id = jsont_strcpy_value(S);
+
+    } else if (jsont_str_equals(S, "users")) {
+      MY_NEXT_EXPECT(S, JSONT_ARRAY_START);
+      MY_ARRAY_ALLOC(obj->users, 10);
+
+      // for each user object
+      while ((tok = jsont_next(S)) == JSONT_OBJECT_START) {
+        if (obj->users.count == obj->users.size) {
+          MY_ARRAY_RESIZE(obj->users, obj->users.size * 2);
+        }
+        my_user_t* user = MY_NEW(my_user_t);
+        if (user == 0)
+          return false;
+        // Start from NULL members so that a user with missing fields never
+        // exposes uninitialized pointers.
+        memset(user, 0, sizeof(*user));
+        if (!my_user_build(S, user)) {
+          // Do not leak the partially built user
+          free((void*)user->id);
+          free((void*)user->name);
+          free(user);
+          return false;
+        }
+        MY_ARRAY_APPEND(obj->users, user);
+      }
+    } else {
+      const uint8_t* fieldname = 0;
+      size_t len = jsont_data_value(S, &fieldname);
+      printf("%s: Unexpected field: \"%.*s\"\n", __FUNCTION__,
+        (int)len, (const char*)fieldname);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Our simple response parser entry point. Reads the first token from S, which
+// must start an object, and builds a response from it. Returns a newly
+// allocated response, or NULL on error (including allocation failure).
+my_response_t* my_parse_response(jsont_ctx_t* S) {
+  if (jsont_next(S) != JSONT_OBJECT_START) {
+    printf("Expected JSON input to start with an object.\n");
+    return 0;
+  }
+  my_response_t* rsp = MY_NEW(my_response_t);
+  if (rsp == 0) {
+    return 0;
+  }
+  // Zero every member so that fields missing from the input are left as
+  // 0/NULL instead of uninitialized memory.
+  memset(rsp, 0, sizeof(*rsp));
+  if (!my_response_build(S, rsp)) {
+    free(rsp);
+    return 0;
+  }
+  return rsp;
+}
+
+// Parses a fixed sample "response" document into C structs and prints the
+// name of the third user. Returns 0 on success and 1 if parsing fails.
+int main(int argc, const char** argv) {
+  // Create a new reusable tokenizer
+  jsont_ctx_t* S = jsont_create(0);
+
+  // Sample "response" data
+  const char* inbuf = "{"
+    "\"viewer_id\": \"abc123\","
+    "\"timestamp\": 1234567890,"
+    "\"users\":["
+      "{\"name\": \"John Smith\", \"id\": \"12c39a\"},\n"
+      "{\"name\": \"John Doe\",   \"id\": \"01dk2\"},\n"
+      "{\"name\": \"Kate Smith\", \"id\": \"apru1\"},\n"
+      "{\"name\": \"Rebecca Doe\",\"id\": \"aRm26\"}\n"
+    "]"
+  "}";
+
+  // Parse the sample "response" data
+  jsont_reset(S, (const uint8_t*)inbuf, strlen(inbuf));
+  my_response_t* rsp = my_parse_response(S);
+
+  // Epic success?
+  int status = 0;
+  if (rsp) {
+    printf("Built response structure.\n");
+    // Only index the third user if the response actually contains one
+    if (rsp->users.count > 2) {
+      printf("rsp->users.items[2]->name => \"%s\"\n",
+        ((my_user_t*)rsp->users.items[2])->name );
+    }
+
+  } else {
+    printf("Failed to build response structure.\n");
+    if (jsont_error_info(S) != 0) {
+      fprintf(stderr, "Error: %s ('%c' at offset %lu)\n",
+              jsont_error_info(S),
+              (char)jsont_current_byte(S),
+              (unsigned long)jsont_current_offset(S));
+    }
+    // Exit with error (after releasing the tokenizer below)
+    status = 1;
+  }
+
+  // Destroy our reusable tokenizer and exit
+  jsont_destroy(S);
+  return status;
+}

diff --git a/jsont.c b/jsont.c
new file mode 100644
index 0000000..5863c7a
--- /dev/null
+++ b/jsont.c

@@ -0,0 +1,569 @@
+// JSON Tokenizer. Copyright (c) 2012, Rasmus Andersson. All rights reserved.
+// Use of this source code is governed by a MIT-style license that can be
+// found in the LICENSE file.
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <ctype.h> // isdigit
+#include <errno.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+// Error info
+// Unless JSONT_ERRINFO_CUSTOM is defined, jsont_err_t is `const char*` and
+// DEF_EM defines one static message string named JSONT_ERRINFO_<NAME> per
+// error below.
+#ifndef JSONT_ERRINFO_CUSTOM
+#define jsont_err_t const char*
+#define DEF_EM(NAME, msg) static jsont_err_t JSONT_ERRINFO_##NAME = msg
+DEF_EM(STACK_SIZE, "Stack size limit exceeded");
+DEF_EM(UNEXPECTED_OBJECT_END,
+  "Unexpected end of object while not in an object");
+DEF_EM(UNEXPECTED_ARRAY_END, "Unexpected end of array while not in an array");
+DEF_EM(UNEXPECTED_COMMA, "Unexpected \",\"");
+DEF_EM(UNEXPECTED_COLON, "Unexpected \":\"");
+DEF_EM(UNEXPECTED, "Unexpected input");
+DEF_EM(UNEXPECTED_UNICODE_SEQ, "Malformed unicode encoded sequence in string");
+#undef DEF_EM
+#endif
+
+// Size of stack used for structures (in/out array and objects). This value
+// is a balance between memory size of a ctx and how many levels deep the
+// tokenizer can go.
+#define _STRUCT_TYPE_STACK_SIZE 512
+// Smallest size, in bytes, of the first value_buf allocation made by
+// _value_buf_append.
+#define _VALUE_BUF_MIN_SIZE 64
+
+// Hex digit lookup, indexed by (byte - '0') for bytes in the range '0'..'f'.
+// Entries for bytes that are not hexadecimal digits hold 0xFF, which is the
+// value a uint8_t element has when it is initialized with -1.
+static const uint8_t kHexValueTable[55] = {
+  // '0'-'9'
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+  // ':'-'@'
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  // 'A'-'F'
+  0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+  // 'G'-'`'
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  // 'a'-'f'
+  0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+};
+
+// Token type as stored inside the tokenizer.
+typedef uint8_t jsont_tok_t;
+
+// Tokenizer context: the input being read, the current token and its value,
+// the last error, and the stack of currently open objects/arrays.
+typedef struct jsont_ctx {
+  void* user_data;               // value given to jsont_create
+  const uint8_t* input_buf;      // start of the input (set by jsont_reset)
+  const uint8_t* input_buf_ptr;  // next input byte to be read
+  size_t input_len;              // total number of input bytes
+  const uint8_t* input_buf_value_start;  // start of current value in input
+  const uint8_t* input_buf_value_end;    // end (exclusive) of current value
+  // Side buffer which holds the current value instead of the input slice
+  // whenever `inuse` is true.
+  struct {
+    uint8_t* data;   // heap storage, or 0 before the first append
+    size_t size;     // number of bytes allocated at `data`
+    size_t length;   // number of bytes of `data` that hold the value
+    bool inuse;      // true if the current value lives in `data`
+  } value_buf;
+  jsont_err_t error_info;  // last error, or 0 (cleared by jsont_reset)
+  jsont_tok_t curr_tok;    // token most recently produced
+  size_t st_stack_size;    // capacity of st_stack
+  size_t st_stack_len;     // number of entries in use on st_stack
+  // One JSONT_OBJECT_START/JSONT_ARRAY_START entry per open structure
+  jsont_tok_t st_stack[_STRUCT_TYPE_STACK_SIZE];
+} jsont_ctx_t;
+
+#define _JSONT_IN_SOURCE
+#include <jsont.h>
+
+// Parses `len` bytes at `bytes` as a hexadecimal number (digits 0-9, A-F and
+// a-f, without any prefix). Returns the value, 0 when `len` is 0, or
+// ULONG_MAX when a byte is not a hexadecimal digit or the value does not fit
+// in an unsigned long.
+unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) {
+  const unsigned long cutoff = ULONG_MAX / 16;
+  const unsigned int cutoff_digit = (unsigned int)(ULONG_MAX % 16);
+  unsigned long value = 0;
+
+  for (size_t i = 0; i != len; ++i) {
+    const uint8_t b = bytes[i];
+    // kHexValueTable only covers '0'..'f'. Its entries are uint8_t, so the
+    // "not a digit" marker reads back as 255 rather than -1; anything above
+    // 15 is therefore treated as an invalid digit.
+    const uint8_t digit = (b >= '0' && b <= 'f') ? kHexValueTable[b - '0']
+                                                 : 0xFF;
+    if (digit > 15 || // bad digit
+        (value > cutoff) || // overflow
+        ((value == cutoff) && (digit > cutoff_digit)) ) {
+      return ULONG_MAX;
+    }
+    value = (value * 16) + digit;
+  }
+
+  return value;
+}
+
+// Creates a new, zero-initialized tokenizer context which carries
+// `user_data`. Returns NULL if memory cannot be allocated. Release the
+// context with jsont_destroy.
+jsont_ctx_t* jsont_create(void* user_data) {
+  jsont_ctx_t* ctx = (jsont_ctx_t*)calloc(1, sizeof(jsont_ctx_t));
+  if (ctx == 0) {
+    return 0;
+  }
+  ctx->user_data = user_data;
+  ctx->st_stack_size = _STRUCT_TYPE_STACK_SIZE;
+  return ctx;
+}
+
+// Releases the context and the value buffer it owns.
+void jsont_destroy(jsont_ctx_t* ctx) {
+  // free() ignores null pointers, so an unused value buffer needs no check
+  free(ctx->value_buf.data);
+  free(ctx);
+}
+
+// Points the tokenizer at `length` bytes of new input starting at `bytes` and
+// clears all per-document state (current token, current value, structure
+// stack and error). The allocated value buffer is kept for reuse.
+void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) {
+  // Input window
+  ctx->input_buf = bytes;
+  ctx->input_buf_ptr = bytes;
+  ctx->input_len = length;
+
+  // Current value
+  ctx->input_buf_value_start = 0;
+  ctx->input_buf_value_end = 0;
+  ctx->value_buf.inuse = false;
+  ctx->value_buf.length = 0;
+
+  // Token, structure and error state
+  ctx->curr_tok = JSONT_END;
+  ctx->st_stack_len = 0;
+  ctx->error_info = 0;
+}
+
+// Returns the token most recently stored in the context.
+jsont_tok_t jsont_current(const jsont_ctx_t* ctx) {
+  const jsont_tok_t latest = ctx->curr_tok;
+  return latest;
+}
+
+// Returns the user data pointer stored in the context by jsont_create.
+void* jsont_user_data(const jsont_ctx_t* ctx) {
+  void* stored = ctx->user_data;
+  return stored;
+}
+
+// Get the current/last byte read. Suitable for debugging JSONT_ERR.
+// Returns 0 when there is no input, or when no byte has been consumed yet
+// (there is no "previous byte" to report in that case, and reading one would
+// access memory in front of the input buffer).
+uint8_t jsont_current_byte(jsont_ctx_t* ctx) {
+  if (ctx->input_buf_ptr == 0 || ctx->input_buf_ptr == ctx->input_buf) {
+    return 0;
+  }
+  return *(ctx->input_buf_ptr-1);
+}
+
+// Returns how many bytes the read position is past the start of the input.
+size_t jsont_current_offset(jsont_ctx_t* ctx) {
+  const uint8_t* const origin = ctx->input_buf;
+  const uint8_t* const cursor = ctx->input_buf_ptr;
+  return cursor - origin;
+}
+
+jsont_err_t jsont_error_info(jsont_ctx_t* ctx) {
+  return ctx->error_info;
+}
+
+// True when no value can be read: either no value start has been recorded, or
+// the current token lies outside the _JSONT_VALUES_START.._JSONT_VALUES_END
+// range.
+static inline bool _no_value(jsont_ctx_t* ctx) {
+  if (ctx->input_buf_value_start == 0) {
+    return true;
+  }
+  const bool is_value_token = ctx->curr_tok >= _JSONT_VALUES_START
+                           && ctx->curr_tok <= _JSONT_VALUES_END;
+  return !is_value_token;
+}
+
+// Number of input bytes that have not been consumed yet.
+static inline size_t _input_avail(jsont_ctx_t* ctx) {
+  const uint8_t* const origin = ctx->input_buf;
+  const uint8_t* const cursor = ctx->input_buf_ptr;
+  return ctx->input_len - (cursor - origin);
+}
+
+// Consumes and returns the next input byte, or returns 0 without consuming
+// anything when the input is exhausted.
+static inline uint8_t _next_byte(jsont_ctx_t* ctx) {
+  if (_input_avail(ctx) == 0) {
+    return 0;
+  }
+  const uint8_t byte = *ctx->input_buf_ptr;
+  ++ctx->input_buf_ptr;
+  return byte;
+}
+
+// Returns the innermost entry of the structure stack, or JSONT_END when the
+// stack is empty.
+static inline jsont_tok_t _st_stack_top(const jsont_ctx_t* ctx) {
+  const size_t depth = ctx->st_stack_len;
+  if (depth == 0) {
+    return JSONT_END;
+  }
+  return ctx->st_stack[depth - 1];
+}
+
+// Stores a pointer to the bytes of the current value in `*bytes` and returns
+// their count. Returns 0 and leaves `*bytes` untouched when there is no
+// value. The bytes come from the value buffer when it is in use, otherwise
+// directly from the input.
+size_t jsont_data_value(jsont_ctx_t* ctx, const uint8_t** bytes) {
+  if (_no_value(ctx)) {
+    return 0;
+  }
+  if (!ctx->value_buf.inuse) {
+    *bytes = ctx->input_buf_value_start;
+    return ctx->input_buf_value_end - ctx->input_buf_value_start;
+  }
+  *bytes = ctx->value_buf.data;
+  return ctx->value_buf.length;
+}
+
+// Returns true if the current value consists of exactly the `length` bytes
+// at `bytes`. The value is taken from the value buffer when it is in use,
+// otherwise from the input slice.
+bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) {
+  const uint8_t* value;
+  bool same_length;
+  if (ctx->value_buf.inuse) {
+    value = ctx->value_buf.data;
+    same_length = (ctx->value_buf.length == length);
+  } else {
+    value = ctx->input_buf_value_start;
+    same_length =
+      (ctx->input_buf_value_end - ctx->input_buf_value_start == length);
+  }
+  // The contents are only compared once the lengths are known to agree
+  return same_length && memcmp(value, bytes, length) == 0;
+}
+
+// Returns a newly allocated, NUL-terminated copy of the current value, which
+// the caller must release with free(). Returns 0 when there is no current
+// value or when memory cannot be allocated.
+char* jsont_strcpy_value(jsont_ctx_t* ctx) {
+  if (_no_value(ctx)) {
+    return 0;
+  }
+
+  const uint8_t* bytes = 0;
+  const size_t len = jsont_data_value(ctx, &bytes);
+  char* buf = (char*)malloc(len+1);
+  if (buf == 0) {
+    return 0; // out of memory
+  }
+  if (len != 0) {
+    memcpy((void*)buf, (const void*)bytes, len);
+  }
+  buf[len] = 0;
+  return buf;
+}
+
+// Parses the current value as a base-10 signed 64-bit integer.
+//
+// An optional leading '-' or '+' is accepted, and parsing stops at the first
+// byte that is not a digit. Returns:
+//  - INT64_MIN if there is no (or an empty) current value;
+//  - INT64_MIN with errno = EINVAL if the value contains no digits;
+//  - INT64_MIN or INT64_MAX with errno = ERANGE if the number does not fit;
+//  - the parsed number otherwise.
+// Only the bytes of the value itself are read.
+int64_t jsont_int_value(jsont_ctx_t* ctx) {
+  if (_no_value(ctx)) {
+    return INT64_MIN;
+  }
+
+  const uint8_t* p = 0;
+  const size_t len = jsont_data_value(ctx, &p);
+  if (len == 0) {
+    return INT64_MIN;
+  }
+  const uint8_t* const end = p + len; // one past the last byte of the value
+
+  // Optional sign
+  bool negative = false;
+  if (*p == '-' || *p == '+') {
+    negative = (*p == '-');
+    if (++p == end) {
+      // A sign without any digits
+      errno = EINVAL;
+      return INT64_MIN;
+    }
+  }
+
+  // The largest magnitude which can be represented is 2^63 for negative
+  // numbers and 2^63-1 otherwise. Split it into "all digits but the last"
+  // (cutoff) and "the last digit" (cutlim) to detect overflow before it
+  // happens.
+  const int base = 10;
+  uint64_t cutoff = negative ? (uint64_t)INT64_MAX + 1 : (uint64_t)INT64_MAX;
+  const int cutlim = (int)(cutoff % base);
+  cutoff /= base;
+
+  uint64_t acc = 0;
+  int any = 0; // 0: no digits seen, 1: digits seen, -1: overflow
+  for ( ; p != end; ++p) {
+    if (*p < '0' || *p > '9') break;
+    const int digit = *p - '0';
+    if (any < 0 || acc > cutoff || (acc == cutoff && digit > cutlim)) {
+      any = -1;
+    } else {
+      any = 1;
+      acc = (acc * base) + digit;
+    }
+  }
+
+  if (any < 0) {
+    errno = ERANGE;
+    return negative ? INT64_MIN : INT64_MAX;
+  }
+  if (!any) {
+    errno = EINVAL;
+    return INT64_MIN;
+  }
+  if (negative) {
+    acc = 0 - acc; // two's complement negation of the magnitude
+  }
+  return (int64_t)acc;
+}
+
+// _JSONT_NAN is the value jsont_float_value returns on failure: the NAN macro
+// when it is defined, otherwise the result of nan(0).
+#ifdef NAN
+  #define _JSONT_NAN NAN
+#else
+  #define _JSONT_NAN nan(0)
+#endif
+
+// Parses the current value as a double using atof().
+//
+// Returns NaN with errno = EINVAL when there is no current value or when the
+// tokenizer has consumed all input, and NaN when the value is empty.
+double jsont_float_value(jsont_ctx_t* ctx) {
+  // Note: atof() reads until it meets a byte which cannot be part of a
+  // number, which might cause a segfault if the value is at the very end of
+  // the input. Reading a float while at the end of the input is therefore
+  // treated as an error.
+  const bool unreadable = _no_value(ctx) || _input_avail(ctx) == 0;
+  if (unreadable) {
+    errno = EINVAL;
+    return _JSONT_NAN;
+  }
+
+  const uint8_t* text = 0;
+  if (jsont_data_value(ctx, &text) == 0) {
+    return _JSONT_NAN;
+  }
+  return atof((const char*)text);
+}
+
+// Records `tok` as the current token and keeps the structure stack in sync:
+// object/array starts are pushed, object/array ends pop their matching start.
+// Returns `tok`, or JSONT_ERR (with error_info set and the current token
+// changed to JSONT_ERR) when the stack is full or an end token does not match
+// the innermost open structure.
+static inline jsont_tok_t _set_tok(jsont_ctx_t* ctx, jsont_tok_t tok) {
+  ctx->curr_tok = tok;
+
+  switch (tok) {
+    case JSONT_OBJECT_START:
+    case JSONT_ARRAY_START:
+      if (ctx->st_stack_len == ctx->st_stack_size) {
+        // TODO: Grow st_stack
+        ctx->error_info = JSONT_ERRINFO_STACK_SIZE;
+        ctx->curr_tok = JSONT_ERR;
+        return JSONT_ERR;
+      }
+      ctx->st_stack[ctx->st_stack_len] = tok;
+      ++ctx->st_stack_len;
+      break;
+
+    case JSONT_OBJECT_END:
+      if (_st_stack_top(ctx) != JSONT_OBJECT_START) {
+        ctx->error_info = JSONT_ERRINFO_UNEXPECTED_OBJECT_END;
+        ctx->curr_tok = JSONT_ERR;
+        return JSONT_ERR;
+      }
+      --ctx->st_stack_len;
+      break;
+
+    case JSONT_ARRAY_END:
+      if (_st_stack_top(ctx) != JSONT_ARRAY_START) {
+        ctx->error_info = JSONT_ERRINFO_UNEXPECTED_ARRAY_END;
+        ctx->curr_tok = JSONT_ERR;
+        return JSONT_ERR;
+      }
+      --ctx->st_stack_len;
+      break;
+
+    default:
+      // All other tokens leave the structure stack untouched
+      break;
+  }
+
+  return tok;
+}
+// Moves the read position back by a single byte.
+static inline void _rewind_one_byte(jsont_ctx_t* ctx) {
+  ctx->input_buf_ptr -= 1;
+}
+// Moves the read position back by `n` bytes.
+static inline void _rewind_bytes(jsont_ctx_t* ctx, size_t n) {
+  ctx->input_buf_ptr = ctx->input_buf_ptr - n;
+}
+// Moves the read position forward by `n` bytes.
+static inline void _skip_bytes(jsont_ctx_t* ctx, size_t n) {
+  ctx->input_buf_ptr = ctx->input_buf_ptr + n;
+}
+// Finishes reading an atom whose first byte has already been consumed.
+// `slacklen` is the number of bytes remaining in the atom (e.g. "ull" after
+// "n" or "alse" after "f"). If that many bytes are available they are skipped
+// and `tok` becomes the current token; otherwise the first byte is un-read
+// and JSONT_END becomes the current token.
+static inline uint8_t _read_atom(jsont_ctx_t* ctx, size_t slacklen,
+                                 jsont_tok_t tok) {
+  const bool complete = !(_input_avail(ctx) < slacklen);
+  if (complete) {
+    _skip_bytes(ctx, slacklen);
+    return _set_tok(ctx, tok);
+  }
+  // rewind and wait for buffer fill
+  _rewind_one_byte(ctx);
+  return _set_tok(ctx, JSONT_END);
+}
+// True when the current token is an object start, or a comma while the
+// innermost open structure is an object.
+static inline bool _expects_field_name(jsont_ctx_t* ctx) {
+  if (ctx->curr_tok == JSONT_OBJECT_START) {
+    return true;
+  }
+  if (ctx->curr_tok != _JSONT_COMMA) {
+    return false;
+  }
+  return _st_stack_top(ctx) == JSONT_OBJECT_START;
+}
+
+// Appends `len` bytes from `data` to the value buffer and marks the buffer as
+// holding the current value. The first call allocates at least
+// _VALUE_BUF_MIN_SIZE bytes (or twice `len`, if larger); later calls grow the
+// allocation by twice `len` whenever the new bytes would not fit.
+static void _value_buf_append(jsont_ctx_t* ctx, const uint8_t* data, size_t len) {
+  if (ctx->value_buf.size != 0) {
+    // A buffer exists: grow it if needed, then append at the end
+    const size_t needed = ctx->value_buf.length + len;
+    if (needed > ctx->value_buf.size) {
+      const size_t grown = ctx->value_buf.size + (len * 2);
+      ctx->value_buf.data = realloc(ctx->value_buf.data, grown);
+      assert(ctx->value_buf.data != 0);
+      ctx->value_buf.size = grown;
+    }
+    memcpy(ctx->value_buf.data + ctx->value_buf.length, data, len);
+    ctx->value_buf.length = needed;
+  } else {
+    // First use: allocate the buffer and copy the initial bytes into it
+    assert(ctx->value_buf.data == 0);
+    size_t initial = len * 2;
+    if (initial < _VALUE_BUF_MIN_SIZE) {
+      initial = _VALUE_BUF_MIN_SIZE;
+    }
+    ctx->value_buf.size = initial;
+    ctx->value_buf.length = len;
+    ctx->value_buf.data = (uint8_t*)malloc(initial);
+    if (len != 0) {
+      memcpy(ctx->value_buf.data, data, len);
+    }
+  }
+  ctx->value_buf.inuse = true;
+}
+
+// Reads and returns the next token from the context's input.
+//
+// Structural bytes map directly to tokens; "n", "t" and "f" are completed by
+// _read_atom; strings and numbers record their extent in
+// input_buf_value_start / input_buf_value_end. Commas, colons and whitespace
+// yield no token of their own: the loop continues with the next byte. A zero
+// byte is treated as the end of the available input and yields JSONT_END (when
+// it occurs inside a string the cursor is first moved back to the opening
+// quote). Errors store a reason in ctx->error_info and yield JSONT_ERR.
+//
+// Strings that contain escape sequences are decoded into ctx->value_buf;
+// strings without escapes are left in place in the input buffer.
+jsont_tok_t jsont_next(jsont_ctx_t* ctx) {
+  //
+  // { } [ ] n t f "
+  //         | | | |
+  //         | | | +- /[^"]*/ "
+  //         | | +- a l s e
+  //         | +- r u e
+  //         +- u l l
+  //
+  while (1) {
+    uint8_t b = _next_byte(ctx);
+    switch (b) {
+      case '{': return _set_tok(ctx, JSONT_OBJECT_START);
+      case '}': return _set_tok(ctx, JSONT_OBJECT_END);
+      case '[': return _set_tok(ctx, JSONT_ARRAY_START);
+      case ']': return _set_tok(ctx, JSONT_ARRAY_END);
+      case 'n': return _read_atom(ctx, 3, JSONT_NULL);
+      case 't': return _read_atom(ctx, 3, JSONT_TRUE);
+      case 'f': return _read_atom(ctx, 4, JSONT_FALSE);
+      case '"': {
+        ctx->input_buf_value_start = ctx->input_buf_ptr;
+        ctx->value_buf.inuse = false;
+        ctx->value_buf.length = 0;
+        // prev_b is '\\' only while an escape sequence is pending, i.e. the
+        // previous byte was an escape prefix that has not been consumed yet.
+        uint8_t prev_b = 0;
+        while (1) {
+          b = _next_byte(ctx);
+
+          if (b == '\\') {
+            if (prev_b == '\\') {
+              // This is an actual '\'.
+              assert(ctx->value_buf.inuse == true); // should be buffering
+              _value_buf_append(ctx, ctx->input_buf_ptr-1, 1); // append "\"
+              // The escape sequence "\\" is now complete. Clear prev_b so the
+              // byte that follows is not mistaken for an escaped character
+              // (otherwise a string ending in "\\" would swallow its closing
+              // quote).
+              prev_b = 0;
+              continue;
+            } else {
+              // Okay, this is an escape prefix. Move to buffering value.
+              if (ctx->value_buf.inuse == 0) {
+                _value_buf_append(ctx,
+                  ctx->input_buf_value_start,
+                  // any data before the "\":
+                  (ctx->input_buf_ptr-1 - ctx->input_buf_value_start) );
+              }
+            }
+          } else {
+            // Any byte except '\'
+
+            if (prev_b == '\\') {
+              // Currently just after an escape character
+              assert(ctx->value_buf.inuse == true); // should be buffering
+
+              // JSON specifies a few "magic" characters that have a different
+              // meaning than their value:
+              switch (b) {
+              case 'b':
+                _value_buf_append(ctx, (const uint8_t*)"\b", 1);
+                break;
+              case 'f':
+                _value_buf_append(ctx, (const uint8_t*)"\f", 1);
+                break;
+              case 'n':
+                _value_buf_append(ctx, (const uint8_t*)"\n", 1);
+                break;
+              case 'r':
+                _value_buf_append(ctx, (const uint8_t*)"\r", 1);
+                break;
+              case 't':
+                _value_buf_append(ctx, (const uint8_t*)"\t", 1);
+                break;
+              case 'u': {
+                // 4 hex digits should follow
+                if (_input_avail(ctx) < 4) {
+                  // Not enough input: go back to the opening quote so the
+                  // whole string is re-read once more data is available
+                  _rewind_bytes(ctx,
+                    ctx->input_buf_ptr - (ctx->input_buf_value_start-1));
+                  return _set_tok(ctx, JSONT_END);
+                }
+                unsigned long utf16cp = _hex_str_to_ul(ctx->input_buf_ptr, 4);
+                ctx->input_buf_ptr += 4;
+                if (utf16cp == ULONG_MAX) {
+                  ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ;
+                  return _set_tok(ctx, JSONT_ERR);
+                }
+
+                uint32_t cp = (uint16_t)(0xffff & utf16cp);
+
+                // Is lead surrogate?
+                if (cp >= 0xd800u && cp <= 0xdbffu) {
+                  // TODO: Implement pairs by reading another "\uHHHH"
+                  ctx->error_info = JSONT_ERRINFO_UNEXPECTED_UNICODE_SEQ;
+                  return _set_tok(ctx, JSONT_ERR);
+                }
+
+                // Append UTF-8 byte(s) representing the Unicode codepoint `cp`
+                if (cp < 0x80) {
+                  uint8_t cp8 = ((uint8_t)cp);
+                  _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                } else if (cp < 0x800) {
+                  uint8_t cp8 = (uint8_t)((cp >> 6) | 0xc0);
+                  _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                  cp8 = (uint8_t)((cp & 0x3f) | 0x80);
+                  _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                } else {
+                  uint8_t cp8 = (uint8_t)((cp >> 12) | 0xe0);
+                  _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                  cp8 = (uint8_t)(((cp >> 6) & 0x3f) | 0x80);
+                  _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                  cp8 = (uint8_t)((cp & 0x3f) | 0x80);
+                  _value_buf_append(ctx, (const uint8_t*)&cp8, 1);
+                }
+
+                break;
+              }
+              default: {
+                // Any other escaped byte (e.g. '"' or '/') stands for itself
+                _value_buf_append(ctx, &b, 1);
+                break;
+              }
+              } // switch
+
+            } else {
+              // Previous character was NOT an escape character
+
+              if (b == '"') {
+                // Well, this marks the end of a string
+                ctx->input_buf_value_end = ctx->input_buf_ptr-1;
+                return _set_tok(ctx, _expects_field_name(ctx)
+                  ? JSONT_FIELD_NAME : JSONT_STRING);
+                break;
+              } else if (b == 0) {
+                // Input buffer ends in the middle of a string
+                _rewind_bytes(ctx,
+                  ctx->input_buf_ptr - (ctx->input_buf_value_start-1));
+                return _set_tok(ctx, JSONT_END);
+              } else {
+                if (ctx->value_buf.inuse) {
+                  _value_buf_append(ctx, &b, 1);
+                }
+              }
+            }
+          }
+
+          prev_b = b;
+        }
+      }
+      case ',':
+        if (   ctx->curr_tok == JSONT_OBJECT_START
+            || ctx->curr_tok == JSONT_ARRAY_START
+            || ctx->curr_tok == JSONT_END
+            || ctx->curr_tok == JSONT_ERR) {
+          // Keep the original error reason when already in the error state
+          if (ctx->curr_tok != JSONT_ERR)
+            ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COMMA;
+          return _set_tok(ctx, JSONT_ERR);
+        }
+        _set_tok(ctx, _JSONT_COMMA);
+        // read next by simply letting the outer "while" do its thing
+        break;
+
+      case ':':
+        if (ctx->curr_tok != JSONT_FIELD_NAME) {
+          ctx->error_info = JSONT_ERRINFO_UNEXPECTED_COLON;
+          return _set_tok(ctx, JSONT_ERR);
+        }
+        // let the outer "while" do its thing
+        break;
+
+      case ' ': case '\r': case '\n': case '\t':
+        // ignore whitespace and let the outer "while" do its thing
+        break;
+
+      case 0:
+        //printf("** %d\n", __LINE__);
+        return _set_tok(ctx, JSONT_END);
+
+      default:
+        if (isdigit((int)b) || b == '+' || b == '-') {
+          // We are reading a number
+          ctx->input_buf_value_start = ctx->input_buf_ptr-1;
+          //uint8_t prev_b = 0;
+          bool is_float = false;
+          while (1) {
+            b = _next_byte(ctx);
+            if (b == '.') {
+              is_float = true;
+            } else if (!isdigit((int)b)) {
+              // Any non-digit (including a zero byte) terminates the literal
+              _rewind_one_byte(ctx);
+              ctx->input_buf_value_end = ctx->input_buf_ptr;
+              return _set_tok(ctx, is_float ? JSONT_NUMBER_FLOAT
+                                            : JSONT_NUMBER_INT);
+            } else if (b == 0) {
+              // NOTE(review): unreachable -- a zero byte is not a digit and is
+              // therefore handled by the branch above. Left in place because
+              // reordering would change what is returned for a number that
+              // ends exactly at the end of the input; confirm intent.
+              // Input buffer ends before we know that the number-value ended
+              _rewind_bytes(ctx, ctx->input_buf_ptr
+                                 - (ctx->input_buf_value_start-1));
+              return _set_tok(ctx, JSONT_END);
+            }
+          }
+        }
+
+        ctx->error_info = JSONT_ERRINFO_UNEXPECTED;
+        return _set_tok(ctx, JSONT_ERR);
+    }
+  } // while (1)
+}
+

diff --git a/jsont.cc b/jsont.cc
new file mode 100644
index 0000000..09b1e45
--- /dev/null
+++ b/jsont.cc

@@ -0,0 +1,561 @@
+#include "jsont.hh"
+
+namespace jsont {
+
+// Maps (byte - '0') to the value of that hexadecimal digit, or -1 when the
+// byte is not a hexadecimal digit. Covers the byte range '0' through 'f'.
+static const int8_t kHexValueTable[55] = {
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // 0-9
+  -1, -1, -1, -1, -1, -1, -1,
+  10, 11, 12, 13, 14, 15, // A-F
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1,
+  10, 11, 12, 13, 14, 15 // a-f
+};
+
+// Parses `len` bytes at `bytes` as an unsigned hexadecimal number.
+// Returns the parsed value, or UINT64_MAX when a byte is not a hexadecimal
+// digit or the value does not fit in 64 bits. An empty input yields 0.
+static uint64_t _xtou64(const uint8_t* bytes, size_t len) {
+  uint64_t value = 0;
+  // Largest value that can still be multiplied by 16 without overflow, and
+  // the largest digit that may be added once `value` equals that cutoff.
+  const uint64_t cutoff = UINT64_MAX / 16;
+  const int cutoff_digit = (int)(UINT64_MAX - cutoff * 16);
+
+  for (size_t i = 0; i != len; ++i) {
+    const uint8_t b = bytes[i];
+    const int8_t digit = (b >= '0' && b <= 'f') ? kHexValueTable[b-'0'] : -1;
+    if (digit == -1 || // bad digit
+        (value > cutoff) || // overflow
+        ((value == cutoff) && (digit > cutoff_digit)) ) {
+      return UINT64_MAX;
+    }
+    value = (value * 16) + (uint64_t)digit;
+  }
+
+  return value;
+}
+
+
+// Not-a-number value returned when a number literal cannot be interpreted.
+#ifdef NAN
+  #define _JSONT_NAN NAN
+#else
+  // nan() takes a C string tag; a literal 0 would hand it a null pointer.
+  #define _JSONT_NAN nan("")
+#endif
+
+
+// Returns a printable name for `tok`, suitable for diagnostics.
+// Every public token has a name, including Error; any other value (for
+// example the internal _Comma marker) is reported as "?".
+const char* token_name(jsont::Token tok) {
+  switch (tok) {
+    case End:         return "End";
+    case ObjectStart: return "ObjectStart";
+    case ObjectEnd:   return "ObjectEnd";
+    case ArrayStart:  return "ArrayStart";
+    case ArrayEnd:    return "ArrayEnd";
+    case True:        return "True";
+    case False:       return "False";
+    case Null:        return "Null";
+    case Integer:     return "Integer";
+    case Float:       return "Float";
+    case String:      return "String";
+    case FieldName:   return "FieldName";
+    case Error:       return "Error";
+    default:          return "?";
+  }
+}
+
+
+// Static helpers that operate directly on a Tokenizer's input state.
+class TokenizerInternal {
+public:
+  // Pointer to the next unread input byte of `self`.
+  inline static const uint8_t* currentInput(const Tokenizer& self) {
+    const uint8_t* base = self._input.bytes;
+    return base + self._input.offset;
+  }
+
+  // Completes an atom whose first byte has already been consumed. The next
+  // `len` input bytes must equal `str`; on success they are consumed and
+  // `token` becomes the current token. Too little input yields
+  // PrematureEndOfInput, a mismatch yields InvalidByte.
+  inline static const Token& readAtom(Tokenizer& self, const char* str,
+        size_t len, const Token& token) {
+    if (self.availableInput() < len) {
+      return self.setError(Tokenizer::PrematureEndOfInput);
+    }
+    if (memcmp(currentInput(self), str, len) != 0) {
+      return self.setError(Tokenizer::InvalidByte);
+    }
+    self._input.offset += len;
+    return self.setToken(token);
+  }
+};
+
+
+// Destructor; performs no explicit cleanup of its own.
+Tokenizer::~Tokenizer() {}
+
+
+// Points the tokenizer at a new input of `length` bytes starting at `bytes`,
+// clears the error code and immediately reads the first token. The pointer is
+// stored as-is; the bytes are not copied here. Only UTF8TextEncoding is
+// accepted.
+void Tokenizer::reset(const char* bytes, size_t length, TextEncoding encoding) {
+  // only supported encoding
+  assert(encoding == UTF8TextEncoding);
+
+  _error.code = UnspecifiedError;
+  _input.offset = 0;
+  _input.length = length;
+  _input.bytes = (const uint8_t*)bytes;
+
+  // Advance to first token
+  next();
+}
+
+
+// Returns a human-readable description of the current error code. Codes
+// without a dedicated description map to "Unspecified error".
+const char* Tokenizer::errorMessage() const {
+  const char* message = "Unspecified error";
+  switch (_error.code) {
+    case UnexpectedComma:
+      message = "Unexpected comma";
+      break;
+    case UnexpectedTrailingComma:
+      message = "Unexpected trailing comma";
+      break;
+    case InvalidByte:
+      message = "Invalid input byte";
+      break;
+    case PrematureEndOfInput:
+      message = "Premature end of input";
+      break;
+    case MalformedUnicodeEscapeSequence:
+      message = "Malformed Unicode escape sequence";
+      break;
+    case MalformedNumberLiteral:
+      message = "Malformed number literal";
+      break;
+    case UnterminatedString:
+      message = "Unterminated string";
+      break;
+    case SyntaxError:
+      message = "Illegal JSON (syntax error)";
+      break;
+    default:
+      break;
+  }
+  return message;
+}
+
+
+// Exposes the bytes of the current value without copying them.
+// On return `*bytes` points at the value and the byte count is returned. When
+// the current token carries no value, 0 is returned and `*bytes` is left
+// untouched. The pointer refers either to the internal buffer (values that
+// contained escape sequences) or directly into the input.
+//
+// The parameter is spelled `const char**`; the former `const char const**`
+// names the same type but repeats the qualifier, which some compilers reject.
+size_t Tokenizer::dataValue(const char** bytes) const {
+  if (!hasValue()) { return 0; }
+  if (_value.buffered) {
+    *bytes = (const char*)_value.buffer.data();
+    return _value.buffer.size();
+  } else {
+    *bytes = (const char*)(_input.bytes + _value.offset);
+    return _value.length;
+  }
+}
+
+
+// Interprets the current value as a floating-point number using strtod.
+// Tokens without a value yield 1.0 for True and 0.0 for everything else.
+// Returns NaN when the value sits at the very end of the input and is longer
+// than 127 bytes (see below).
+double Tokenizer::floatValue() const {
+  if (!hasValue()) {
+    return _token == jsont::True ? 1.0 : 0.0;
+  }
+
+  const char* bytes;
+
+  if (_value.buffered) {
+    // edge-case since only happens with string values using escape sequences
+    bytes = _value.buffer.c_str();
+  } else {
+    bytes = (const char*)_input.bytes + _value.offset;
+    if (availableInput() == 0) {
+      // In this case where the data lies at the edge of the buffer, we can't pass
+      // it directly to strtod, since there will be no sentinel byte. We are fine
+      // with a copy, since this is an edge case (only happens either for broken
+      // JSON or when the whole document is just a number).
+      char buf[128]; // a byte buffer: value bytes followed by a terminator
+      if (_value.length > 127) {
+        // We are unable to interpret such a large literal in this edge-case
+        return _JSONT_NAN;
+      }
+      memcpy(buf, bytes, _value.length);
+      buf[_value.length] = '\0';
+      return strtod(buf, (char**)0);
+    }
+  }
+
+  return strtod(bytes, (char**)0);
+}
+
+
+// Interprets the current value as a base-10 integer using strtoll.
+// Tokens without a value yield 1 for True and 0 for everything else.
+// Returns 0 when the value sits at the very end of the input and is longer
+// than 20 bytes (see below).
+int64_t Tokenizer::intValue() const {
+  if (!hasValue()) {
+    return _token == jsont::True ? 1LL : 0LL;
+  }
+
+  const char* bytes;
+
+  if (_value.buffered) {
+    // edge-case since only happens with string values using escape sequences
+    bytes = _value.buffer.c_str();
+  } else {
+    bytes = (const char*)_input.bytes + _value.offset;
+    if (availableInput() == 0) {
+      // In this case where the data lies at the edge of the buffer, we can't pass
+      // it directly to strtoll, since there will be no sentinel byte. We are fine
+      // with a copy, since this is an edge case (only happens either for broken
+      // JSON or when the whole document is just a number).
+      char buf[21]; // a byte buffer: value bytes followed by a terminator
+      if (_value.length > 20) {
+        // We are unable to interpret such a large literal in this edge-case
+        return 0;
+      }
+      memcpy(buf, bytes, _value.length);
+      buf[_value.length] = '\0';
+      return strtoll(buf, (char**)0, 10);
+    }
+  }
+
+  return strtoll(bytes, (char**)0, 10);
+}
+
+
+// Reads the next token from the input and makes it the current token.
+//
+// Whitespace is skipped. A comma is remembered internally (as _Comma) and
+// reading continues with the following byte. For strings the bytes after the
+// closing quote are inspected to decide between FieldName (a ':' follows) and
+// String. Strings containing escape sequences are decoded into _value.buffer;
+// all other values are referenced in place through _value.offset and
+// _value.length. Returns End when the input is exhausted and Error (via
+// setError) on malformed input.
+//
+// NOTE(review): a number literal that runs up to the very end of the input
+// currently produces End rather than Integer/Float -- confirm whether that is
+// intended.
+const Token& Tokenizer::next() {
+  //
+  // { } [ ] n t f "
+  //         | | | |
+  //         | | | +- /[^"]*/ "
+  //         | | +- a l s e
+  //         | +- r u e
+  //         +- u l l
+  //
+  while (!endOfInput()) {
+    uint8_t b = _input.bytes[_input.offset++];
+    switch (b) {
+      case '{': return setToken(ObjectStart);
+      case '}': {
+        if (_token == _Comma) { return setError(UnexpectedTrailingComma); }
+        return setToken(ObjectEnd);
+      }
+
+      case '[': return setToken(ArrayStart);
+      case ']': {
+        if (_token == _Comma) { return setError(UnexpectedTrailingComma); }
+        return setToken(ArrayEnd);
+      }
+
+      case 'n':
+        return TokenizerInternal::readAtom(*this, "ull", 3, jsont::Null);
+      case 't':
+        return TokenizerInternal::readAtom(*this, "rue", 3, jsont::True);
+      case 'f':
+        return TokenizerInternal::readAtom(*this, "alse", 4, jsont::False);
+
+      case ' ': case '\t': case '\r': case '\n': // IETF RFC4627
+        // ignore whitespace and let the outer "while" do its thing
+        break;
+
+      case 0:
+        return setError(InvalidByte);
+
+      // when we read a value, we don't produce a token until we either reach
+      // end of input, a colon (then the value is a field name), a comma, or an
+      // array or object terminator.
+
+      case '"': {
+        _value.beginAtOffset(_input.offset);
+
+        while (!endOfInput()) {
+          b = _input.bytes[_input.offset++];
+          // The byte just read may have been the last one of the input (for
+          // example the closing quote of a document that is a single string),
+          // so the offset is allowed to equal the length here.
+          assert(_input.offset <= _input.length);
+
+          switch (b) {
+
+            case '\\': {
+              // We must go buffered since the input segment != value
+              if (!_value.buffered) {
+                _value.buffered = true;
+                // Seed the buffer with everything read before the backslash
+                _value.buffer.assign(
+                  (const char*)(_input.bytes+_value.offset),
+                  _input.offset - _value.offset - 1
+                );
+              }
+
+              if (endOfInput()) {
+                return setError(PrematureEndOfInput);
+              }
+
+              b = _input.bytes[_input.offset++];
+              switch (b) {
+                case 'b': _value.buffer.append(1, '\x08'); break;
+                case 'f': _value.buffer.append(1, '\x0C'); break;
+                case 'n': _value.buffer.append(1, '\x0A'); break;
+                case 'r': _value.buffer.append(1, '\x0D'); break;
+                case 't': _value.buffer.append(1, '\x09'); break;
+                case 'u': {
+                  // \uxxxx
+                  if (availableInput() < 4) {
+                    return setError(PrematureEndOfInput);
+                  }
+
+                  uint64_t utf16cp =
+                    _xtou64(TokenizerInternal::currentInput(*this), 4);
+                  _input.offset += 4;
+
+                  if (utf16cp > 0xffff) {
+                    return setError(MalformedUnicodeEscapeSequence);
+                  }
+
+                  uint16_t cp = (uint16_t)(0xffff & utf16cp);
+
+                  // Append UTF-8 byte(s) representing the Unicode codepoint cp
+                  if (cp < 0x80) {
+                    // U+0000 - U+007F
+                    uint8_t cp8 = ((uint8_t)cp);
+                    _value.buffer.append(1, (char)cp8);
+                  } else if (cp < 0x800) {
+                    // U+0080 - U+07FF
+                    uint8_t cp8 = (uint8_t)((cp >> 6) | 0xc0);
+                    _value.buffer.append(1, (char)cp8);
+                    cp8 = (uint8_t)((cp & 0x3f) | 0x80);
+                    _value.buffer.append(1, (char)cp8);
+                  } else if (cp >= 0xD800u && cp <= 0xDFFFu) {
+                    // UTF-16 Surrogate pairs -- according to the UTF-8
+                    // definition (RFC 3629) the high and low surrogate halves
+                    // used by UTF-16 (U+D800 through U+DFFF) are not legal
+                    // Unicode values, and the UTF-8 encoding of them is an
+                    // invalid byte sequence. Instead of throwing an error, we
+                    // substitute this character with the replacement character
+                    // U+FFFD (UTF-8: EF,BF,BD).
+                    _value.buffer.append("\xEF\xBF\xBD");
+                  } else {
+                    // U+0800 - U+FFFF
+                    uint8_t cp8 = (uint8_t)((cp >> 12) | 0xe0);
+                    _value.buffer.append(1, (char)cp8);
+                    cp8 = (uint8_t)(((cp >> 6) & 0x3f) | 0x80);
+                    _value.buffer.append(1, (char)cp8);
+                    cp8 = (uint8_t)((cp & 0x3f) | 0x80);
+                    _value.buffer.append(1, (char)cp8);
+                  }
+
+                  break;
+                }
+                default:
+                  // Any other escaped byte stands for itself
+                  _value.buffer.append(1, (char)b); break;
+              }
+              break;
+            }
+
+            case '"':
+              goto after_initial_read_b;
+
+            case 0:
+              return setError(InvalidByte);
+
+            default: {
+              if (_value.buffered) {
+                // TODO: Make this efficient by appending chunks between
+                // boundaries instead of appending per-byte
+                _value.buffer.append(1, (char)b);
+              }
+              break;
+            }
+          } // switch(b)
+        } // while (!endOfInput())
+
+        after_initial_read_b:
+        // Reached either through the closing quote or because the input ran
+        // out; in the latter case the last byte read is not a quote.
+        if (b != '"') {
+          return setError(UnterminatedString);
+        }
+
+        if (!_value.buffered) {
+          // _input.offset is one past the closing quote
+          _value.length = _input.offset - _value.offset - 1;
+        }
+
+        // is this a field name?
+        while (!endOfInput()) {
+          b = _input.bytes[_input.offset++];
+          switch (b) {
+            case ' ': case '\t': case '\r': case '\n': break;
+            case ':': return setToken(FieldName);
+            case ',': goto string_read_return_string;
+            case ']': case '}': {
+              --_input.offset; // rewind
+              goto string_read_return_string;
+            }
+            case 0: return setError(InvalidByte);
+            default: {
+              // Expected a comma or a colon
+              return setError(SyntaxError);
+            }
+          }
+        }
+
+        string_read_return_string:
+        return setToken(jsont::String);
+      }
+
+      case ',': {
+        if (_token == ObjectStart || _token == ArrayStart || _token == _Comma) {
+          return setError(UnexpectedComma);
+        }
+        _token = _Comma;
+        break;
+      }
+
+      default: {
+        if (isdigit((int)b) || b == '+' || b == '-') {
+          // We are reading a number
+          _value.beginAtOffset(_input.offset-1);
+          Token token = jsont::Integer;
+
+          while (!endOfInput()) {
+            b = _input.bytes[_input.offset++];
+            switch (b) {
+              case '0'...'9': break;
+              case '.': token = jsont::Float; break;
+              case 'E': case 'e': case '-': case '+': {
+                // Exponent and sign bytes are only accepted once a '.' has
+                // been seen
+                if (token != jsont::Float) {
+                  return setError(MalformedNumberLiteral);
+                }
+                break;
+              }
+              default: {
+                // The terminating byte has already been consumed, so a literal
+                // that consists of nothing but a sign spans exactly two bytes
+                // (sign + terminator) counted from _value.offset.
+                if ( (_input.offset - _value.offset == 2) &&
+                     (_input.bytes[_value.offset] == '-' ||
+                      _input.bytes[_value.offset] == '+') ) {
+                  return setError(MalformedNumberLiteral);
+                }
+
+                // rewind the byte that terminated this number literal
+                --_input.offset;
+
+                // _input.offset now indexes the terminator, i.e. one past the
+                // last byte of the literal
+                _value.length = _input.offset - _value.offset;
+                return setToken(token);
+              }
+            }
+          }
+          return setToken(End);
+        } else {
+          return setError(InvalidByte);
+        }
+      }
+    }
+  }
+
+  return setToken(End);
+}
+
+
+// Classification of an input byte when it is written into a JSON string.
+// Table entries that are none of these values hold the character to emit
+// after a backslash (e.g. 'n' for a line feed).
+enum {
+  kUTF8ByteVerbatim = 0,
+  kUTF8ByteEncode1, // "\u000x"
+  kUTF8ByteEncode2, // "\u00xx"
+};
+// Short aliases that keep the table below readable; undefined right after it.
+#define V kUTF8ByteVerbatim
+#define E1 kUTF8ByteEncode1
+#define E2 kUTF8ByteEncode2
+// Indexed by byte value (0-255); consulted by Builder::appendString to decide
+// how each byte is written.
+static const uint8_t kUTF8ByteTable[256] = {
+  E1, E1, E1, E1, E1, E1, E1, E1, 'b', 't', 'n', E1, 'f', 'r', E1, E1, E2, E2,
+  E2, E2, E2, E2, E2, E2, E2, E2, E2, E2, E2, E2, E2, E2, V, V, '"', V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, '\\', V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, E2, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V,
+  V, V, V, V, V, V, V, V, V, V
+};
+#undef V
+#undef E1
+#undef E2
+
+// #ifndef __has_feature
+//   #define __has_feature(x) 0
+// #endif
+// #if defined(__cplusplus) && __has_feature(cxx_static_assert)
+//   #define JSONT_CONST_ASSERT(expr, error_msg) static_assert((expr), (error_msg))
+// #elif __has_feature(c_static_assert)
+//   #define JSONT_CONST_ASSERT(expr, error_msg) _Static_assert((expr), (error_msg))
+// #else
+//   #define JSONT_CONST_ASSERT(expr, error_msg) ((void)0)
+// #endif
+
+// Appends `length` bytes at `v` as a double-quoted JSON string.
+//
+// Each byte is classified through kUTF8ByteTable: it is copied verbatim,
+// written as a six-byte "\u00XX" escape, or written as a two-byte backslash
+// escape (e.g. "\n"). Only UTF8TextEncoding is supported. Returns *this.
+Builder& Builder::appendString(const uint8_t* v, size_t length, TextEncoding encoding) {
+  // Upper-case hexadecimal digits, indexed by nibble value (0-15)
+  static const char kHexDigits[] = "0123456789ABCDEF";
+
+  // Room for every byte written verbatim plus the two quotes; escapes reserve
+  // their extra bytes when they are encountered.
+  reserve(length + 2);
+  _buf[_size++] = '"';
+
+  assert(encoding == UTF8TextEncoding /* Currently only UTF-8 is supported */);
+
+  const uint8_t* end = v+length;
+  while (v != end) {
+    uint8_t s = kUTF8ByteTable[*v];
+    switch (s) {
+      case kUTF8ByteVerbatim:
+        _buf[_size++] = *v;
+        break;
+      case kUTF8ByteEncode1: {
+        // "\u000X"
+        assert(*v < 16);
+        size_t remainingSize = end-v+1+5; // five additional bytes needed
+        reserve(remainingSize);
+        _buf[_size++] = '\\';
+        _buf[_size++] = 'u';
+        _buf[_size++] = '0';
+        _buf[_size++] = '0';
+        _buf[_size++] = '0';
+        _buf[_size++] = kHexDigits[*v];
+        assert(_size <= _capacity);
+        break;
+      }
+      case kUTF8ByteEncode2: {
+        // "\u00XX"
+        // Note: *v is guaranteed to be within the set [16,32),127. This is
+        // an effect of the kUTF8ByteTable lookup table and this code needs to
+        // be revised if the lookup table adds or removes any kUTF8ByteEncode.
+        assert((*v > 15 && *v < 32) || *v == 127);
+        size_t remainingSize = end-v+1+5; // five additional bytes needed
+        reserve(remainingSize);
+        _buf[_size++] = '\\';
+        _buf[_size++] = 'u';
+        _buf[_size++] = '0';
+        _buf[_size++] = '0';
+        _buf[_size++] = kHexDigits[(*v >> 4) & 0x0f]; // high nibble
+        _buf[_size++] = kHexDigits[*v & 0x0f];        // low nibble
+        assert(_size <= _capacity);
+        break;
+      }
+      default: {
+        // reverse solidus escape; `s` is the character that follows the "\"
+        size_t remainingSize = end-v+1+1; // one additional byte needed
+        reserve(remainingSize);
+        _buf[_size++] = '\\';
+        _buf[_size++] = s;
+        assert(_size <= _capacity);
+        break;
+      }
+    }
+
+    ++v;
+  }
+
+  _buf[_size++] = '"';
+  assert(_size <= _capacity);
+  return *this;
+}
+
+#if JSONT_CXX_RVALUE_REFS
+  // Move constructor: takes over `other`'s buffer and leaves `other` without
+  // one.
+  Builder::Builder(Builder&& other)
+      : _buf(other._buf)
+      , _capacity(other._capacity)
+      , _size(other._size)
+      , _state(other._state) {
+    other._buf = 0;
+  }
+
+  // Move assignment: releases the buffer currently held, then takes over
+  // `other`'s buffer. Assigning an object to itself is a no-op.
+  Builder& Builder::operator=(Builder&& other) {
+    if (this != &other) {
+      free(_buf); // free(0) is a no-op
+      _buf = other._buf; other._buf = 0;
+      _capacity = other._capacity;
+      _size = other._size;
+      _state = other._state;
+    }
+    return *this;
+  }
+#endif
+
+// Copy constructor: allocates a buffer of `other`'s capacity and copies the
+// bytes `other` has written so far.
+Builder::Builder(const Builder& other)
+    : _buf((char*)malloc(other._capacity))
+    , _capacity(other._capacity)
+    , _size(other._size)
+    , _state(other._state) {
+  memcpy((void*)_buf, (const void*)other._buf, other._size);
+}
+
+// Copy assignment: replaces this builder's contents with a copy of `other`'s.
+// The copy is made before the old buffer is released, and assigning an object
+// to itself leaves it unchanged.
+Builder& Builder::operator=(const Builder& other) {
+  if (this == &other) {
+    return *this;
+  }
+  char* copy = (char*)malloc(other._capacity);
+  memcpy((void*)copy, (const void*)other._buf, other._size);
+  free(_buf); // release the buffer being replaced; free(0) is a no-op
+  _buf = copy;
+  _capacity = other._capacity;
+  _size = other._size;
+  _state = other._state;
+  return *this;
+}
+
+} // namespace jsont

diff --git a/jsont.h b/jsont.h
new file mode 100644
index 0000000..22cd043
--- /dev/null
+++ b/jsont.h

@@ -0,0 +1,114 @@
+// JSON Tokenizer. Copyright (c) 2012, Rasmus Andersson. All rights reserved.
+// Use of this source code is governed by a MIT-style license that can be
+// found in the LICENSE file.
+#ifndef JSONT_INCLUDED
+#define JSONT_INCLUDED
+
+#include <stdint.h>  // uint8_t, int64_t
+#include <stdlib.h>  // size_t
+#include <string.h>  // strlen
+#include <stdbool.h> // bool
+
+#ifndef _JSONT_IN_SOURCE
+typedef struct jsont_ctx jsont_ctx_t;
+typedef uint8_t jsont_tok_t;
+#endif
+
+#ifndef JSONT_ERRINFO_CUSTOM
+#define jsont_err_t const char*
+#endif
+
+// Token types
+enum {
+  JSONT_END = 0,        // Input ended
+  JSONT_ERR,            // Error
+
+  JSONT_OBJECT_START,   // {
+  JSONT_OBJECT_END,     // }
+  
+  JSONT_ARRAY_START,    // [
+  JSONT_ARRAY_END,      // ]
+  
+  JSONT_TRUE,           // true
+  JSONT_FALSE,          // false
+  JSONT_NULL,           // null
+
+  // NOTE(review): the two _JSONT_VALUES_* markers presumably bracket the
+  // tokens that carry a data value -- confirm against jsont.c.
+  _JSONT_VALUES_START,
+  JSONT_NUMBER_INT,     // number value without a fraction part
+  JSONT_NUMBER_FLOAT,   // number value with a fraction part
+  JSONT_STRING,         // string value
+  JSONT_FIELD_NAME,     // field name
+  _JSONT_VALUES_END,
+
+  // Internal marker recorded by jsont_next after reading a ","
+  _JSONT_COMMA,
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Create a new JSON tokenizer context. `user_data` can be anything and is
+// accessible through `jsont_user_data`.
+jsont_ctx_t* jsont_create(void* user_data);
+
+// Destroy a JSON tokenizer context. This will free any internal data, except
+// from the input buffer.
+void jsont_destroy(jsont_ctx_t* ctx);
+
+// Reset the tokenizer to parse the data pointed to by `bytes`. The tokenizer
+// does NOT take ownership of `bytes`. This function can be used to recycle a
+// tokenizer context, minimizing memory reallocation.
+void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length);
+
+// Read and return the next token. See `jsont_tok_t` enum for a list of
+// possible return values and their meaning.
+jsont_tok_t jsont_next(jsont_ctx_t* ctx);
+
+// Returns the current token (last token read by `jsont_next`).
+jsont_tok_t jsont_current(const jsont_ctx_t* ctx);
+
+// Returns a slice of the input which represents the current value, or nothing
+// (returns 0) if the current token has no value (e.g. start of an object).
+size_t jsont_data_value(jsont_ctx_t* ctx, const uint8_t** bytes);
+
+// Returns true if the current data value is equal to `bytes` of `length`
+bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length);
+
+// Compares the current data value against the C string `str` (its terminating
+// null byte excluded) and returns true when they are equal.
+static inline bool jsont_str_equals(jsont_ctx_t* ctx, const char* str) {
+  const size_t str_len = strlen(str);
+  const uint8_t* str_bytes = (const uint8_t*)str;
+  return jsont_data_equals(ctx, str_bytes, str_len);
+}
+
+// Retrieve a newly allocated c-string. Similar to `jsont_data_value` but
+// returns a newly allocated copy of the current value as a C string
+// (terminated by a null byte). The calling code is responsible for calling
+// `free()` on the returned value.
+char* jsont_strcpy_value(jsont_ctx_t* ctx);
+
+// Returns the current integer value. If the number is too large or too small,
+// this function sets errno and returns INT64_MAX or INT64_MIN.
+int64_t jsont_int_value(jsont_ctx_t* ctx);
+
+// Returns the current floating-point number value. Sets errno and returns a
+// value that isnan(N)==true on error.
+double jsont_float_value(jsont_ctx_t* ctx);
+
+// Get the last byte read. Suitable for debugging JSONT_ERR.
+uint8_t jsont_current_byte(jsont_ctx_t* ctx);
+
+// Get the current offset of the last byte read.
+size_t jsont_current_offset(jsont_ctx_t* ctx);
+
+// Get information on the last error (by default a printable text message).
+// Returns NULL if no error has occurred since a call to `jsont_reset`.
+jsont_err_t jsont_error_info(jsont_ctx_t* ctx);
+
+// Returns the value passed to `jsont_create`.
+void* jsont_user_data(const jsont_ctx_t* ctx);
+
+// ----------------- C++ -----------------
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // JSONT_INCLUDED

diff --git a/jsont.hh b/jsont.hh
new file mode 100644
index 0000000..3e86cad
--- /dev/null
+++ b/jsont.hh

@@ -0,0 +1,420 @@
+// JSON Tokenizer and builder. Copyright (c) 2012, Rasmus Andersson. All rights
+// reserved. Use of this source code is governed by a MIT-style license that can
+// be found in the LICENSE file.
+#ifndef JSONT_CXX_INCLUDED
+#define JSONT_CXX_INCLUDED
+
+#include <stdint.h>  // uint8_t, int64_t
+#include <stdlib.h>  // size_t
+#include <string.h>  // strlen
+#include <stdbool.h> // bool
+#include <stdio.h>   // snprintf
+#include <math.h>
+#include <assert.h>
+#include <new>       // std::bad_alloc
+#include <string>
+#include <stdexcept>
+
+// Detect rvalue-reference (move semantics) support and publish the result as
+// JSONT_CXX_RVALUE_REFS (1 or 0).
+//
+// `__has_feature` is a Clang extension. It has to be wrapped before use:
+// writing `defined(__has_feature) && __has_feature(x)` inside a single #if
+// does not work on preprocessors that lack it, because the whole expression
+// is still parsed and `__has_feature(x)` is then a syntax error.
+#ifdef __has_feature
+  #define JSONT_HAS_FEATURE(x) __has_feature(x)
+#else
+  #define JSONT_HAS_FEATURE(x) 0
+#endif
+#if (defined(_MSC_VER) && _MSC_VER >= 1600) || \
+    (defined(__cplusplus) && __cplusplus >= 201103L) || \
+    (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__) || \
+    JSONT_HAS_FEATURE(cxx_rvalue_references)
+  #define JSONT_CXX_RVALUE_REFS 1
+#else
+  #define JSONT_CXX_RVALUE_REFS 0
+#endif
+
+namespace jsont {
+
+// Tokens produced by the Tokenizer.
+//
+// The numeric order matters: `Tokenizer::hasValue()` treats every token in the
+// contiguous range [Integer, FieldName] as carrying a value, so new
+// value-bearing tokens must be inserted inside that range.
+typedef enum {
+  End = 0,       // Input ended
+  ObjectStart,   // {
+  ObjectEnd,     // }
+  ArrayStart,    // [
+  ArrayEnd,      // ]
+  True,          // true
+  False,         // false
+  Null,          // null
+  Integer,       // number value without a fraction part
+  Float,         // number value with a fraction part
+  String,        // string value
+  FieldName,     // field name
+  Error,         // An error occurred (see `error()` for details)
+  _Comma,        // presumably internal to the tokenizer -- TODO confirm
+} Token;
+
+// String encoding of input handed to the Tokenizer and of text handed to the
+// Builder. UTF-8 is the only encoding declared here.
+typedef enum {
+  UTF8TextEncoding = 0,
+} TextEncoding;
+
+// Name of `token`
+const char* token_name(jsont::Token token);
+
+class TokenizerInternal;
+
+// Reads a sequence of bytes and produces tokens and values while doing so.
+//
+// Typical use: construct (or `reset`) with the input, call `next()` until it
+// yields `End` or `Error`, and query the value accessors after each token.
+// NOTE(review): the input appears to be referenced rather than copied (see
+// `inputBytes`), so it presumably has to outlive its use -- confirm.
+class Tokenizer {
+public:
+  // Creates a tokenizer for `length` bytes at `bytes` in `encoding`.
+  Tokenizer(const char* bytes, size_t length, TextEncoding encoding);
+  ~Tokenizer();
+
+  // Read next token
+  const Token& next();
+
+  // Access current token
+  const Token& current() const;
+
+  // Reset the tokenizer, making it possible to reuse this parser so to avoid
+  // unnecessary memory allocation and deallocation.
+  void reset(const char* bytes, size_t length, TextEncoding encoding);
+
+  // True if the current token has a value
+  bool hasValue() const;
+
+  // Returns a slice of the input which represents the current value, or nothing
+  // (returns 0) if the current token has no value (e.g. start of an object).
+  // `bytes` is an out-parameter that receives the start of the slice.
+  size_t dataValue(const char** bytes) const;
+
+  // Returns a *copy* of the current string value.
+  std::string stringValue() const;
+
+  // Returns the current value as a double-precision floating-point number.
+  double floatValue() const;
+
+  // Returns the current value as a signed 64-bit integer.
+  int64_t intValue() const;
+
+  // Returns the current value as a boolean (true only for the `True` token).
+  bool boolValue() const;
+
+  // Error codes
+  typedef enum {
+    UnspecifiedError = 0,
+    UnexpectedComma,
+    UnexpectedTrailingComma,
+    InvalidByte,
+    PrematureEndOfInput,
+    MalformedUnicodeEscapeSequence,
+    MalformedNumberLiteral,
+    UnterminatedString,
+    SyntaxError,
+  } ErrorCode;
+
+  // Returns the error code of the last error
+  ErrorCode error() const;
+
+  // Returns a human-readable message for the last error. Never returns NULL.
+  const char* errorMessage() const;
+
+  // The byte offset into input where the tokenizer is currently looking. In the
+  // event of an error, this will point to the source of the error.
+  size_t inputOffset() const;
+
+  // Total number of input bytes
+  size_t inputSize() const;
+
+  // A pointer to the input data as passed to `reset` or the constructor.
+  const char* inputBytes() const;
+
+  friend class TokenizerInternal;
+private:
+  // Number of input bytes between the current offset and the end of input.
+  size_t availableInput() const;
+  // Non-zero when the current offset has reached the end of the input.
+  size_t endOfInput() const;
+  // Stores `t` as the current token and returns a reference to it.
+  const Token& setToken(Token t);
+  // Records `error` and makes `Error` the current token.
+  const Token& setError(ErrorCode error);
+
+  struct {
+    const uint8_t* bytes;
+    size_t length;
+    size_t offset;
+  } _input;
+  struct Value {
+    Value() : offset(0), length(0), buffered(false) {}
+    void beginAtOffset(size_t z);
+    size_t offset; // into _input.bytes
+    size_t length;
+    std::string buffer;
+    bool buffered; // if true, contents lives in buffer
+  } _value;
+  Token _token;
+  struct {
+    ErrorCode code;
+  } _error;
+};
+
+
+// Helps in building JSON, providing a final sequential byte buffer.
+//
+// Every mutating call returns `*this` so calls can be chained. The builder
+// tracks what was written last (`_state`) and inserts the ':' or ','
+// separator itself before the next item (see `prefix`).
+class Builder {
+public:
+  // Starts empty: no buffer is allocated until something is appended.
+  Builder() : _buf(0), _capacity(0), _size(0), _state(NeutralState) {}
+  // Releases the buffer, if one is still held.
+  ~Builder() { if (_buf) { free(_buf); _buf = 0; } }
+  Builder(const Builder& other);
+  Builder& operator=(const Builder& other);
+#if JSONT_CXX_RVALUE_REFS
+  Builder(Builder&& other);
+  Builder& operator=(Builder&& other);
+#endif
+
+  // Structural tokens: '{', '}', '[' and ']'.
+  Builder& startObject();
+  Builder& endObject();
+  Builder& startArray();
+  Builder& endArray();
+  // Appends a field name given as `length` bytes at `v`, or as a std::string.
+  Builder& fieldName(const char* v, size_t length, TextEncoding e=UTF8TextEncoding);
+  Builder& fieldName(const std::string& name, TextEncoding enc=UTF8TextEncoding);
+  // Appends a value. The `const char*` overload measures `v` with strlen, so
+  // `v` must be a NUL-terminated string.
+  // NOTE(review): where int64_t is a typedef for `long`, the `int64_t` and
+  // `long` overloads would declare the same signature -- confirm on the
+  // target platforms.
+  Builder& value(const char* v, size_t length, TextEncoding e=UTF8TextEncoding);
+  Builder& value(const char* v);
+  Builder& value(const std::string& v);
+  Builder& value(double v);
+  Builder& value(int64_t v);
+  Builder& value(int v);
+  Builder& value(unsigned int v);
+  Builder& value(long v);
+  Builder& value(bool v);
+  // Appends the literal `null`.
+  Builder& nullValue();
+
+  // Number of bytes written so far.
+  size_t size() const;
+  // Pointer to the bytes written so far; not NUL-terminated by the builder.
+  const char* bytes() const;
+  // Copy of the bytes written so far.
+  std::string toString() const;
+  // Hands the buffer over to the caller and leaves the builder empty;
+  // `size_out` receives the number of bytes written.
+  const char* seizeBytes(size_t& size_out);
+  // Forgets what has been written; the allocated buffer is kept for reuse.
+  // NOTE(review): the `const` on a `void` return type has no effect.
+  const void reset();
+
+private:
+  // Unused bytes left in the buffer.
+  size_t available() const;
+  // Makes room for at least `size` more bytes.
+  void reserve(size_t size);
+  // Writes the ':' or ',' separator required by the current state, if any.
+  void prefix();
+  Builder& appendString(const uint8_t* v, size_t length, TextEncoding enc);
+  Builder& appendChar(char byte);
+
+  char*  _buf;      // malloc-family buffer (freed with free()), or 0
+  size_t _capacity; // allocated size of _buf in bytes
+  size_t _size;     // bytes of _buf in use
+  enum {
+    NeutralState = 0,
+    AfterFieldName,
+    AfterValue,
+    AfterObjectStart,
+    AfterArrayStart,
+  } _state;
+};
+
+
+// Convenience function: yields a fresh, empty Builder so that a chain of
+// builder calls can be started from a single expression.
+inline Builder build() {
+  return Builder();
+}
+
+
+// ------------------- internal ---------------------
+
+// Constructs a tokenizer for the `length` bytes at `bytes`. The current token
+// starts out as `End`; everything else is set up by `reset`.
+inline Tokenizer::Tokenizer(const char* bytes, size_t length,
+                            TextEncoding encoding)
+    : _token(End) {
+  this->reset(bytes, length, encoding);
+}
+
+// Returns the token most recently stored by the tokenizer.
+inline const Token& Tokenizer::current() const {
+  return this->_token;
+}
+
+// True when the current token lies in the enum range Integer..FieldName
+// (Integer, Float, String, FieldName).
+inline bool Tokenizer::hasValue() const {
+  const bool outsideValueRange = (_token < Integer) || (_token > FieldName);
+  return !outsideValueRange;
+}
+
+// Returns a copy of the current value as a std::string.
+// Yields an empty string when `dataValue` reports no data, so an unset
+// pointer is never handed to the std::string constructor.
+inline std::string Tokenizer::stringValue() const {
+  const char* bytes = 0;
+  const size_t size = dataValue(&bytes);
+  if (bytes == 0 || size == 0) {
+    return std::string();
+  }
+  return std::string(bytes, size);
+}
+
+// Interprets the current token as a boolean: true only for `True`.
+inline bool Tokenizer::boolValue() const {
+  if (_token == True) {
+    return true;
+  }
+  return false;
+}
+
+// Number of bytes between the current offset and the end of the input.
+inline size_t Tokenizer::availableInput() const {
+  const size_t consumed = _input.offset;
+  return _input.length - consumed;
+}
+// 1 when the current offset equals the input length, otherwise 0.
+inline size_t Tokenizer::endOfInput() const {
+  return _input.length == _input.offset;
+}
+// Makes `t` the current token and returns a reference to the stored token.
+inline const Token& Tokenizer::setToken(Token t) {
+  _token = t;
+  return _token;
+}
+// Records `error` as the last error code, switches the current token to
+// `Error` and returns a reference to the stored token.
+inline const Token& Tokenizer::setError(Tokenizer::ErrorCode error) {
+  _error.code = error;
+  _token = Error;
+  return _token;
+}
+// Current byte offset into the input.
+inline size_t Tokenizer::inputOffset() const {
+  return this->_input.offset;
+}
+// Total number of input bytes.
+inline size_t Tokenizer::inputSize() const {
+  return this->_input.length;
+}
+// The input bytes, viewed as `const char*`.
+inline const char* Tokenizer::inputBytes() const {
+  const uint8_t* raw = _input.bytes;
+  return (const char*)raw;
+}
+
+// Restarts the value as an empty, unbuffered slice beginning at input offset
+// `z`. The contents of `buffer` are not touched.
+inline void Tokenizer::Value::beginAtOffset(size_t z) {
+  buffered = false;
+  length = 0;
+  offset = z;
+}
+
+// Returns the error code stored by the most recent `setError`.
+inline Tokenizer::ErrorCode Tokenizer::error() const {
+  return this->_error.code;
+}
+
+
+// Writes any pending separator followed by '{' and records that an object
+// was just opened.
+inline Builder& Builder::startObject() {
+  prefix();
+  appendChar('{');
+  _state = AfterObjectStart;
+  return *this;
+}
+
+// Writes '}' (no separator is emitted) and treats the closed object as a
+// completed value.
+inline Builder& Builder::endObject() {
+  appendChar('}');
+  _state = AfterValue;
+  return *this;
+}
+
+// Writes any pending separator followed by '[' and records that an array was
+// just opened.
+inline Builder& Builder::startArray() {
+  prefix();
+  appendChar('[');
+  _state = AfterArrayStart;
+  return *this;
+}
+
+// Writes ']' (no separator is emitted) and treats the closed array as a
+// completed value.
+inline Builder& Builder::endArray() {
+  appendChar(']');
+  _state = AfterValue;
+  return *this;
+}
+
+// std::string overload: forwards the string's bytes and size to the
+// pointer/length overload.
+inline Builder& Builder::fieldName(const std::string& name, TextEncoding enc) {
+  const char* nameBytes = name.data();
+  const size_t nameSize = name.size();
+  return fieldName(nameBytes, nameSize, enc);
+}
+
+// Writes any pending separator, marks the builder as being after a field name
+// and appends the `length` bytes at `v` through `appendString`.
+inline Builder& Builder::fieldName(const char* v, size_t length,
+                                   TextEncoding enc) {
+  prefix();
+  _state = AfterFieldName;
+  const uint8_t* raw = (const uint8_t*)v;
+  return appendString(raw, length, enc);
+}
+
+// Writes any pending separator, marks the builder as being after a value and
+// appends the `length` bytes at `v` through `appendString`.
+inline Builder& Builder::value(const char* v, size_t length, TextEncoding enc) {
+  prefix();
+  _state = AfterValue;
+  const uint8_t* raw = (const uint8_t*)v;
+  return appendString(raw, length, enc);
+}
+
+// C-string overload: `v` must be NUL-terminated, its length is measured with
+// strlen and the default text encoding is used.
+inline Builder& Builder::value(const char* v) {
+  const size_t length = strlen(v);
+  return value(v, length);
+}
+
+// std::string overload: forwards the string's bytes and size to the
+// pointer/length overload, using the default text encoding.
+inline Builder& Builder::value(const std::string& v) {
+  const char* valueBytes = v.data();
+  const size_t valueSize = v.size();
+  return value(valueBytes, valueSize);
+}
+
+// Appends `v` as a JSON number formatted with "%g".
+// JSON has no representation for NaN or infinity, so a non-finite `v` is
+// written as `null` instead of the "nan"/"inf" text printf would produce.
+inline Builder& Builder::value(double v) {
+  // `v - v` is 0 for every finite value and NaN for NaN and +/-infinity.
+  const bool finite = ((v - v) == 0.0);
+  if (!finite) {
+    return nullValue();
+  }
+  prefix();
+  reserve(256);
+  int z = snprintf(_buf+_size, 256, "%g", v);
+  // A negative result is an output error; >= 256 would mean truncation.
+  assert(z >= 0 && z < 256);
+  _size += z;
+  _state = AfterValue;
+  return *this;
+}
+
+// Appends `v` as a JSON integer in decimal notation.
+inline Builder& Builder::value(int64_t v) {
+  prefix();
+  // The longest int64_t, "-9223372036854775808", is 20 characters; one more
+  // byte is needed for the terminating NUL written by snprintf.
+  reserve(21);
+  // int64_t is not `long long` on every platform, while "%lld" requires
+  // exactly that type, hence the cast.
+  int z = snprintf(_buf+_size, 21, "%lld", (long long)v);
+  // A negative result is an output error; >= 21 would mean truncation.
+  assert(z >= 0 && z < 21);
+  _size += z;
+  _state = AfterValue;
+  return *this;
+}
+
+// Narrower integer overloads: each widens its argument to int64_t and
+// forwards to the int64_t overload.
+inline Builder& Builder::value(int v) {
+  return value(static_cast<int64_t>(v));
+}
+inline Builder& Builder::value(unsigned int v) {
+  return value(static_cast<int64_t>(v));
+}
+inline Builder& Builder::value(long v) {
+  return value(static_cast<int64_t>(v));
+}
+
+// Appends the literal `true` or `false`, preceded by any pending separator.
+inline Builder& Builder::value(bool v) {
+  const char* literal = v ? "true" : "false";
+  const size_t literalSize = v ? 4 : 5;
+  prefix();
+  reserve(literalSize);
+  // Copy the literal without its terminating NUL.
+  memcpy(_buf + _size, literal, literalSize);
+  _size += literalSize;
+  _state = AfterValue;
+  return *this;
+}
+
+// Appends the literal `null`, preceded by any pending separator.
+inline Builder& Builder::nullValue() {
+  static const char kNull[] = "null";
+  const size_t literalSize = sizeof(kNull) - 1; // without the NUL
+  prefix();
+  reserve(literalSize);
+  memcpy(_buf + _size, kNull, literalSize);
+  _size += literalSize;
+  _state = AfterValue;
+  return *this;
+}
+
+// Number of bytes written so far.
+inline size_t Builder::size() const {
+  return this->_size;
+}
+// Start of the written bytes (a null pointer while nothing is allocated).
+inline const char* Builder::bytes() const {
+  return this->_buf;
+}
+// Copies the written bytes into a std::string.
+inline std::string Builder::toString() const {
+  const char* data = bytes();
+  const size_t length = size();
+  return std::string(data, length);
+}
+// Detaches and returns the buffer; `size_out` receives the number of bytes
+// written. Afterwards the builder holds no buffer (so its destructor will not
+// free the returned memory) and is back in its neutral, empty state.
+// Presumably the caller releases the result with free() -- TODO confirm.
+inline const char* Builder::seizeBytes(size_t& size_out) {
+  const char* released = _buf;
+  size_out = _size;
+  _capacity = 0;
+  _buf = 0;
+  reset();
+  return released;
+}
+// Returns the builder to the neutral state with nothing written. The buffer
+// and its capacity are left as they are.
+inline const void Builder::reset() {
+  _state = NeutralState;
+  _size = 0;
+}
+
+// Bytes of the buffer that are allocated but not yet written.
+inline size_t Builder::available() const {
+  const size_t used = _size;
+  return _capacity - used;
+}
+
+// Ensures that at least `size` more bytes can be appended to the buffer.
+//
+// When the buffer is too small, the capacity is first raised to exactly fit
+// the request and then either set to the 64-byte minimum or grown by a
+// further 50%, so that repeated small appends amortize.
+// Throws std::bad_alloc if the buffer cannot be grown; in that case the
+// builder (buffer, capacity and contents) is left unchanged.
+inline void Builder::reserve(size_t size) {
+  const size_t avail = available();
+  if (avail >= size) {
+    return;
+  }
+  size_t capacity = _capacity + (size - avail);
+  // 50% growth in integer arithmetic; avoids a size_t -> double -> size_t
+  // round trip.
+  capacity = (capacity < 64) ? 64 : (capacity + capacity / 2);
+  // Keep `_buf` until realloc has succeeded: on failure the old block is
+  // still valid and still owned by this object.
+  void* grown = realloc((void*)_buf, capacity);
+  if (grown == 0) {
+    throw std::bad_alloc();
+  }
+  _buf = (char*)grown;
+  _capacity = capacity;
+}
+
+// Emits the separator the next item needs: ':' directly after a field name,
+// ',' directly after a value, and nothing in any other state.
+inline void Builder::prefix() {
+  switch (_state) {
+    case AfterFieldName:
+      appendChar(':');
+      break;
+    case AfterValue:
+      appendChar(',');
+      break;
+    default:
+      break;
+  }
+}
+
+// Appends the single byte `byte`, growing the buffer first when necessary.
+inline Builder& Builder::appendChar(char byte) {
+  reserve(1);
+  _buf[_size] = byte;
+  ++_size;
+  return *this;
+}
+
+}
+
+#endif // JSONT_CXX_INCLUDED

diff --git a/test/test_tokenizer.c b/test/test_tokenizer.c
new file mode 100644
index 0000000..f994c85
--- /dev/null
+++ b/test/test_tokenizer.c

@@ -0,0 +1,180 @@
+#include <jsont.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+// Asserts that the current token of the tokenizer `S` (a variable that must be
+// in scope at the point of use) is a field name whose bytes equal the C string
+// `fieldName`. Does not advance the tokenizer.
+#define JSONT_ASSERT_FIELD_NAME(fieldName) do { \
+  assert(jsont_current(S) == JSONT_FIELD_NAME); \
+  assert(jsont_data_equals(S, (const uint8_t*)fieldName, \
+    strlen(fieldName)) == true); \
+} while(0)
+
+// Tokenizer test: feeds one JSON document through the C API and asserts the
+// exact token sequence and the value reported for each token. Prints "PASS"
+// and returns 0 when the end is reached.
+//
+// NOTE: most `jsont_next` calls sit inside `assert(...)`, so the checks (and
+// those tokenizer steps) vanish when compiled with NDEBUG.
+int main(int argc, const char** argv) {
+  // Create a new reusable tokenizer
+  jsont_ctx_t* S = jsont_create(0);
+
+  // The document under test. C escaping is doubled here: `\\\"` in the source
+  // is the two bytes `\"` in the JSON text.
+  const char* inbuf = "{ "
+    "\"\\\"fo\\\"o\": \"Foo\","  // "\"fo\"o": "Foo"
+    "\"1\" :  \"\\u2192\","
+    "\"n\":1234,"
+    "\"x\"  :  \t 12.34,"
+    "\"overflow\"  :  \t 9999999999999999999999999999999999,"
+    "\"b\\/a\\/r\":["
+      "null,"
+      "true,"
+      "false,"
+      "{"
+        "\"x\":12.3"
+      "},"
+      "\n123,"
+      "\"456\","
+      "\"a\\\"b\\\"\","
+      "\"a\\u0000b\","
+      "\"a\\bb\","
+      "\"a\\fb\","
+      "\"a\\nb\","
+      "\"a\\rb\","
+      "\"a\\tb\","
+      "\"\","
+      "\"   \""
+    "]"
+  "}";
+
+  jsont_reset(S, (const uint8_t*)inbuf, strlen(inbuf));
+  jsont_tok_t tok;
+
+  // Expect the start of the top-level object; jsont_current must agree with
+  // the token jsont_next just returned.
+  tok = jsont_next(S);
+  assert(tok == JSONT_OBJECT_START);
+  assert(jsont_current(S) == JSONT_OBJECT_START);
+
+  tok = jsont_next(S);
+  assert(tok == JSONT_FIELD_NAME);
+
+  // Expect current data to be the bytes '"fo"o'
+  const char* expectedData = "\"fo\"o";
+  const uint8_t* bytes;
+  size_t size = jsont_data_value(S, &bytes);
+  size_t expectedSize = strlen(expectedData);
+  // printf("expectedData: '%s'\n", expectedData);
+  // printf("currentData:  '%.*s'\n", (int)size, (const char*)bytes);
+  assert(size == expectedSize);
+  int d = memcmp((const void*)expectedData, bytes, size);
+  assert(d == 0);
+
+  // Expect a string value "Foo"
+  tok = jsont_next(S);
+  assert(tok == JSONT_STRING);
+  char* str = jsont_strcpy_value(S);
+  assert(str != 0);
+  assert(strcmp(str, "Foo") == 0);
+  free(str); str = 0;
+
+  // Expect field name "1". Also tests the integrity of jsont_data_equals
+  tok = jsont_next(S);
+  assert(jsont_data_equals(S, (const uint8_t*)"1", 1) == true);
+  assert(jsont_str_equals(S, "1") == true);
+  size = jsont_data_value(S, &bytes);
+  assert(size == 1);
+  assert(memcmp((const void*)"1", (const void*)bytes, 1) == 0);
+
+  // Expect the string '\u2192' (RIGHTWARDS ARROW, UTF8: E2,86,92)
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "\xe2\x86\x92") == true);
+  
+  // Expect a field name 'n'
+  jsont_next(S);
+  JSONT_ASSERT_FIELD_NAME("n");
+
+  // Expect a number value '1234', readable both as an integer and as a float
+  assert(jsont_next(S) == JSONT_NUMBER_INT);
+  //printf("int: %lld (str: '%s')\n", jsont_int_value(S), jsont_strcpy_value(S));
+  assert(jsont_int_value(S) == 1234LL);
+  assert(jsont_float_value(S) == 1234.0);
+
+  // Expect a field name 'x'
+  jsont_next(S);
+  JSONT_ASSERT_FIELD_NAME("x");
+
+  // Expect a number value '12.34'
+  assert(jsont_next(S) == JSONT_NUMBER_FLOAT);
+  assert(jsont_float_value(S) == 12.34);
+  assert(jsont_int_value(S) == 12LL); // partial expected
+
+  // Expect a field name 'overflow'
+  jsont_next(S);
+  JSONT_ASSERT_FIELD_NAME("overflow");
+
+  // Expect a cut-off integer value of INT64_MAX
+  assert(jsont_next(S) == JSONT_NUMBER_INT);
+  assert(jsont_int_value(S) == INT64_MAX);
+
+  // Expect a valid floating point value (although it will have less-than
+  // perfect precision)
+  assert(!isnan(jsont_float_value(S)));
+
+  // Expect a field name 'b/a/r' (written with escaped slashes in the input)
+  jsont_next(S);
+  JSONT_ASSERT_FIELD_NAME("b/a/r");
+
+  // Expect start of array
+  assert(jsont_next(S) == JSONT_ARRAY_START);
+
+  // Expect null, true and false
+  assert(jsont_next(S) == JSONT_NULL);
+  assert(jsont_next(S) == JSONT_TRUE);
+  assert(jsont_next(S) == JSONT_FALSE);
+
+  // { "x": 12.3 }
+  assert(jsont_next(S) == JSONT_OBJECT_START);
+  jsont_next(S);
+  JSONT_ASSERT_FIELD_NAME("x");
+  assert(jsont_next(S) == JSONT_NUMBER_FLOAT);
+  assert(jsont_float_value(S) == 12.3);
+  assert(jsont_next(S) == JSONT_OBJECT_END);
+
+  // 123 (preceded by a newline in the input), "456", "a\"b\""
+  assert(jsont_next(S) == JSONT_NUMBER_INT);
+  assert(jsont_int_value(S) == 123);
+
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "456") == true);
+
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "a\"b\"") == true);
+
+  // "a\u0000b": the value contains a NUL byte, so it is compared by explicit
+  // length rather than as a C string
+  assert(jsont_next(S) == JSONT_STRING);
+  const uint8_t b3[] = {'a',0,'b'};
+  assert(jsont_data_equals(S, b3, sizeof(b3)) == true);
+
+  // "a\{b,f,n,r,t}b"
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "a\bb") == true);
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "a\fb") == true);
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "a\nb") == true);
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "a\rb") == true);
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "a\tb") == true);
+
+  // "" (the empty string must not compare equal to a blank string)
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "") == true);
+  assert(jsont_str_equals(S, "   ") == false);
+
+  // "   " (and a blank string must not compare equal to the empty string)
+  assert(jsont_next(S) == JSONT_STRING);
+  assert(jsont_str_equals(S, "   ") == true);
+  assert(jsont_str_equals(S, "") == false);
+
+  // ] }
+  assert(jsont_next(S) == JSONT_ARRAY_END);
+  assert(jsont_next(S) == JSONT_OBJECT_END);
+
+
+  jsont_destroy(S);
+  printf("PASS\n");
+  return 0;
+}
commit	f417eaf93c086c0695adeb0e9cacd44e7e537b6a	[log] [tgz]
author	Austin Schuh <austin.linux@gmail.com>	Mon Sep 16 21:58:36 2019 -0700
committer	Austin Schuh <austin.linux@gmail.com>	Mon Sep 16 21:58:36 2019 -0700
tree	1b9c3b952bf1501aacc99fbfd40e7a2c730c1b2d