// JSON Tokenizer and builder. Copyright (c) 2012, Rasmus Andersson. All rights
// reserved. Use of this source code is governed by a MIT-style license that can
// be found in the LICENSE file.
#ifndef JSONT_CXX_INCLUDED
#define JSONT_CXX_INCLUDED
#include <stdint.h> // uint8_t, int64_t
#include <stdlib.h> // size_t
#include <string.h> // strlen
#include <stdbool.h> // bool
#include <math.h>
#include <stdio.h> // snprintf (used by Builder::value(double) and value(int64_t))
#include <assert.h>
#include <string>
#include <stdexcept>
// Can haz rvalue references with move semantics?
#ifndef __has_feature
#define __has_feature(x) 0 // compatibility with compilers lacking __has_feature
#endif
#if (defined(_MSC_VER) && _MSC_VER >= 1600) || \
(defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__) || \
__has_feature(cxx_rvalue_references)
#define JSONT_CXX_RVALUE_REFS 1
#else
#define JSONT_CXX_RVALUE_REFS 0
#endif
namespace jsont {
// Tokens
typedef enum {
End = 0, // Input ended
ObjectStart, // {
ObjectEnd, // }
ArrayStart, // [
ArrayEnd, // ]
True, // true
False, // false
Null, // null
Integer, // number value without a fraction part
Float, // number value with a fraction part
String, // string value
FieldName, // field name
Error, // An error occurred (see `error()` for details)
_Comma,
} Token;
// String encoding
typedef enum {
UTF8TextEncoding = 0,
} TextEncoding;
// Name of `token`
const char* token_name(jsont::Token token);
class TokenizerInternal;
// Reads a sequence of bytes and produces tokens and values while doing so
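// Example (a minimal usage sketch; `json` is assumed to be a NUL-terminated
// string supplied by the caller):
//
//   jsont::Tokenizer tokenizer(json, strlen(json), jsont::UTF8TextEncoding);
//   jsont::Token t;
//   while ((t = tokenizer.next()) != jsont::End && t != jsont::Error) {
//     printf("%s", jsont::token_name(t));
//     if (tokenizer.hasValue()) printf(" '%s'", tokenizer.stringValue().c_str());
//     printf("\n");
//   }
//   if (t == jsont::Error) {
//     fprintf(stderr, "error at offset %zu: %s\n",
//             tokenizer.inputOffset(), tokenizer.errorMessage());
//   }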
class Tokenizer {
public:
Tokenizer(const char* bytes, size_t length, TextEncoding encoding);
~Tokenizer();
// Read next token
const Token& next();
// Access current token
const Token& current() const;
// Reset the tokenizer so this parser can be reused, avoiding unnecessary
// memory allocation and deallocation.
void reset(const char* bytes, size_t length, TextEncoding encoding);
// True if the current token has a value
bool hasValue() const;
// Returns a slice of the input which represents the current value, or nothing
// (returns 0) if the current token has no value (e.g. start of an object).
size_t dataValue(const char** bytes) const;
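// Example (sketch; `tokenizer` is assumed to be positioned at a value token):
//
//   const char* bytes;
//   size_t len = tokenizer.dataValue(&bytes);
//   if (len) { /* the value occupies bytes[0..len-1]; no copy is made */ }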
// Returns a *copy* of the current string value.
std::string stringValue() const;
// Returns the current value as a double-precision floating-point number.
double floatValue() const;
// Returns the current value as a signed 64-bit integer.
int64_t intValue() const;
// Returns the current value as a boolean
bool boolValue() const;
// Error codes
typedef enum {
UnspecifiedError = 0,
UnexpectedComma,
UnexpectedTrailingComma,
InvalidByte,
PrematureEndOfInput,
MalformedUnicodeEscapeSequence,
MalformedNumberLiteral,
UnterminatedString,
SyntaxError,
} ErrorCode;
// Returns the error code of the last error
ErrorCode error() const;
// Returns a human-readable message for the last error. Never returns NULL.
const char* errorMessage() const;
// The byte offset into input where the tokenizer is currently looking. In the
// event of an error, this will point to the source of the error.
size_t inputOffset() const;
// Total number of input bytes
size_t inputSize() const;
// A pointer to the input data as passed to `reset` or the constructor.
const char* inputBytes() const;
friend class TokenizerInternal;
private:
size_t availableInput() const;
bool endOfInput() const;
const Token& setToken(Token t);
const Token& setError(ErrorCode error);
struct {
const uint8_t* bytes;
size_t length;
size_t offset;
} _input;
struct Value {
Value() : offset(0), length(0), buffered(false) {}
void beginAtOffset(size_t z);
size_t offset; // into _input.bytes
size_t length;
std::string buffer;
bool buffered; // if true, the contents live in `buffer`
} _value;
Token _token;
struct {
ErrorCode code;
} _error;
};
// Helps build JSON, accumulating output into a single contiguous byte buffer
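// Example (a minimal usage sketch; the exact bytes produced for strings depend
// on appendString(), which is defined out-of-line):
//
//   jsont::Builder b;
//   b.startObject()
//    .fieldName("name").value("John")
//    .fieldName("age").value(42)
//    .endObject();
//   std::string json = b.toString();  // e.g. {"name":"John","age":42}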
class Builder {
public:
Builder() : _buf(0), _capacity(0), _size(0), _state(NeutralState) {}
~Builder() { if (_buf) { free(_buf); _buf = 0; } }
Builder(const Builder& other);
Builder& operator=(const Builder& other);
#if JSONT_CXX_RVALUE_REFS
Builder(Builder&& other);
Builder& operator=(Builder&& other);
#endif
Builder& startObject();
Builder& endObject();
Builder& startArray();
Builder& endArray();
Builder& fieldName(const char* v, size_t length, TextEncoding e=UTF8TextEncoding);
Builder& fieldName(const std::string& name, TextEncoding enc=UTF8TextEncoding);
Builder& value(const char* v, size_t length, TextEncoding e=UTF8TextEncoding);
Builder& value(const char* v);
Builder& value(const std::string& v);
Builder& value(double v);
Builder& value(int64_t v);
Builder& value(int v);
Builder& value(unsigned int v);
Builder& value(long v);
Builder& value(bool v);
Builder& nullValue();
size_t size() const;
const char* bytes() const;
std::string toString() const;
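// Transfers ownership of the internal buffer to the caller, who becomes
// responsible for free()ing it. `size_out` is set to the number of bytes.
// The builder is reset and can be reused afterwards.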
const char* seizeBytes(size_t& size_out);
// Reset the builder so it can be reused; any internal buffer is retained.
void reset();
private:
size_t available() const;
void reserve(size_t size);
void prefix();
Builder& appendString(const uint8_t* v, size_t length, TextEncoding enc);
Builder& appendChar(char byte);
char* _buf;
size_t _capacity;
size_t _size;
enum {
NeutralState = 0,
AfterFieldName,
AfterValue,
AfterObjectStart,
AfterArrayStart,
} _state;
};
// Convenience function
inline Builder build() { return Builder(); }
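// Example (sketch): the Builder returned by build() can be chained directly:
//
//   std::string json =
//     jsont::build().startArray().value(1).value(2).endArray().toString();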
// ------------------- internal ---------------------
inline Tokenizer::Tokenizer(const char* bytes, size_t length,
TextEncoding encoding) : _token(End) {
reset(bytes, length, encoding);
}
inline const Token& Tokenizer::current() const { return _token; }
inline bool Tokenizer::hasValue() const {
return _token >= Integer && _token <= FieldName;
}
inline std::string Tokenizer::stringValue() const {
const char* bytes;
size_t size = dataValue(&bytes);
return std::string(bytes, size);
}
inline bool Tokenizer::boolValue() const {
return _token == True;
}
inline size_t Tokenizer::availableInput() const {
return _input.length - _input.offset;
}
inline bool Tokenizer::endOfInput() const {
return _input.offset == _input.length;
}
inline const Token& Tokenizer::setToken(Token t) {
return _token = t;
}
inline const Token& Tokenizer::setError(Tokenizer::ErrorCode error) {
_error.code = error;
return _token = Error;
}
inline size_t Tokenizer::inputOffset() const {
return _input.offset;
}
inline size_t Tokenizer::inputSize() const {
return _input.length;
}
inline const char* Tokenizer::inputBytes() const {
return (const char*)_input.bytes;
}
inline void Tokenizer::Value::beginAtOffset(size_t z) {
offset = z;
length = 0;
buffered = false;
}
inline Tokenizer::ErrorCode Tokenizer::error() const {
return _error.code;
}
inline Builder& Builder::startObject() {
prefix();
_state = AfterObjectStart;
return appendChar('{');
}
inline Builder& Builder::endObject() {
_state = AfterValue;
return appendChar('}');
}
inline Builder& Builder::startArray() {
prefix();
_state = AfterArrayStart;
return appendChar('[');
}
inline Builder& Builder::endArray() {
_state = AfterValue;
return appendChar(']');
}
inline Builder& Builder::fieldName(const std::string& name, TextEncoding enc) {
return fieldName(name.data(), name.size(), enc);
}
inline Builder& Builder::fieldName(const char* v, size_t length,
TextEncoding enc) {
prefix();
_state = AfterFieldName;
return appendString((const uint8_t*)v, length, enc);
}
inline Builder& Builder::value(const char* v, size_t length, TextEncoding enc) {
prefix();
_state = AfterValue;
return appendString((const uint8_t*)v, length, enc);
}
inline Builder& Builder::value(const char* v) {
return value(v, strlen(v));
}
inline Builder& Builder::value(const std::string& v) {
return value(v.data(), v.size());
}
inline Builder& Builder::value(double v) {
prefix();
reserve(256);
int z = snprintf(_buf+_size, 256, "%g", v);
assert(z < 256);
_size += z;
_state = AfterValue;
return *this;
}
inline Builder& Builder::value(int64_t v) {
prefix();
reserve(21);
int z = snprintf(_buf+_size, 21, "%lld", (long long)v);
assert(z < 21);
_size += z;
_state = AfterValue;
return *this;
}
inline Builder& Builder::value(int v) { return value((int64_t)v); }
inline Builder& Builder::value(unsigned int v) { return value((int64_t)v); }
inline Builder& Builder::value(long v) { return value((int64_t)v); }
inline Builder& Builder::value(bool v) {
prefix();
if (v) {
reserve(4);
_buf[_size] = 't';
_buf[++_size] = 'r';
_buf[++_size] = 'u';
_buf[++_size] = 'e';
++_size;
} else {
reserve(5);
_buf[_size] = 'f';
_buf[++_size] = 'a';
_buf[++_size] = 'l';
_buf[++_size] = 's';
_buf[++_size] = 'e';
++_size;
}
_state = AfterValue;
return *this;
}
inline Builder& Builder::nullValue() {
prefix();
reserve(4);
_buf[_size] = 'n';
_buf[++_size] = 'u';
_buf[++_size] = 'l';
_buf[++_size] = 'l';
++_size;
_state = AfterValue;
return *this;
}
inline size_t Builder::size() const { return _size; }
inline const char* Builder::bytes() const { return _buf; }
inline std::string Builder::toString() const {
return std::string(bytes(), size());
}
inline const char* Builder::seizeBytes(size_t& size_out) {
const char* buf = _buf;
size_out = _size;
_buf = 0;
_capacity = 0;
reset();
return buf;
}
inline void Builder::reset() {
_size = 0;
_state = NeutralState;
}
inline size_t Builder::available() const {
return _capacity - _size;
}
inline void Builder::reserve(size_t size) {
if (available() < size) {
#if 0
// exact allocation for debugging purposes
printf("DEBUG Builder::reserve: size=%zu available=%zu grow_by=%zu\n",
size, available(), (size - available()) );
_capacity += size - available();
#else
_capacity += size - available();
_capacity = (_capacity < 64) ? 64 : (_capacity * 1.5);
#endif
_buf = (char*)realloc((void*)_buf, _capacity);
}
}
inline void Builder::prefix() {
if (_state == AfterFieldName) {
appendChar(':');
} else if (_state == AfterValue) {
appendChar(',');
}
}
inline Builder& Builder::appendChar(char byte) {
reserve(1);
_buf[_size++] = byte;
return *this;
}
} // namespace jsont
#endif // JSONT_CXX_INCLUDED