blob: 3e86cad828e75ff7adc48174e8318820c7f09c47 [file] [log] [blame]
// JSON Tokenizer and builder. Copyright (c) 2012, Rasmus Andersson. All rights
// reserved. Use of this source code is governed by a MIT-style license that can
// be found in the LICENSE file.
4#ifndef JSONT_CXX_INCLUDED
5#define JSONT_CXX_INCLUDED
6
#include <assert.h>
#include <math.h>
#include <stdbool.h> // bool
#include <stdint.h>  // uint8_t, int64_t
#include <stdio.h>   // snprintf
#include <stdlib.h>  // size_t, free, realloc
#include <string.h>  // strlen, memcpy
#include <new>       // std::bad_alloc
#include <stdexcept>
#include <string>
15
// Can haz rvalue references with move semantics?
//
// Clang-style feature probes have to be shimmed first: a compiler that lacks
// __has_feature still parses `defined(__has_feature) && __has_feature(x)` in
// full and rejects it with a preprocessor syntax error.
#ifdef __has_feature
  #define JSONT_HAS_FEATURE(x) __has_feature(x)
#else
  #define JSONT_HAS_FEATURE(x) 0
#endif

// Any C++11-or-later compiler qualifies; the remaining probes cover older
// toolchains that shipped rvalue references before reporting C++11.
#if (defined(__cplusplus) && __cplusplus >= 201103L) || \
    (defined(_MSC_VER) && _MSC_VER >= 1600) || \
    (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__) || \
    JSONT_HAS_FEATURE(cxx_rvalue_references)
  #define JSONT_CXX_RVALUE_REFS 1
#else
  #define JSONT_CXX_RVALUE_REFS 0
#endif

#undef JSONT_HAS_FEATURE
24
25namespace jsont {
26
// Tokens produced while reading JSON input. The numeric values are kept
// stable: Integer..FieldName form a contiguous range of value-carrying tokens.
enum Token {
  End = 0,      // Input ended
  ObjectStart,  // {
  ObjectEnd,    // }
  ArrayStart,   // [
  ArrayEnd,     // ]
  True,         // true
  False,        // false
  Null,         // null
  Integer,      // number value without a fraction part
  Float,        // number value with a fraction part
  String,       // string value
  FieldName,    // field name
  Error,        // An error occurred (see `error()` for details)
  _Comma        // NOTE(review): looks like an internal-only marker -- confirm
};
44
// String encoding of input and output bytes. UTF-8 is the only encoding
// currently defined.
enum TextEncoding {
  UTF8TextEncoding = 0
};
49
50// Name of `token`
51const char* token_name(jsont::Token token);
52
53class TokenizerInternal;
54
55// Reads a sequence of bytes and produces tokens and values while doing so
56class Tokenizer {
57public:
58 Tokenizer(const char* bytes, size_t length, TextEncoding encoding);
59 ~Tokenizer();
60
61 // Read next token
62 const Token& next();
63
64 // Access current token
65 const Token& current() const;
66
67 // Reset the tokenizer, making it possible to reuse this parser so to avoid
68 // unnecessary memory allocation and deallocation.
69 void reset(const char* bytes, size_t length, TextEncoding encoding);
70
71 // True if the current token has a value
72 bool hasValue() const;
73
74 // Returns a slice of the input which represents the current value, or nothing
75 // (returns 0) if the current token has no value (e.g. start of an object).
76 size_t dataValue(const char const** bytes) const;
77
78 // Returns a *copy* of the current string value.
79 std::string stringValue() const;
80
81 // Returns the current value as a double-precision floating-point number.
82 double floatValue() const;
83
84 // Returns the current value as a signed 64-bit integer.
85 int64_t intValue() const;
86
87 // Returns the current value as a boolean
88 bool boolValue() const;
89
90 // Error codes
91 typedef enum {
92 UnspecifiedError = 0,
93 UnexpectedComma,
94 UnexpectedTrailingComma,
95 InvalidByte,
96 PrematureEndOfInput,
97 MalformedUnicodeEscapeSequence,
98 MalformedNumberLiteral,
99 UnterminatedString,
100 SyntaxError,
101 } ErrorCode;
102
103 // Returns the error code of the last error
104 ErrorCode error() const;
105
106 // Returns a human-readable message for the last error. Never returns NULL.
107 const char* errorMessage() const;
108
109 // The byte offset into input where the tokenizer is currently looking. In the
110 // event of an error, this will point to the source of the error.
111 size_t inputOffset() const;
112
113 // Total number of input bytes
114 size_t inputSize() const;
115
116 // A pointer to the input data as passed to `reset` or the constructor.
117 const char* inputBytes() const;
118
119 friend class TokenizerInternal;
120private:
121 size_t availableInput() const;
122 size_t endOfInput() const;
123 const Token& setToken(Token t);
124 const Token& setError(ErrorCode error);
125
126 struct {
127 const uint8_t* bytes;
128 size_t length;
129 size_t offset;
130 } _input;
131 struct Value {
132 Value() : offset(0), length(0), buffered(false) {}
133 void beginAtOffset(size_t z);
134 size_t offset; // into _input.bytes
135 size_t length;
136 std::string buffer;
137 bool buffered; // if true, contents lives in buffer
138 } _value;
139 Token _token;
140 struct {
141 ErrorCode code;
142 } _error;
143};
144
145
146// Helps in building JSON, providing a final sequential byte buffer
147class Builder {
148public:
149 Builder() : _buf(0), _capacity(0), _size(0), _state(NeutralState) {}
150 ~Builder() { if (_buf) { free(_buf); _buf = 0; } }
151 Builder(const Builder& other);
152 Builder& operator=(const Builder& other);
153#if JSONT_CXX_RVALUE_REFS
154 Builder(Builder&& other);
155 Builder& operator=(Builder&& other);
156#endif
157
158 Builder& startObject();
159 Builder& endObject();
160 Builder& startArray();
161 Builder& endArray();
162 Builder& fieldName(const char* v, size_t length, TextEncoding e=UTF8TextEncoding);
163 Builder& fieldName(const std::string& name, TextEncoding enc=UTF8TextEncoding);
164 Builder& value(const char* v, size_t length, TextEncoding e=UTF8TextEncoding);
165 Builder& value(const char* v);
166 Builder& value(const std::string& v);
167 Builder& value(double v);
168 Builder& value(int64_t v);
169 Builder& value(int v);
170 Builder& value(unsigned int v);
171 Builder& value(long v);
172 Builder& value(bool v);
173 Builder& nullValue();
174
175 size_t size() const;
176 const char* bytes() const;
177 std::string toString() const;
178 const char* seizeBytes(size_t& size_out);
179 const void reset();
180
181private:
182 size_t available() const;
183 void reserve(size_t size);
184 void prefix();
185 Builder& appendString(const uint8_t* v, size_t length, TextEncoding enc);
186 Builder& appendChar(char byte);
187
188 char* _buf;
189 size_t _capacity;
190 size_t _size;
191 enum {
192 NeutralState = 0,
193 AfterFieldName,
194 AfterValue,
195 AfterObjectStart,
196 AfterArrayStart,
197 } _state;
198};
199
200
201// Convenience function
202inline Builder build() { return Builder(); }
203
204
205// ------------------- internal ---------------------
206
207inline Tokenizer::Tokenizer(const char* bytes, size_t length,
208 TextEncoding encoding) : _token(End) {
209 reset(bytes, length, encoding);
210}
211
212inline const Token& Tokenizer::current() const { return _token; }
213
214inline bool Tokenizer::hasValue() const {
215 return _token >= Integer && _token <= FieldName;
216}
217
218inline std::string Tokenizer::stringValue() const {
219 const char* bytes;
220 size_t size = dataValue(&bytes);
221 return std::string(bytes, size);
222}
223
224inline bool Tokenizer::boolValue() const {
225 return _token == True;
226}
227
228inline size_t Tokenizer::availableInput() const {
229 return _input.length - _input.offset;
230}
231inline size_t Tokenizer::endOfInput() const {
232 return _input.offset == _input.length;
233}
234inline const Token& Tokenizer::setToken(Token t) {
235 return _token = t;
236}
237inline const Token& Tokenizer::setError(Tokenizer::ErrorCode error) {
238 _error.code = error;
239 return _token = Error;
240}
241inline size_t Tokenizer::inputOffset() const {
242 return _input.offset;
243}
244inline size_t Tokenizer::inputSize() const {
245 return _input.length;
246}
247inline const char* Tokenizer::inputBytes() const {
248 return (const char*)_input.bytes;
249}
250
251inline void Tokenizer::Value::beginAtOffset(size_t z) {
252 offset = z;
253 length = 0;
254 buffered = false;
255}
256
257inline Tokenizer::ErrorCode Tokenizer::error() const {
258 return _error.code;
259}
260
261
262inline Builder& Builder::startObject() {
263 prefix();
264 _state = AfterObjectStart;
265 return appendChar('{');
266}
267
268inline Builder& Builder::endObject() {
269 _state = AfterValue;
270 return appendChar('}');
271}
272
273inline Builder& Builder::startArray() {
274 prefix();
275 _state = AfterArrayStart;
276 return appendChar('[');
277}
278
279inline Builder& Builder::endArray() {
280 _state = AfterValue;
281 return appendChar(']');
282}
283
284inline Builder& Builder::fieldName(const std::string& name, TextEncoding enc) {
285 return fieldName(name.data(), name.size(), enc);
286}
287
288inline Builder& Builder::fieldName(const char* v, size_t length,
289 TextEncoding enc) {
290 prefix();
291 _state = AfterFieldName;
292 return appendString((const uint8_t*)v, length, enc);
293}
294
295inline Builder& Builder::value(const char* v, size_t length, TextEncoding enc) {
296 prefix();
297 _state = AfterValue;
298 return appendString((const uint8_t*)v, length, enc);
299}
300
301inline Builder& Builder::value(const char* v) {
302 return value(v, strlen(v));
303}
304
305inline Builder& Builder::value(const std::string& v) {
306 return value(v.data(), v.size());
307}
308
309inline Builder& Builder::value(double v) {
310 prefix();
311 reserve(256);
312 int z = snprintf(_buf+_size, 256, "%g", v);
313 assert(z < 256);
314 _size += z;
315 _state = AfterValue;
316 return *this;
317}
318
319inline Builder& Builder::value(int64_t v) {
320 prefix();
321 reserve(21);
322 int z = snprintf(_buf+_size, 21, "%lld", v);
323 assert(z < 21);
324 _size += z;
325 _state = AfterValue;
326 return *this;
327}
328
329inline Builder& Builder::value(int v) { return value((int64_t)v); }
330inline Builder& Builder::value(unsigned int v) { return value((int64_t)v); }
331inline Builder& Builder::value(long v) { return value((int64_t)v); }
332
333inline Builder& Builder::value(bool v) {
334 prefix();
335 if (v) {
336 reserve(4);
337 _buf[_size] = 't';
338 _buf[++_size] = 'r';
339 _buf[++_size] = 'u';
340 _buf[++_size] = 'e';
341 ++_size;
342 } else {
343 reserve(5);
344 _buf[_size] = 'f';
345 _buf[++_size] = 'a';
346 _buf[++_size] = 'l';
347 _buf[++_size] = 's';
348 _buf[++_size] = 'e';
349 ++_size;
350 }
351 _state = AfterValue;
352 return *this;
353}
354
355inline Builder& Builder::nullValue() {
356 prefix();
357 reserve(4);
358 _buf[_size] = 'n';
359 _buf[++_size] = 'u';
360 _buf[++_size] = 'l';
361 _buf[++_size] = 'l';
362 ++_size;
363 _state = AfterValue;
364 return *this;
365}
366
367inline size_t Builder::size() const { return _size; }
368inline const char* Builder::bytes() const { return _buf; }
369inline std::string Builder::toString() const {
370 return std::string(bytes(), size());
371}
372inline const char* Builder::seizeBytes(size_t& size_out) {
373 const char* buf = _buf;
374 size_out = _size;
375 _buf = 0;
376 _capacity = 0;
377 reset();
378 return buf;
379}
380inline const void Builder::reset() {
381 _size = 0;
382 _state = NeutralState;
383}
384
385inline size_t Builder::available() const {
386 return _capacity - _size;
387}
388
389inline void Builder::reserve(size_t size) {
390 if (available() < size) {
391 #if 0
392 // exact allocation for debugging purposes
393 printf("DEBUG Builder::reserve: size=%zu available=%zu grow_by=%zu\n",
394 size, available(), (size - available()) );
395 _capacity += size - available();
396 #else
397 _capacity += size - available();
398 _capacity = (_capacity < 64) ? 64 : (_capacity * 1.5);
399 #endif
400 _buf = (char*)realloc((void*)_buf, _capacity);
401 }
402}
403
404inline void Builder::prefix() {
405 if (_state == AfterFieldName) {
406 appendChar(':');
407 } else if (_state == AfterValue) {
408 appendChar(',');
409 }
410}
411
412inline Builder& Builder::appendChar(char byte) {
413 reserve(1);
414 _buf[_size++] = byte;
415 return *this;
416}
417
418}
419
420#endif // JSONT_CXX_INCLUDED