1: <?php
2:
3: // https://github.com/colinodell/json5/blob/main/src/Json5Decoder.php
4: // https://github.com/colinodell/json5/blob/main/src/SyntaxError.php
5:
6: /*
7: * This file is part of the colinodell/json5 package.
8: *
9: * (c) Colin O'Dell <colinodell@gmail.com>
10: *
11: * Based on the official JSON5 implementation for JavaScript (https://github.com/json5/json5)
12: * - (c) 2012-2016 Aseem Kishore and others (https://github.com/json5/json5/contributors)
13: *
14: * For the full copyright and license information, please view the LICENSE
15: * file that was distributed with this source code.
16: */
17:
18: namespace ngatngay;
19:
/**
 * JSON encoder / JSON5 decoder.
 *
 * encode() is a thin wrapper around PHP's native json_encode().
 * decode() first tries PHP's native json_decode() and, when that fails,
 * falls back to a hand-written recursive-descent JSON5 parser that works
 * byte-by-byte over the input string.
 *
 * All parse failures raised by the JSON5 parser are reported as an
 * (anonymous) subclass of \JsonException that additionally exposes
 * getLineNumber() and getColumn().
 */
class json
{
    /** Length of the input, in bytes. */
    private int $length;

    /** Byte offset of the current parse position within the input. */
    private int $at = 0;

    /** Byte at the current parse position, or null once the end of the input is reached. */
    private ?string $currentByte;

    /** 1-based line number of the current parse position (used in error messages). */
    private int $lineNumber = 1;

    /** Current nesting depth; starts at 1 and grows by one for every open array/object. */
    private int $depth = 1;

    /** Byte offset at which the current line starts (used to compute the error column). */
    private int $currentLineStartsAt = 0;

    /**
     * Private constructor; parser instances are only created by decode().
     *
     * @param string $json               The text to parse.
     * @param bool   $associative        When true, objects are returned as associative arrays.
     * @param int    $maxDepth           Maximum nesting depth before a syntax error is raised.
     * @param bool   $castBigIntToString When true, integers too large for a PHP int are returned as strings.
     */
    private function __construct(
        private string $json,
        private bool $associative = false,
        private int $maxDepth = 512,
        private bool $castBigIntToString = false
    ) {
        $this->length = \strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Encodes a value as JSON.
     *
     * All arguments are forwarded unchanged to PHP's json_encode().
     *
     * @param mixed ...$args Arguments accepted by json_encode().
     *
     * @return string|false Whatever json_encode() returns.
     */
    public static function encode(...$args)
    {
        return \json_encode(...$args);
    }

    /**
     * Takes a JSON or JSON5 encoded string and converts it into a PHP variable.
     *
     * The parameters exactly match PHP's json_decode() function - see
     * http://php.net/manual/en/function.json-decode.php for more information.
     *
     * @param string    $json        The JSON string being decoded.
     * @param bool|null $associative When TRUE, returned objects will be converted into associative arrays.
     *                               When NULL, the JSON_OBJECT_AS_ARRAY flag decides.
     * @param int       $depth       User specified recursion depth.
     * @param int       $flags       Bitmask of JSON decode options.
     *
     * @throws \JsonException if the string could not be parsed as JSON or JSON5.
     */
    public static function decode(string $json, ?bool $associative = false, int $depth = 512, int $flags = 0): mixed
    {
        // Try parsing with json_decode first, since that's much faster
        try {
            $result = \json_decode($json, $associative, $depth, $flags);

            // With JSON_THROW_ON_ERROR the global error state is not reset by
            // json_decode(), so json_last_error() may still report a failure from
            // an unrelated earlier call. Reaching this line without an exception
            // already proves that decoding succeeded.
            if (($flags & \JSON_THROW_ON_ERROR) !== 0 || \json_last_error() === \JSON_ERROR_NONE) {
                return $result;
            }
        } catch (\Throwable $e) {
            // Deliberately ignored: any failure here means "not plain JSON",
            // so we continue and parse the input as JSON5 below.
        }

        // Fall back to JSON5 if that fails
        $asArray = $associative === true
            || ($associative === null && ($flags & \JSON_OBJECT_AS_ARRAY) !== 0);
        $castBigIntToString = (bool) ($flags & \JSON_BIGINT_AS_STRING);

        $decoder = new self($json, $asArray, $depth, $castBigIntToString);

        $result = $decoder->value();
        $decoder->white();

        // Anything left after the value (and trailing whitespace/comments) is an
        // error. Compare against null explicitly: a leftover "0" byte is falsy.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the byte at the given offset, or null when the offset is at or past the end of the input.
     */
    private function getByte(int $at): ?string
    {
        if ($at >= $this->length) {
            return null;
        }

        return $this->json[$at];
    }

    /**
     * Returns the (possibly multi-byte) character starting at the current position,
     * or null at the end of the input. Only used to render error messages.
     */
    private function currentChar(): ?string
    {
        if ($this->at >= $this->length) {
            return null;
        }

        // Take up to 4 bytes and let mbstring cut out the first character.
        return \mb_substr(\substr($this->json, $this->at, 4), 0, 1);
    }

    /**
     * Advance to the next byte.
     *
     * Keeps the line bookkeeping up to date; once the end of the input is
     * reached, the current byte becomes null.
     */
    private function next(): void
    {
        // A line ends on "\n", or on a "\r" that is not part of a "\r\n" pair
        // (in which case the following "\n" does the counting).
        if ($this->currentByte === "\n" || ($this->currentByte === "\r" && $this->peek() !== "\n")) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;

        $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Advance to the next byte if the current one equals $c, or fail with a syntax error.
     *
     * @throws \JsonException when the current byte differs from $c.
     */
    private function nextOrFail(string $c): void
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(\sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        $this->next();
    }

    /**
     * Get the byte after the current one without consuming anything
     * (null when there is none).
     */
    private function peek(): ?string
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Attempt to match a regular expression at the current position on the current line.
     *
     * This function will not match across multiple lines. On success the
     * position is moved past the end of the match and the matched text is
     * returned; otherwise null is returned and nothing is consumed.
     */
    private function match(string $regex): ?string
    {
        $subject = \substr($this->json, $this->at);

        // Only match on the current line
        $lineEnd = \strpos($subject, "\n");
        if ($lineEnd !== false) {
            $subject = \substr($subject, 0, $lineEnd);
        }

        if (!\preg_match($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        [$text, $offset] = $matches[0];

        $this->at += $offset + \strlen($text);
        $this->currentByte = $this->getByte($this->at);

        return $text;
    }

    /**
     * Parse an identifier.
     *
     * Normally, reserved words are disallowed here, but we
     * only use this for unquoted object keys, where reserved words are allowed,
     * so we don't check for those here. References:
     * - http://es5.github.com/#x7.6
     * - https://developer.mozilla.org/en/Core_JavaScript_1.5_Guide/Core_Language_Features#Variables
     * - http://docstore.mik.ua/orelly/webprog/jscript/ch02_07.htm
     *
     * @throws \JsonException when no identifier starts at the current position.
     */
    private function identifier(): string
    {
        // @codingStandardsIgnoreStart
        // \x{200C} and \x{200D} are ZERO WIDTH NON-JOINER / ZERO WIDTH JOINER, which are
        // allowed inside (but not at the start of) an identifier. They are written as
        // escapes so the invisible characters cannot get lost when this file is edited.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\x{200C}\x{200D}]|\\\\u[0-9A-Fa-f]{4})*/u');
        // @codingStandardsIgnoreEnd

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        // Un-escape escaped Unicode chars by letting json_decode() interpret
        // each run of \uXXXX sequences as a JSON string.
        $unescaped = \preg_replace_callback('/(?:\\\\u[0-9A-Fa-f]{4})+/', static function (array $m) {
            return \json_decode('"' . $m[0] . '"');
        }, $match);

        return $unescaped;
    }

    /**
     * Parse a number.
     *
     * Handles an optional leading sign, Infinity, NaN, hexadecimal literals
     * (0x...) and decimal literals with optional fraction and exponent.
     * Literals with a leading zero followed by another digit are rejected.
     *
     * @return int|float|string A string is only returned for integer literals that
     *                          overflow a PHP int while big-int-as-string is enabled.
     *
     * @throws \JsonException when the text is not a valid, finite number.
     */
    private function number(): int|float|string
    {
        $sign = '';
        $string = '';
        $isHex = false;

        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        // support for Infinity
        if ($this->currentByte === 'I') {
            $this->word();

            return ($sign === '-') ? -INF : INF;
        }

        // support for NaN
        if ($this->currentByte === 'N') {
            // ignore sign as -NaN also is NaN
            return $this->word();
        }

        if ($this->currentByte === '0') {
            $string .= $this->currentByte;
            $this->next();
            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $string .= $this->currentByte;
                $this->next();
                $isHex = true;
            } elseif (\is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($isHex) {
            $digits = $this->match('/^[A-Fa-f0-9]+/');
            if ($digits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // hexdec() accepts the leading "0x" prefix collected above.
            $number = \hexdec($string . $digits);
        } else {
            // @codingStandardsIgnoreStart
            if ((\is_numeric($this->currentByte) || $this->currentByte === '.') && ($match = $this->match('/^\d*\.?\d*/')) !== null) {
                $string .= $match;
            }
            if (($this->currentByte === 'E' || $this->currentByte === 'e') && ($match = $this->match('/^[Ee][-+]?\d*/')) !== null) {
                $string .= $match;
            }
            // @codingStandardsIgnoreEnd
            $number = $string;
        }

        if ($sign === '-') {
            $number = '-' . $number;
        }

        // Reject malformed text as well as values that overflow to +/-INF.
        if (!\is_numeric($number) || !\is_finite((float) $number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Adding 0 will automatically cast this to an int or float
        $asIntOrFloat = $number + 0;

        // An integer-looking literal that became a float did not fit into an int.
        $isIntLike = \preg_match('/^-?\d+$/', (string) $number) === 1;
        if ($this->castBigIntToString && $isIntLike && \is_float($asIntOrFloat)) {
            return $number;
        }

        return $asIntOrFloat;
    }

    /**
     * Parse a single- or double-quoted string.
     *
     * The current byte must be the opening quote. Supports \uXXXX escapes,
     * the simple escapes known to getEscapee() and backslash line
     * continuations; any other escape, an unescaped "\n" or a missing
     * closing quote is a syntax error.
     *
     * @throws \JsonException when the string is malformed or unterminated.
     */
    private function string(): string
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();
        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                // Consume a whole run of \uXXXX escapes at once so that surrogate
                // pairs are decoded together.
                if ($this->peek() === 'u' && ($unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/'))) {
                    try {
                        $string .= \json_decode('"' . $unicodeEscaped . '"', false, 1, JSON_THROW_ON_ERROR);
                    } catch (\JsonException $e) {
                        $this->throwSyntaxError($e->getMessage());
                    }
                    continue;
                }

                $this->next();
                if ($this->currentByte === null) {
                    // The input ended right after the backslash.
                    break;
                }

                if ($this->currentByte === "\r") {
                    // Line continuation: swallow "\r" or "\r\n".
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // unescaped newlines are invalid; see:
                //   https://github.com/json5/json5/issues/24
                // @todo this feels special-cased; are there other invalid unescaped chars?
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skip an inline comment, assuming this is one.
     *
     * The current character should be the second / character in the // pair that begins this inline comment.
     * To finish the inline comment, we look for a newline or the end of the text.
     */
    private function inlineComment(): void
    {
        do {
            $this->next();
            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skip a block comment, assuming this is one.
     *
     * The current character should be the star character of the slash-star pair
     * that begins this block comment. To finish the block comment, we look for a
     * closing star-slash pair of characters, but we also watch for the end of
     * text before the comment is terminated.
     *
     * @throws \JsonException when the comment is never closed.
     */
    private function blockComment(): void
    {
        do {
            $this->next();
            // A run of stars may end in the closing slash.
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');
                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skip a comment, whether inline or block-level, assuming this is one.
     *
     * @throws \JsonException when the leading slash does not start a comment.
     */
    private function comment(): void
    {
        // Comments always begin with a / character.
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();
        } elseif ($this->currentByte === '*') {
            $this->blockComment();
        } else {
            $this->throwSyntaxError('Unrecognized comment');
        }
    }

    /**
     * Skip whitespace and comments.
     *
     * Note that we're detecting comments by only a single / character.
     * This works since regular expressions are not valid JSON(5), but this will
     * break if there are other valid values that begin with a / character!
     */
    private function white(): void
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
            } elseif (\preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
            } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                // Non-breaking space in UTF-8 (two bytes: 0xC2 0xA0)
                $this->next();
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Matches true, false, null, Infinity and NaN.
     *
     * The keyword is selected by the current byte; every following byte must
     * then match the keyword exactly.
     *
     * @throws \JsonException when the current byte starts none of the keywords
     *                        or the keyword is misspelled.
     */
    private function word(): bool|float|null
    {
        $keyword = match ($this->currentByte) {
            't' => 'true',
            'f' => 'false',
            'n' => 'null',
            'I' => 'Infinity',
            'N' => 'NaN',
            default => null,
        };

        if ($keyword === null) {
            $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
        }

        foreach (\str_split($keyword) as $expected) {
            $this->nextOrFail($expected);
        }

        return match ($keyword) {
            'true' => true,
            'false' => false,
            'null' => null,
            'Infinity' => INF,
            'NaN' => NAN,
        };
    }

    /**
     * Parse an array value.
     *
     * The current byte must be the opening bracket. A single trailing comma is
     * allowed; omitted elements (two commas in a row, or a leading comma) are not.
     *
     * @throws \JsonException on malformed input or when the maximum depth is exceeded.
     */
    private function arr(): array
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr; // Potentially empty array
            }

            // ES5 allows omitting elements in arrays, e.g. [,] and
            // [,null]. We don't allow this in JSON5.
            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();

            $this->white();
            // If there's no comma after this value, this needs to
            // be the end of the array.
            if ($this->currentByte !== ',') {
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid array');
    }

    /**
     * Parse an object value.
     *
     * The current byte must be the opening brace. Keys may be quoted strings or
     * unquoted identifiers, and a single trailing comma is allowed. The result
     * is an associative array or a \stdClass, depending on how the parser was
     * configured.
     *
     * @throws \JsonException on malformed input or when the maximum depth is exceeded.
     */
    private function obj(): array|object
    {
        $object = $this->associative ? [] : new \stdClass();

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();
        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object; // Potentially empty object
            }

            // Keys can be unquoted. If they are, they need to be
            // valid JS identifiers.
            if ($this->currentByte === '"' || $this->currentByte === "'") {
                $key = $this->string();
            } else {
                $key = $this->identifier();
            }

            $this->white();
            $this->nextOrFail(':');
            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }

            $this->white();
            // If there's no comma after this pair, this needs to be
            // the end of the object.
            if ($this->currentByte !== ',') {
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid object');
    }

    /**
     * Parse a JSON value.
     *
     * It could be an object, an array, a string, a number,
     * or a word. Leading whitespace and comments are skipped first.
     *
     * @throws \JsonException when no valid value starts at the current position.
     */
    private function value(): mixed
    {
        $this->white();

        return match ($this->currentByte) {
            '{' => $this->obj(),
            '[' => $this->arr(),
            '"', "'" => $this->string(),
            '-', '+', '.' => $this->number(),
            default => \is_numeric($this->currentByte) ? $this->number() : $this->word(),
        };
    }

    /**
     * Always throws: raises a syntax error for the current parse position.
     *
     * The exception is an anonymous subclass of \JsonException whose message
     * ends with the line and column of the current position and which also
     * exposes them through getLineNumber() and getColumn().
     *
     * @throws \JsonException
     *
     * @phpstan-return never
     */
    private function throwSyntaxError(string $message): void
    {
        // Calculate the column number: characters (not bytes) between the start
        // of the current line and the current position, 1-based.
        $str = \substr($this->json, $this->currentLineStartsAt, $this->at - $this->currentLineStartsAt);
        $column = \mb_strlen($str) + 1;

        throw new class ($message, $this->lineNumber, $column) extends \JsonException {
            public function __construct(
                string $message,
                private int $lineNumber,
                private int $column,
                \Throwable|null $previous = null
            ) {
                $message = \sprintf('%s at line %d column %d of the JSON5 data', $message, $lineNumber, $column);
                parent::__construct($message, 0, $previous);
            }

            public function getLineNumber(): int
            {
                return $this->lineNumber;
            }

            public function getColumn(): int
            {
                return $this->column;
            }
        };
    }

    /**
     * Renders a character for use in an error message: quoted, or "EOF" for null.
     */
    private static function renderChar(?string $chr): string
    {
        return $chr === null ? 'EOF' : "'" . $chr . "'";
    }

    /**
     * Returns the text produced by the escape sequence "\$ch" inside a string,
     * or null when $ch is not a supported escape character.
     *
     * An escaped "\n" yields the empty string (line continuation).
     */
    private static function getEscapee(string $ch): ?string
    {
        return match ($ch) {
            "'" => "'",
            '"' => '"',
            '\\' => '\\',
            '/' => '/',
            "\n" => '',
            'b' => \chr(8),
            'f' => "\f",
            'n' => "\n",
            'r' => "\r",
            't' => "\t",
            default => null,
        };
    }
}