1: | <?php |
2: | |
3: | |
4: | |
5: | |
6: | |
7: | |
8: | |
9: | |
10: | |
11: | |
12: | |
13: | |
14: | |
15: | |
16: | |
17: | |
18: | namespace ngatngay; |
19: | |
/**
 * JSON helper with a lenient, JSON5-style fallback decoder.
 *
 * encode() delegates to the native json_encode(). decode() first tries the
 * native json_decode(); when that fails, the input is re-parsed by the
 * hand-written recursive-descent parser below, which additionally accepts
 * comments, single-quoted strings, unquoted object keys, trailing commas,
 * hexadecimal numbers, a leading "+" sign, a leading/trailing decimal point,
 * Infinity and NaN.
 *
 * The parser works on raw bytes: $at is a byte offset and $currentByte is the
 * byte at that offset (null once the end of the input has been reached).
 */
class json
{
    /** Length of the input in bytes. */
    private int $length;

    /** Byte offset of the parser cursor. */
    private int $at = 0;

    /** Byte under the cursor, or null at end of input. */
    private ?string $currentByte;

    /** 1-based line number of the cursor, used for error messages. */
    private int $lineNumber = 1;

    /** Current nesting level; the top-level value is level 1. */
    private int $depth = 1;

    /** Byte offset at which the current line begins, used to compute columns. */
    private int $currentLineStartsAt = 0;

    /**
     * Instances are only created by decode().
     *
     * @param string $json               Raw input.
     * @param bool   $associative        Decode objects as arrays instead of stdClass.
     * @param int    $maxDepth           Nesting limit; exceeding it raises a syntax error.
     * @param bool   $castBigIntToString Return integers that overflow to float as strings.
     */
    private function __construct(
        private string $json,
        private bool $associative = false,
        private int $maxDepth = 512,
        private bool $castBigIntToString = false
    ) {
        $this->length = \strlen($json);
        $this->currentByte = $this->getByte(0);
    }

    /**
     * Thin wrapper around the native json_encode(); arguments are forwarded as-is.
     *
     * @return string|false Whatever json_encode() returns.
     */
    public static function encode(...$args)
    {
        return \json_encode(...$args);
    }

    /**
     * Decodes a JSON document, falling back to the lenient parser when the
     * native decoder rejects the input.
     *
     * @param string    $json        Document to decode.
     * @param bool|null $associative true: objects become arrays; false: stdClass;
     *                               null: decided by JSON_OBJECT_AS_ARRAY in $flags.
     * @param int       $depth       Maximum nesting depth.
     * @param int       $flags       json_decode() flags. The fallback parser honours
     *                               JSON_OBJECT_AS_ARRAY and JSON_BIGINT_AS_STRING.
     *
     * @return mixed The decoded value.
     *
     * @throws \JsonException When the fallback parser cannot parse the input either.
     */
    public static function decode(string $json, ?bool $associative = false, int $depth = 512, int $flags = 0): mixed
    {
        // Fast path: strict JSON handled by the native decoder.
        try {
            $result = \json_decode($json, $associative, $depth, $flags);

            // With JSON_THROW_ON_ERROR the global error state is not updated, so
            // "no exception" is the only reliable success signal in that mode;
            // json_last_error() could still hold a stale error from an earlier call.
            if (($flags & \JSON_THROW_ON_ERROR) !== 0 || \json_last_error() === \JSON_ERROR_NONE) {
                return $result;
            }
        } catch (\Throwable) {
            // Deliberate best effort: any native failure falls through to the lenient parser.
        }

        $asArray = $associative === true
            || ($associative === null && ($flags & \JSON_OBJECT_AS_ARRAY) !== 0);
        $bigIntAsString = ($flags & \JSON_BIGINT_AS_STRING) !== 0;

        $decoder = new self($json, $asArray, $depth, $bigIntAsString);

        $result = $decoder->value();
        $decoder->white();

        // Compare against null explicitly: a trailing "0" byte is falsy but is still garbage.
        if ($decoder->currentByte !== null) {
            $decoder->throwSyntaxError('Syntax error');
        }

        return $result;
    }

    /**
     * Returns the byte at the given offset, or null when the offset is at or past the end.
     */
    private function getByte(int $at): ?string
    {
        return $at >= $this->length ? null : $this->json[$at];
    }

    /**
     * Returns the (possibly multi-byte) UTF-8 character under the cursor, or
     * null at end of input. Only used to render error messages.
     */
    private function currentChar(): ?string
    {
        if ($this->at >= $this->length) {
            return null;
        }

        // A UTF-8 character is at most 4 bytes long. The encoding is pinned so the
        // result does not depend on the global mb_internal_encoding() setting.
        return \mb_substr(\substr($this->json, $this->at, 4), 0, 1, 'UTF-8');
    }

    /**
     * Advances the cursor by one byte and keeps the line bookkeeping in sync.
     */
    private function next(): void
    {
        // A line ends at "\n", or at a "\r" that is not the first half of "\r\n"
        // (in that case the following "\n" does the counting).
        $endsLine = $this->currentByte === "\n"
            || ($this->currentByte === "\r" && $this->peek() !== "\n");

        if ($endsLine) {
            $this->lineNumber++;
            $this->currentLineStartsAt = $this->at + 1;
        }

        $this->at++;
        $this->currentByte = $this->getByte($this->at);
    }

    /**
     * Consumes the current byte, which must equal $c.
     *
     * @throws \JsonException When the current byte differs from $c.
     */
    private function nextOrFail(string $c): void
    {
        if ($c !== $this->currentByte) {
            $this->throwSyntaxError(\sprintf(
                'Expected %s instead of %s',
                self::renderChar($c),
                self::renderChar($this->currentChar())
            ));
        }

        $this->next();
    }

    /**
     * Returns the byte following the cursor without consuming anything, or null at the end.
     */
    private function peek(): ?string
    {
        return $this->getByte($this->at + 1);
    }

    /**
     * Tries to match $regex against the rest of the current line, starting at
     * the cursor. On success the cursor is moved past the match.
     *
     * @param string $regex PCRE pattern; callers anchor it with "^".
     *
     * @return string|null The matched text, or null when the pattern does not match.
     */
    private function match(string $regex): ?string
    {
        // Slice only up to the next newline instead of copying the whole remaining
        // input on every call, which was quadratic on large documents.
        $lineEnd = $this->at < $this->length ? \strpos($this->json, "\n", $this->at) : false;
        $subject = $lineEnd === false
            ? \substr($this->json, $this->at)
            : \substr($this->json, $this->at, $lineEnd - $this->at);

        if (\preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE) !== 1) {
            return null;
        }

        [$text, $offset] = $matches[0];

        $this->at += $offset + \strlen($text);
        $this->currentByte = $this->getByte($this->at);

        return $text;
    }

    /**
     * Parses an unquoted object key and resolves any \uXXXX escapes in it.
     *
     * @throws \JsonException When no identifier starts at the cursor or an escape cannot be decoded.
     */
    private function identifier(): string
    {
        // First alternative group: allowed start characters; second: allowed continuation characters.
        $match = $this->match('/^(?:[\$_\p{L}\p{Nl}]|\\\\u[0-9A-Fa-f]{4})(?:[\$_\p{L}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]|\\\\u[0-9A-Fa-f]{4})*/u');

        if ($match === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        if (!\str_contains($match, '\\')) {
            return $match;
        }

        // Runs of consecutive escapes are decoded together so surrogate pairs survive.
        // An undecodable run (e.g. a lone surrogate) is reported rather than silently
        // dropped from the key.
        try {
            $unescaped = \preg_replace_callback(
                '/(?:\\\\u[0-9A-Fa-f]{4})+/',
                static fn (array $m): string => \json_decode('"' . $m[0] . '"', false, 1, \JSON_THROW_ON_ERROR),
                $match
            );
        } catch (\JsonException) {
            $unescaped = null;
        }

        if ($unescaped === null) {
            $this->throwSyntaxError('Bad identifier as unquoted key');
        }

        return $unescaped;
    }

    /**
     * Parses a number: optional sign, then Infinity, NaN, a hexadecimal literal
     * or a decimal literal with optional fraction and exponent.
     *
     * @return int|float|string A string is returned only for integers that overflow
     *                          to float while big-int-as-string mode is active.
     *
     * @throws \JsonException On octal-looking literals, empty hex literals, or
     *                        text that is not a finite number.
     */
    private function number(): int|float|string
    {
        $sign = '';

        if ($this->currentByte === '-' || $this->currentByte === '+') {
            $sign = $this->currentByte;
            $this->next();
        }

        if ($this->currentByte === 'I') {
            $this->word();

            return $sign === '-' ? -\INF : \INF;
        }

        if ($this->currentByte === 'N') {
            // NaN has no meaningful sign; return it as parsed.
            return $this->word();
        }

        $digits = '';
        $isHex = false;

        if ($this->currentByte === '0') {
            $digits = '0';
            $this->next();

            if ($this->currentByte === 'x' || $this->currentByte === 'X') {
                $this->next();
                $isHex = true;
            } elseif (\is_numeric($this->currentByte)) {
                $this->throwSyntaxError('Octal literal');
            }
        }

        if ($isHex) {
            $hexDigits = $this->match('/^[A-Fa-f0-9]+/');
            if ($hexDigits === null) {
                $this->throwSyntaxError('Bad hex number');
            }

            // Pass only the digits: hexdec() emits a deprecation notice for the "0x" prefix.
            $number = \hexdec($hexDigits);
        } else {
            if ((\is_numeric($this->currentByte) || $this->currentByte === '.')
                && ($mantissa = $this->match('/^\d*\.?\d*/')) !== null
            ) {
                $digits .= $mantissa;
            }

            if (($this->currentByte === 'E' || $this->currentByte === 'e')
                && ($exponent = $this->match('/^[Ee][-+]?\d*/')) !== null
            ) {
                $digits .= $exponent;
            }

            $number = $digits;
        }

        // A leading "+" is simply dropped; only "-" affects the value.
        if ($sign === '-') {
            $number = '-' . $number;
        }

        if (!\is_numeric($number) || !\is_finite((float) $number)) {
            $this->throwSyntaxError('Bad number');
        }

        // Let PHP pick int or float from the numeric text.
        $asIntOrFloat = $number + 0;

        // An integer literal that became a float has overflowed PHP's int range.
        $isIntLike = \preg_match('/^-?\d+$/', (string) $number) === 1;
        if ($this->castBigIntToString && $isIntLike && \is_float($asIntOrFloat)) {
            return (string) $number;
        }

        return $asIntOrFloat;
    }

    /**
     * Parses a string delimited by the quote character under the cursor
     * (single or double quote).
     *
     * @throws \JsonException On an unterminated string, a raw newline, an unknown
     *                        escape, or an undecodable \uXXXX sequence.
     */
    private function string(): string
    {
        $string = '';

        $delim = $this->currentByte;
        $this->next();

        while ($this->currentByte !== null) {
            if ($this->currentByte === $delim) {
                $this->next();

                return $string;
            }

            if ($this->currentByte === '\\') {
                // Consecutive \uXXXX escapes are decoded in one go so surrogate pairs work.
                if ($this->peek() === 'u'
                    && ($unicodeEscaped = $this->match('/^(?:\\\\u[A-Fa-f0-9]{4})+/')) !== null
                ) {
                    try {
                        $string .= \json_decode('"' . $unicodeEscaped . '"', false, 1, \JSON_THROW_ON_ERROR);
                    } catch (\JsonException $e) {
                        $this->throwSyntaxError($e->getMessage());
                    }

                    // match() already moved the cursor past the escapes.
                    continue;
                }

                $this->next();

                if ($this->currentByte === null) {
                    // Backslash was the last byte of the input: unterminated string.
                    break;
                }

                if ($this->currentByte === "\r") {
                    // Escaped line break: swallow "\r" or "\r\n" without adding anything.
                    if ($this->peek() === "\n") {
                        $this->next();
                    }
                } elseif (($escapee = self::getEscapee($this->currentByte)) !== null) {
                    $string .= $escapee;
                } else {
                    break;
                }
            } elseif ($this->currentByte === "\n") {
                // Raw (unescaped) newlines are not allowed inside strings.
                break;
            } else {
                $string .= $this->currentByte;
            }

            $this->next();
        }

        $this->throwSyntaxError('Bad string');
    }

    /**
     * Skips a "//" comment. Entered with the cursor on the second slash; stops
     * after the first "\n" or "\r" byte, or at end of input.
     */
    private function inlineComment(): void
    {
        do {
            $this->next();

            if ($this->currentByte === "\n" || $this->currentByte === "\r") {
                $this->next();

                return;
            }
        } while ($this->currentByte !== null);
    }

    /**
     * Skips a block comment. Entered with the cursor on the "*" that follows the opening slash.
     *
     * @throws \JsonException When the input ends before the closing star-slash.
     */
    private function blockComment(): void
    {
        do {
            $this->next();

            // A run of stars ends the comment only if a slash follows directly.
            while ($this->currentByte === '*') {
                $this->nextOrFail('*');

                if ($this->currentByte === '/') {
                    $this->nextOrFail('/');

                    return;
                }
            }
        } while ($this->currentByte !== null);

        $this->throwSyntaxError('Unterminated block comment');
    }

    /**
     * Skips a comment of either kind. Entered with the cursor on the first slash.
     *
     * @throws \JsonException When the slash does not start a comment.
     */
    private function comment(): void
    {
        $this->nextOrFail('/');

        if ($this->currentByte === '/') {
            $this->inlineComment();

            return;
        }

        if ($this->currentByte === '*') {
            $this->blockComment();

            return;
        }

        $this->throwSyntaxError('Unrecognized comment');
    }

    /**
     * Skips whitespace and comments until a significant byte or end of input.
     */
    private function white(): void
    {
        while ($this->currentByte !== null) {
            if ($this->currentByte === '/') {
                $this->comment();
            } elseif (\preg_match('/^[ \t\r\n\v\f\xA0]/', $this->currentByte) === 1) {
                $this->next();
            } elseif ($this->currentByte === "\xC2" && $this->peek() === "\xA0") {
                // UTF-8 encoded non-breaking space (bytes C2 A0). Comparing bytes
                // directly stays safe when peek() returns null at end of input.
                $this->next();
                $this->next();
            } else {
                return;
            }
        }
    }

    /**
     * Parses one of the literal words true, false, null, Infinity or NaN.
     *
     * @throws \JsonException When the cursor does not start one of these words
     *                        or the word is misspelled.
     */
    private function word(): bool|float|null
    {
        [$literal, $value] = match ($this->currentByte) {
            't' => ['true', true],
            'f' => ['false', false],
            'n' => ['null', null],
            'I' => ['Infinity', \INF],
            'N' => ['NaN', \NAN],
            default => [null, null],
        };

        if ($literal === null) {
            $this->throwSyntaxError('Unexpected ' . self::renderChar($this->currentChar()));
        }

        // Consume the word byte by byte so a misspelling is reported at its exact position.
        foreach (\str_split($literal) as $expected) {
            $this->nextOrFail($expected);
        }

        return $value;
    }

    /**
     * Parses an array. A single trailing comma before "]" is accepted; an empty
     * element (",,") is not.
     *
     * @return array<int, mixed>
     *
     * @throws \JsonException On malformed arrays or when the depth limit is exceeded.
     */
    private function arr(): array
    {
        $arr = [];

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('[');
        $this->white();

        while ($this->currentByte !== null) {
            if ($this->currentByte === ']') {
                // Empty array, or the closing bracket after a trailing comma.
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }

            if ($this->currentByte === ',') {
                $this->throwSyntaxError('Missing array element');
            }

            $arr[] = $this->value();
            $this->white();

            if ($this->currentByte !== ',') {
                // No separator: the only thing allowed here is the closing bracket.
                $this->nextOrFail(']');
                $this->depth--;

                return $arr;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid array');
    }

    /**
     * Parses an object. Keys may be quoted strings or unquoted identifiers, and
     * a trailing comma before "}" is accepted.
     *
     * @return array|object An array in associative mode, otherwise a stdClass.
     *
     * @throws \JsonException On malformed objects, invalid property names, or
     *                        when the depth limit is exceeded.
     */
    private function obj(): array|object
    {
        $object = $this->associative ? [] : new \stdClass();

        if (++$this->depth > $this->maxDepth) {
            $this->throwSyntaxError('Maximum stack depth exceeded');
        }

        $this->nextOrFail('{');
        $this->white();

        while ($this->currentByte !== null) {
            if ($this->currentByte === '}') {
                // Empty object, or the closing brace after a trailing comma.
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }

            $key = ($this->currentByte === '"' || $this->currentByte === "'")
                ? $this->string()
                : $this->identifier();

            // PHP refuses object properties whose name starts with a NUL byte;
            // report that as a decoding error instead of letting an engine Error escape.
            if (!$this->associative && \str_starts_with($key, "\0")) {
                $this->throwSyntaxError('The decoded property name is invalid');
            }

            $this->white();
            $this->nextOrFail(':');

            if ($this->associative) {
                $object[$key] = $this->value();
            } else {
                $object->{$key} = $this->value();
            }

            $this->white();

            if ($this->currentByte !== ',') {
                // No separator: the only thing allowed here is the closing brace.
                $this->nextOrFail('}');
                $this->depth--;

                return $object;
            }

            $this->nextOrFail(',');
            $this->white();
        }

        $this->throwSyntaxError('Invalid object');
    }

    /**
     * Parses any value, dispatching on the first significant byte.
     *
     * @throws \JsonException When no valid value starts at the cursor.
     */
    private function value(): mixed
    {
        $this->white();

        return match ($this->currentByte) {
            '{' => $this->obj(),
            '[' => $this->arr(),
            '"', "'" => $this->string(),
            '-', '+', '.' => $this->number(),
            default => \is_numeric($this->currentByte) ? $this->number() : $this->word(),
        };
    }

    /**
     * Always throws a JsonException whose message carries the current line and
     * column; the exception object also exposes getLineNumber() and getColumn().
     *
     * @throws \JsonException
     */
    private function throwSyntaxError(string $message): void
    {
        // The column is counted in characters, not bytes, from the start of the current line.
        $lineSoFar = \substr($this->json, $this->currentLineStartsAt, $this->at - $this->currentLineStartsAt);
        $column = \mb_strlen($lineSoFar, 'UTF-8') + 1;

        throw new class($message, $this->lineNumber, $column) extends \JsonException {
            public function __construct(
                string $message,
                private int $lineNumber,
                private int $column,
                \Throwable|null $previous = null
            ) {
                $message = \sprintf('%s at line %d column %d of the JSON5 data', $message, $lineNumber, $column);
                parent::__construct($message, 0, $previous);
            }

            public function getLineNumber(): int
            {
                return $this->lineNumber;
            }

            public function getColumn(): int
            {
                return $this->column;
            }
        };
    }

    /**
     * Formats a character for an error message: quoted, or "EOF" for null.
     */
    private static function renderChar(?string $chr): string
    {
        if ($chr === null) {
            return 'EOF';
        }

        return "'" . $chr . "'";
    }

    /**
     * Maps the byte following a backslash to the text it stands for.
     *
     * @return string|null The replacement text (empty for an escaped newline),
     *                     or null when the escape is not supported.
     */
    private static function getEscapee(string $ch): ?string
    {
        return match ($ch) {
            "'" => "'",
            '"' => '"',
            '\\' => '\\',
            '/' => '/',
            "\n" => '', // escaped newline: line continuation
            'b' => \chr(8),
            'f' => "\f",
            'n' => "\n",
            'r' => "\r",
            't' => "\t",
            default => null,
        };
    }
}