vendor/twig/twig/src/Lexer.php line 187

Open in your IDE?
  1. <?php
  2. /*
  3. * This file is part of Twig.
  4. *
  5. * (c) Fabien Potencier
  6. * (c) Armin Ronacher
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. use Twig\ExpressionParser\ExpressionParsers;
  14. /**
  15. * @author Fabien Potencier <fabien@symfony.com>
  16. */
  17. class Lexer
  18. {
  // True once initialize() has built $regexes; makes later initialize() calls a no-op.
  19. private $isInitialized = false;
  // Token objects collected by pushToken() and handed to the TokenStream in tokenize().
  20. private $tokens;
  // Template code being lexed, with "\r\n" and "\r" normalized to "\n" by tokenize().
  21. private $code;
  // Current byte offset into $code (advanced with strlen-based arithmetic).
  22. private $cursor;
  // Current line number; starts at 1 and is advanced by moveCursor().
  23. private $lineno;
  // Byte length of $code; lexing stops when $cursor reaches it.
  24. private $end;
  // Current lexing state (one of the STATE_* constants).
  25. private $state;
  // Stack of previous states, managed by pushState() and popState().
  26. private $states;
  // Stack of [opening delimiter, line number] pairs for brackets, strings and interpolations still open.
  27. private $brackets;
  // Environment passed to the constructor; queried for expression parsers in getOperatorRegex().
  28. private $env;
  // Source object currently being tokenized; attached to every SyntaxError.
  29. private $source;
  // Delimiter and whitespace-control options (constructor defaults merged with caller overrides).
  30. private $options;
  // Map of regular expressions built once by initialize().
  31. private $regexes;
  // Index into $positions of the last tag start taken by lexData(); -1 before the first one.
  32. private $position;
  // preg_match_all() result (with offsets) listing every tag start found in $code.
  33. private $positions;
  // Line on which the block or variable tag being lexed started; reported by the Unclosed errors.
  34. private $currentVarBlockLine;
  // Parallel lists: the closing bracket at index i matches the opening bracket at index i.
  35. private array $openingBrackets = ['{', '(', '['];
  36. private array $closingBrackets = ['}', ')', ']'];
  // Lexing states dispatched on by tokenize().
  37. public const STATE_DATA = 0;
  38. public const STATE_BLOCK = 1;
  39. public const STATE_VAR = 2;
  40. public const STATE_STRING = 3;
  41. public const STATE_INTERPOLATION = 4;
  // Identifier: a letter, underscore or byte in \x7f-\xff, followed by the same set plus digits.
  42. public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  // Complete string literal: double-quoted (only when it contains no unescaped #) or single-quoted.
  43. public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  // Number literal; the pattern is written in extended mode and documents its own parts inline.
  44. public const REGEX_NUMBER = '/(?(DEFINE)
  45. (?<LNUM>[0-9]+(_[0-9]+)*) # Integers (with underscores) 123_456
  46. (?<FRAC>\.(?&LNUM)) # Fractional part .456
  47. (?<EXPONENT>[eE][+-]?(?&LNUM)) # Exponent part E+10
  48. (?<DNUM>(?&LNUM)(?:(?&FRAC))?) # Decimal number 123_456.456
  49. )(?:(?&DNUM)(?:(?&EXPONENT))?) # 123_456.456E+10
  50. /Ax';
  // Delimiter that opens or closes a double-quoted string lexed piecewise in STATE_STRING.
  51. public const REGEX_DQ_STRING_DELIM = '/"/A';
  // Chunk of a double-quoted string: stops at the closing quote or at an unescaped # followed by {.
  52. public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  // Inline comment inside an expression: from # up to (not including) the end of the line.
  53. public const REGEX_INLINE_COMMENT = '/#[^\n]*/A';
  // Single characters emitted as punctuation tokens by lexExpression().
  54. public const PUNCTUATION = '()[]{}?:.,|';
  // Escape letters that stripcslashes() turns into the corresponding control character.
  55. private const SPECIAL_CHARS = [
  56. 'f' => "\f",
  57. 'n' => "\n",
  58. 'r' => "\r",
  59. 't' => "\t",
  60. 'v' => "\v",
  61. ];
  /**
   * @param Environment $env     Environment whose expression parsers provide the operator list
   * @param array       $options Overrides for the default delimiters and whitespace-control markers
   */
  public function __construct(Environment $env, array $options = [])
  {
      $defaults = [
          'tag_comment' => ['{#', '#}'],
          'tag_block' => ['{%', '%}'],
          'tag_variable' => ['{{', '}}'],
          'whitespace_trim' => '-',
          'whitespace_line_trim' => '~',
          'whitespace_line_chars' => ' \t\0\x0B',
          'interpolation' => ['#{', '}'],
      ];

      // caller-supplied values win over the defaults
      $this->options = array_merge($defaults, $options);
      $this->env = $env;
  }
  /**
   * Builds, once per lexer instance, the regular expressions used by the lexing methods.
   *
   * All configurable delimiters and whitespace-control markers are escaped with
   * preg_quote() before being embedded, so custom values containing regex
   * metacharacters (for instance "*" or "+") are always matched literally.
   * The "whitespace_line_chars" option is the exception: it is inserted verbatim
   * inside a character class and is therefore expected to be written in regex syntax.
   */
  private function initialize(): void
  {
      if ($this->isInitialized) {
          return;
      }

      // "#" is passed as extra delimiter so it is always escaped: most patterns
      // below use the "x" modifier, in which a bare "#" would start a comment.
      $trim = preg_quote($this->options['whitespace_trim'], '#');
      $lineTrim = preg_quote($this->options['whitespace_line_trim'], '#');
      $lineChars = '['.$this->options['whitespace_line_chars'].']*';

      $varStart = preg_quote($this->options['tag_variable'][0], '#');
      $varEnd = preg_quote($this->options['tag_variable'][1], '#');
      $blockStart = preg_quote($this->options['tag_block'][0], '#');
      $blockEnd = preg_quote($this->options['tag_block'][1], '#');
      $commentStart = preg_quote($this->options['tag_comment'][0], '#');
      $commentEnd = preg_quote($this->options['tag_comment'][1], '#');

      $this->regexes = [
          // }}
          'lex_var' => '{
              \s*
              (?:'.
                  $trim.$varEnd.'\s*'. // -}}\s*
                  '|'.
                  $lineTrim.$varEnd.$lineChars. // ~}}[ \t\0\x0B]*
                  '|'.
                  $varEnd. // }}
              ')
          }Ax',

          // %}
          'lex_block' => '{
              \s*
              (?:'.
                  $trim.$blockEnd.'\s*\n?'. // -%}\s*\n?
                  '|'.
                  $lineTrim.$blockEnd.$lineChars. // ~%}[ \t\0\x0B]*
                  '|'.
                  $blockEnd.'\n?'. // %}\n?
              ')
          }Ax',

          // {% endverbatim %}
          'lex_raw_data' => '{'.
              $blockStart. // {%
              '('.
                  // the markers are escaped here too, like in every other pattern
                  $trim. // -
                  '|'.
                  $lineTrim. // ~
              ')?\s*endverbatim\s*'.
              '(?:'.
                  $trim.$blockEnd.'\s*'. // -%}
                  '|'.
                  $lineTrim.$blockEnd.$lineChars. // ~%}[ \t\0\x0B]*
                  '|'.
                  $blockEnd. // %}
              ')
          }sx',

          'operator' => $this->getOperatorRegex(),

          // #}
          'lex_comment' => '{
              (?:'.
                  $trim.$commentEnd.'\s*\n?'. // -#}\s*\n?
                  '|'.
                  $lineTrim.$commentEnd.$lineChars. // ~#}[ \t\0\x0B]*
                  '|'.
                  $commentEnd.'\n?'. // #}\n?
              ')
          }sx',

          // verbatim %}
          'lex_block_raw' => '{
              \s*verbatim\s*
              (?:'.
                  $trim.$blockEnd.'\s*'. // -%}\s*
                  '|'.
                  $lineTrim.$blockEnd.$lineChars. // ~%}[ \t\0\x0B]*
                  '|'.
                  $blockEnd. // %}
              ')
          }Asx',

          'lex_block_line' => '{\s*line\s+(\d+)\s*'.$blockEnd.'}As',

          // {{ or {% or {#
          'lex_tokens_start' => '{
              ('.
                  $varStart. // {{
                  '|'.
                  $blockStart. // {%
                  '|'.
                  $commentStart. // {#
              ')('.
                  $trim. // -
                  '|'.
                  $lineTrim. // ~
              ')?
          }sx',

          'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
          'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
      ];

      $this->isInitialized = true;
  }
  /**
   * Converts the code of a template source into a stream of tokens.
   *
   * @throws SyntaxError When a bracket, string or interpolation is still open at the end of the code
   */
  public function tokenize(Source $source): TokenStream
  {
      $this->initialize();

      // reset the per-run state
      $this->source = $source;
      $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
      $this->end = \strlen($this->code);
      $this->cursor = 0;
      $this->lineno = 1;
      $this->position = -1;
      $this->state = self::STATE_DATA;
      $this->tokens = [];
      $this->states = [];
      $this->brackets = [];

      // locate every tag start up front; lexData() then walks this list
      preg_match_all($this->regexes['lex_tokens_start'], $this->code, $tagStarts, \PREG_OFFSET_CAPTURE);
      $this->positions = $tagStarts;

      // let the lexing method of the current state consume the next piece of code
      while ($this->cursor < $this->end) {
          $current = $this->state;

          if (self::STATE_DATA === $current) {
              $this->lexData();
          } elseif (self::STATE_BLOCK === $current) {
              $this->lexBlock();
          } elseif (self::STATE_VAR === $current) {
              $this->lexVar();
          } elseif (self::STATE_STRING === $current) {
              $this->lexString();
          } elseif (self::STATE_INTERPOLATION === $current) {
              $this->lexInterpolation();
          }
      }

      $this->pushToken(Token::EOF_TYPE);

      if (!$this->brackets) {
          return new TokenStream($this->tokens, $this->source);
      }

      // something opened was never closed: report the innermost one
      [$expect, $lineno] = array_pop($this->brackets);

      throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
  }
  /**
   * Lexes plain template text up to the next tag start, then enters that tag.
   *
   * Emits the text before the tag (trimmed when the tag carries a whitespace
   * control marker) and dispatches on the kind of tag: comment, block or variable.
   */
  private function lexData(): void
  {
      $lastIndex = \count($this->positions[0]) - 1;

      // no tag start left: the remainder of the template is a single text token
      if ($lastIndex === $this->position) {
          $this->pushToken(Token::TEXT_TYPE, substr($this->code, $this->cursor));
          $this->cursor = $this->end;

          return;
      }

      // skip the tag starts located before the cursor
      do {
          $position = $this->positions[0][++$this->position];
          if ($position[1] >= $this->cursor) {
              break;
          }
          if ($lastIndex === $this->position) {
              return;
          }
      } while (true);

      [$tagStart, $tagOffset] = $position;

      // the untrimmed text is kept to advance the cursor by the right amount
      $textContent = substr($this->code, $this->cursor, $tagOffset - $this->cursor);
      $text = $textContent;

      $trimMarker = $this->positions[2][$this->position][0] ?? null;
      if (null !== $trimMarker) {
          if ($this->options['whitespace_trim'] === $trimMarker) {
              // {%-, {{- or {#-: remove all trailing whitespace
              $text = rtrim($text);
          } elseif ($this->options['whitespace_line_trim'] === $trimMarker) {
              // {%~, {{~ or {#~: remove trailing whitespace but keep \r and \n
              $text = rtrim($text, " \t\0\x0B");
          }
      }

      $this->pushToken(Token::TEXT_TYPE, $text);
      $this->moveCursor($textContent.$tagStart);

      switch ($this->positions[1][$this->position][0]) {
          case $this->options['tag_comment'][0]:
              $this->lexComment();

              break;

          case $this->options['tag_block'][0]:
              if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                  // {% verbatim %}: everything up to endverbatim is raw data
                  $this->moveCursor($match[0]);
                  $this->lexRawData();
              } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                  // {% line \d+ %}: overrides the current line number
                  $this->moveCursor($match[0]);
                  $this->lineno = (int) $match[1];
              } else {
                  $this->pushToken(Token::BLOCK_START_TYPE);
                  $this->pushState(self::STATE_BLOCK);
                  $this->currentVarBlockLine = $this->lineno;
              }

              break;

          case $this->options['tag_variable'][0]:
              $this->pushToken(Token::VAR_START_TYPE);
              $this->pushState(self::STATE_VAR);
              $this->currentVarBlockLine = $this->lineno;

              break;
      }
  }
  /**
   * Lexes the inside of a block tag: closes it on the block end delimiter
   * (only when no bracket is open), otherwise lexes one expression token.
   */
  private function lexBlock(): void
  {
      $atBlockEnd = !$this->brackets
          && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor);

      if (!$atBlockEnd) {
          $this->lexExpression();

          return;
      }

      $this->pushToken(Token::BLOCK_END_TYPE);
      $this->moveCursor($match[0]);
      $this->popState();
  }
  /**
   * Lexes the inside of a variable tag: closes it on the variable end delimiter
   * (only when no bracket is open), otherwise lexes one expression token.
   */
  private function lexVar(): void
  {
      $atVarEnd = !$this->brackets
          && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor);

      if (!$atVarEnd) {
          $this->lexExpression();

          return;
      }

      $this->pushToken(Token::VAR_END_TYPE);
      $this->moveCursor($match[0]);
      $this->popState();
  }
  /**
   * Lexes one expression token at the cursor, after skipping leading whitespace.
   *
   * The candidates are tried in a fixed order: operator, name, number,
   * punctuation, complete string, opening of a double-quoted string, inline comment.
   *
   * @throws SyntaxError When the code ends right after whitespace or no candidate matches
   */
  private function lexExpression(): void
  {
      // whitespace
      if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
          $this->moveCursor($match[0]);

          if ($this->cursor >= $this->end) {
              $unclosed = self::STATE_BLOCK === $this->state ? 'block' : 'variable';

              throw new SyntaxError(\sprintf('Unclosed "%s".', $unclosed), $this->currentVarBlockLine, $this->source);
          }
      }

      // operators
      if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
          // runs of whitespace inside an operator collapse to a single space
          $operator = preg_replace('/\s+/', ' ', $match[0]);
          if (\in_array($operator, $this->openingBrackets, true)) {
              $this->checkBrackets($operator);
          }
          $this->pushToken(Token::OPERATOR_TYPE, $operator);
          $this->moveCursor($match[0]);

          return;
      }

      // names
      if (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
          $this->pushToken(Token::NAME_TYPE, $match[0]);
          $this->moveCursor($match[0]);

          return;
      }

      // numbers
      if (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
          // "0 +" turns the numeric string (digit separators removed) into an int or a float
          $this->pushToken(Token::NUMBER_TYPE, 0 + str_replace('_', '', $match[0]));
          $this->moveCursor($match[0]);

          return;
      }

      // punctuation
      $char = $this->code[$this->cursor];
      if (str_contains(self::PUNCTUATION, $char)) {
          $this->checkBrackets($char);
          $this->pushToken(Token::PUNCTUATION_TYPE, $char);
          ++$this->cursor;

          return;
      }

      // strings
      if (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
          $literal = $match[0];
          $this->pushToken(Token::STRING_TYPE, $this->stripcslashes(substr($literal, 1, -1), substr($literal, 0, 1)));
          $this->moveCursor($literal);

          return;
      }

      // opening double quoted string
      if (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
          $this->brackets[] = ['"', $this->lineno];
          $this->pushState(self::STATE_STRING);
          $this->moveCursor($match[0]);

          return;
      }

      // inline comment
      if (preg_match(self::REGEX_INLINE_COMMENT, $this->code, $match, 0, $this->cursor)) {
          $this->moveCursor($match[0]);

          return;
      }

      // unlexable
      throw new SyntaxError(\sprintf('Unexpected character "%s".', $char), $this->lineno, $this->source);
  }
  /**
   * Decodes the backslash escape sequences of a string literal.
   *
   * Handles the control characters of SPECIAL_CHARS, an escaped backslash,
   * escaped quotes, an escaped interpolation opening, hexadecimal escapes
   * (one or two digits) and octal escapes (up to three digits). For any other
   * escaped character, and for an escaped quote that differs from $quoteType,
   * a deprecation is triggered and the backslash is dropped. A lone trailing
   * backslash is kept.
   *
   * @param string $str       The string content, without its surrounding quotes
   * @param string $quoteType The quote character delimiting the string
   */
  private function stripcslashes(string $str, string $quoteType): string
  {
      $deprecation = 'Character "%s" should not be escaped; the "\" character is ignored in Twig 3 but will not be in Twig 4. Please remove the extra "\" character at position %d in "%s" at line %d.';

      $decoded = '';
      $total = \strlen($str);

      for ($i = 0; $i < $total; ++$i) {
          $slashAt = strpos($str, '\\', $i);
          if (false === $slashAt) {
              // no escape sequence left: copy the remainder as is
              $decoded .= substr($str, $i);

              break;
          }

          // copy what precedes the backslash, then look at the escaped character
          $decoded .= substr($str, $i, $slashAt - $i);
          $i = $slashAt + 1;

          if ($i >= $total) {
              $decoded .= '\\';

              break;
          }

          $escaped = $str[$i];
          $hasNext = $i + 1 < $total;

          if (isset(self::SPECIAL_CHARS[$escaped])) {
              $decoded .= self::SPECIAL_CHARS[$escaped];
          } elseif ('\\' === $escaped) {
              $decoded .= $escaped;
          } elseif ("'" === $escaped || '"' === $escaped) {
              if ($quoteType !== $escaped) {
                  trigger_deprecation('twig/twig', '3.12', $deprecation, $escaped, $i + 1, $this->source->getName(), $this->lineno);
              }
              $decoded .= $escaped;
          } elseif ('#' === $escaped && $hasNext && '{' === $str[$i + 1]) {
              $decoded .= '#{';
              ++$i;
          } elseif ('x' === $escaped && $hasNext && ctype_xdigit($str[$i + 1])) {
              $hex = $str[++$i];
              if ($i + 1 < $total && ctype_xdigit($str[$i + 1])) {
                  $hex .= $str[++$i];
              }
              $decoded .= \chr(hexdec($hex));
          } elseif (ctype_digit($escaped) && $escaped < '8') {
              $octal = $escaped;
              while ($i + 1 < $total && ctype_digit($str[$i + 1]) && $str[$i + 1] < '8' && \strlen($octal) < 3) {
                  $octal .= $str[++$i];
              }
              $decoded .= \chr(octdec($octal));
          } else {
              trigger_deprecation('twig/twig', '3.12', $deprecation, $escaped, $i + 1, $this->source->getName(), $this->lineno);
              $decoded .= $escaped;
          }
      }

      return $decoded;
  }
  /**
   * Lexes the content of a verbatim block as one text token, up to its endverbatim tag.
   *
   * @throws SyntaxError When no endverbatim tag is found after the cursor
   */
  private function lexRawData(): void
  {
      $found = preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor);
      if (!$found) {
          throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
      }

      [$endTag, $endTagOffset] = $match[0];

      $text = substr($this->code, $this->cursor, $endTagOffset - $this->cursor);
      // the cursor moves over the untrimmed text and the whole endverbatim tag
      $this->moveCursor($text.$endTag);

      $trimMarker = $match[1][0] ?? null;
      if (null !== $trimMarker) {
          // whitespace_trim removes all trailing whitespace;
          // whitespace_line_trim keeps \r and \n
          $text = $this->options['whitespace_trim'] === $trimMarker
              ? rtrim($text)
              : rtrim($text, " \t\0\x0B");
      }

      $this->pushToken(Token::TEXT_TYPE, $text);
  }
  /**
   * Skips a comment: moves the cursor past the next comment end delimiter without emitting a token.
   *
   * @throws SyntaxError When no comment end delimiter is found after the cursor
   */
  private function lexComment(): void
  {
      $found = preg_match($this->regexes['lex_comment'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor);
      if (!$found) {
          throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
      }

      [$endTag, $endTagOffset] = $match[0];
      $commentBody = substr($this->code, $this->cursor, $endTagOffset - $this->cursor);

      $this->moveCursor($commentBody.$endTag);
  }
  /**
   * Lexes the inside of a double-quoted string: an interpolation start,
   * a chunk of string content, or the closing quote.
   *
   * @throws SyntaxError When the character at the cursor fits none of these
   */
  private function lexString(): void
  {
      // start of an interpolation
      if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
          $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
          $this->pushToken(Token::INTERPOLATION_START_TYPE);
          $this->moveCursor($match[0]);
          $this->pushState(self::STATE_INTERPOLATION);

          return;
      }

      // non-empty chunk of string content
      if (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) {
          $this->pushToken(Token::STRING_TYPE, $this->stripcslashes($match[0], '"'));
          $this->moveCursor($match[0]);

          return;
      }

      // closing quote
      if (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
          [$expect, $lineno] = array_pop($this->brackets);
          if ('"' !== $this->code[$this->cursor]) {
              throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
          }

          $this->popState();
          ++$this->cursor;

          return;
      }

      // unlexable
      throw new SyntaxError(\sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
  }
  /**
   * Lexes the inside of a string interpolation: closes it on the interpolation
   * end delimiter when the innermost open bracket is the interpolation itself,
   * otherwise lexes one expression token.
   */
  private function lexInterpolation(): void
  {
      $innermost = end($this->brackets);

      $atInterpolationEnd = $this->options['interpolation'][0] === $innermost[0]
          && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor);

      if (!$atInterpolationEnd) {
          $this->lexExpression();

          return;
      }

      array_pop($this->brackets);
      $this->pushToken(Token::INTERPOLATION_END_TYPE);
      $this->moveCursor($match[0]);
      $this->popState();
  }
  /**
   * Appends a token carrying the current line number; empty text tokens are dropped.
   *
   * @param mixed $type  One of the Token::*_TYPE constants
   * @param mixed $value The token value, empty by default
   */
  private function pushToken($type, $value = ''): void
  {
      $isEmptyText = Token::TEXT_TYPE === $type && '' === $value;

      if (!$isEmptyText) {
          $this->tokens[] = new Token($type, $value, $this->lineno);
      }
  }
  /**
   * Advances the cursor past the given consumed text and counts the lines it spans.
   *
   * @param string $text The text that has just been consumed
   */
  private function moveCursor($text): void
  {
      $newlines = substr_count($text, "\n");
      $bytes = \strlen($text);

      $this->lineno += $newlines;
      $this->cursor += $bytes;
  }
  /**
   * Builds the anchored regular expression matching any operator token
   * provided by the expression parsers of the environment.
   *
   * Longer operators come first in the alternation so that they win over
   * operators that are a prefix of them.
   */
  private function getOperatorRegex(): string
  {
      $tokens = [];
      foreach ($this->env->getExpressionParsers() as $parser) {
          $tokens = array_merge($tokens, ExpressionParsers::getOperatorTokensFor($parser));
      }

      // operator => length, sorted by decreasing length
      $lengths = array_combine($tokens, array_map('strlen', $tokens));
      arsort($lengths);

      $alternatives = [];
      foreach ($lengths as $token => $length) {
          $pattern = preg_quote($token, '/');

          // an operator that ends with a character must be followed by
          // a whitespace, a parenthesis, an opening map [ or sequence {
          if (ctype_alpha($token[$length - 1])) {
              $pattern .= '(?=[\s()\[{])';
          }

          // an operator that begins with a character must not have a dot or pipe before
          if (ctype_alpha($token[0])) {
              $pattern = '(?<![\.\|]\s|.[\.\|])'.$pattern;
          }

          // an operator with a space can be any amount of whitespaces
          $alternatives[] = preg_replace('/\s+/', '\s+', $pattern);
      }

      return '/'.implode('|', $alternatives).'/A';
  }
  /**
   * Enters a new lexing state, remembering the current one so popState() can restore it.
   *
   * @param mixed $state One of the STATE_* constants
   */
  private function pushState($state): void
  {
      $previous = $this->state;

      $this->states[] = $previous;
      $this->state = $state;
  }
  /**
   * Leaves the current lexing state and restores the one saved by the matching pushState().
   *
   * @throws \LogicException When there is no saved state to restore
   */
  private function popState(): void
  {
      if (!$this->states) {
          throw new \LogicException('Cannot pop state without a previous state.');
      }

      $this->state = array_pop($this->states);
  }
  /**
   * Keeps the stack of open brackets in sync with a bracket found in the code.
   *
   * An opening bracket is pushed with its line number; a closing bracket must
   * match the innermost open one. Any other string is ignored.
   *
   * @throws SyntaxError When a closing bracket has no open bracket or does not match the innermost one
   */
  private function checkBrackets(string $code): void
  {
      // opening bracket
      if (\in_array($code, $this->openingBrackets, true)) {
          $this->brackets[] = [$code, $this->lineno];

          return;
      }

      // neither an opening nor a closing bracket
      if (!\in_array($code, $this->closingBrackets, true)) {
          return;
      }

      // closing bracket
      if (!$this->brackets) {
          throw new SyntaxError(\sprintf('Unexpected "%s".', $code), $this->lineno, $this->source);
      }

      [$expect, $lineno] = array_pop($this->brackets);
      $expectedClosing = str_replace($this->openingBrackets, $this->closingBrackets, $expect);

      if ($expectedClosing !== $code) {
          throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
      }
  }
  514. }