Skip to content

Commit 403e38d

Browse files
Merge pull request #363 from iifawzi/support-with-statements
Completing, refactoring and improving the `WITH` statements parser
2 parents dbc80de + c8f72ec commit 403e38d

40 files changed

+33623
-719
lines changed

‎src/Statements/CreateStatement.php‎

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,17 @@ class CreateStatement extends Statement
310310
*/
311311
public $fields;
312312

313+
/**
314+
* If `CREATE TABLE WITH`.
315+
* If `CREATE TABLE AS WITH`.
316+
* If `CREATE VIEW AS WITH`.
317+
*
318+
* Used by `CREATE TABLE`, `CREATE VIEW`
319+
*
320+
* @var WithStatement|null
321+
*/
322+
public $with;
323+
313324
/**
314325
* If `CREATE TABLE ... SELECT`.
315326
* If `CREATE VIEW AS ... SELECT`.
@@ -436,6 +447,13 @@ public function build()
436447
. Expression::build($this->like);
437448
}
438449

450+
if ($this->with !== null) {
451+
return 'CREATE '
452+
. OptionsArray::build($this->options) . ' '
453+
. Expression::build($this->name) . ' '
454+
. $this->with->build();
455+
}
456+
439457
$partition = '';
440458

441459
if (! empty($this->partitionBy)) {
@@ -465,10 +483,17 @@ public function build()
465483
. OptionsArray::build($this->entityOptions)
466484
. $partition;
467485
} elseif ($this->options->has('VIEW')) {
486+
$builtStatement = '';
487+
if ($this->select !== null) {
488+
$builtStatement = $this->select->build();
489+
} elseif ($this->with !== null) {
490+
$builtStatement = $this->with->build();
491+
}
492+
468493
return 'CREATE '
469494
. OptionsArray::build($this->options) . ' '
470495
. Expression::build($this->name) . ' '
471-
. $fields . ' AS ' . ($this->select ? $this->select->build() : '')
496+
. $fields . ' AS ' . $builtStatement
472497
. (! empty($this->body) ? TokensList::build($this->body) : '') . ' '
473498
. OptionsArray::build($this->entityOptions);
474499
} elseif ($this->options->has('TRIGGER')) {
@@ -544,14 +569,22 @@ public function parse(Parser $parser, TokensList $list)
544569
if (($token->type === Token::TYPE_KEYWORD) && ($token->keyword === 'SELECT')) {
545570
/* CREATE TABLE ... SELECT */
546571
$this->select = new SelectStatement($parser, $list);
572+
} elseif ($token->type === Token::TYPE_KEYWORD && ($token->keyword === 'WITH')) {
573+
/* CREATE TABLE WITH */
574+
$this->with = new WithStatement($parser, $list);
547575
} elseif (
548576
($token->type === Token::TYPE_KEYWORD) && ($token->keyword === 'AS')
549577
&& ($list->tokens[$nextidx]->type === Token::TYPE_KEYWORD)
550-
&& ($list->tokens[$nextidx]->value === 'SELECT')
551578
) {
552-
/* CREATE TABLE ... AS SELECT */
553-
$list->idx = $nextidx;
554-
$this->select = new SelectStatement($parser, $list);
579+
if ($list->tokens[$nextidx]->value === 'SELECT') {
580+
/* CREATE TABLE ... AS SELECT */
581+
$list->idx = $nextidx;
582+
$this->select = new SelectStatement($parser, $list);
583+
} elseif ($list->tokens[$nextidx]->value === 'WITH') {
584+
/* CREATE TABLE ... AS WITH */
585+
$list->idx = $nextidx;
586+
$this->with = new WithStatement($parser, $list);
587+
}
555588
} elseif ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'LIKE') {
556589
/* CREATE TABLE `new_tbl` LIKE 'orig_tbl' */
557590
$list->idx = $nextidx;
@@ -707,10 +740,14 @@ public function parse(Parser $parser, TokensList $list)
707740
$token->type === Token::TYPE_KEYWORD
708741
&& $token->keyword === 'AS'
709742
&& $list->tokens[$nextidx]->type === Token::TYPE_KEYWORD
710-
&& $list->tokens[$nextidx]->value === 'SELECT'
711743
) {
712-
$list->idx = $nextidx;
713-
$this->select = new SelectStatement($parser, $list);
744+
if ($list->tokens[$nextidx]->value === 'SELECT') {
745+
$list->idx = $nextidx;
746+
$this->select = new SelectStatement($parser, $list);
747+
} elseif ($list->tokens[$nextidx]->value === 'WITH') {
748+
++$list->idx;
749+
$this->with = new WithStatement($parser, $list);
750+
}
714751
}
715752

716753
// Parsing all other tokens

‎src/Statements/InsertStatement.php‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ class InsertStatement extends Statement
9898
*/
9999
public $select;
100100

101+
/**
102+
* If WITH CTE is present
103+
* holds the WithStatement.
104+
*
105+
* @var WithStatement|null
106+
*/
107+
public $with;
108+
101109
/**
102110
* If ON DUPLICATE KEY UPDATE clause is present
103111
* holds the SetOperation.
@@ -208,6 +216,8 @@ public function parse(Parser $parser, TokensList $list)
208216
$this->set = SetOperation::parse($parser, $list);
209217
} elseif ($token->keyword === 'SELECT') {
210218
$this->select = new SelectStatement($parser, $list);
219+
} elseif ($token->keyword === 'WITH') {
220+
$this->with = new WithStatement($parser, $list);
211221
} else {
212222
$parser->error('Unexpected keyword.', $token);
213223
break;

‎src/Statements/WithStatement.php‎

Lines changed: 168 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ final class WithStatement extends Statement
6060
/** @var WithKeyword[] */
6161
public $withers = [];
6262

63+
/**
64+
* Holds the parser of the statement that follows the CTE definitions.
65+
*
66+
* @var Parser|null
67+
*/
68+
public $cteStatementParser;
69+
6370
/**
6471
* @param Parser $parser the instance that requests parsing
6572
* @param TokensList $list the list of tokens to be parsed
@@ -68,26 +75,34 @@ final class WithStatement extends Statement
6875
*/
6976
public function parse(Parser $parser, TokensList $list)
7077
{
71-
++$list->idx; // Skipping `WITH`.
72-
73-
// parse any options if provided
74-
$this->options = OptionsArray::parse($parser, $list, static::$OPTIONS);
75-
++$list->idx;
76-
7778
/**
7879
* The state of the parser.
7980
*
8081
* Below are the states of the parser.
8182
*
8283
* 0 ---------------- [ name ] -----------------> 1
83-
* 1 -------------- [( columns )] AS ----------------> 2
84-
* 2 ------------------ [ , ] --------------------> 0
84+
*
85+
* 1 ------------------ [ ( ] ------------------> 2
86+
*
87+
* 2 ------------------ [ AS ] -----------------> 3
88+
*
89+
* 3 ------------------ [ ( ] ------------------> 4
90+
*
91+
* 4 ------------------ [ , ] ------------------> 0
92+
*
93+
* 4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5
8594
*
8695
* @var int
8796
*/
8897
$state = 0;
8998
$wither = null;
9099

100+
++$list->idx; // Skipping `WITH`.
101+
102+
// parse any options if provided
103+
$this->options = OptionsArray::parse($parser, $list, static::$OPTIONS);
104+
++$list->idx;
105+
91106
for (; $list->idx < $list->count; ++$list->idx) {
92107
/**
93108
* Token parsed at this moment.
@@ -99,57 +114,165 @@ public function parse(Parser $parser, TokensList $list)
99114
continue;
100115
}
101116

102-
if ($token->type === Token::TYPE_NONE) {
117+
if ($state === 0) {
118+
if ($token->type !== Token::TYPE_NONE) {
119+
$parser->error('The name of the CTE was expected.', $token);
120+
break;
121+
}
122+
103123
$wither = $token->value;
104124
$this->withers[$wither] = new WithKeyword($wither);
105125
$state = 1;
106-
continue;
107-
}
108-
109-
if ($state === 1) {
110-
if ($token->value === '(') {
126+
} elseif ($state === 1) {
127+
if ($token->type === Token::TYPE_OPERATOR && $token->value === '(') {
111128
$this->withers[$wither]->columns = Array2d::parse($parser, $list);
112-
continue;
113-
}
114-
115-
if ($token->keyword === 'AS') {
116-
++$list->idx;
117129
$state = 2;
118-
continue;
130+
} elseif ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS') {
131+
$state = 3;
132+
} else {
133+
$parser->error('Unexpected token.', $token);
134+
break;
119135
}
120136
} elseif ($state === 2) {
121-
if ($token->value === '(') {
122-
++$list->idx;
123-
$subList = $this->getSubTokenList($list);
124-
if ($subList instanceof ParserException) {
125-
$parser->errors[] = $subList;
126-
continue;
127-
}
137+
if (! ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS')) {
138+
$parser->error('AS keyword was expected.', $token);
139+
break;
140+
}
141+
142+
$state = 3;
143+
} elseif ($state === 3) {
144+
$idxBeforeGetNext = $list->idx;
128145

129-
$subParser = new Parser($subList);
146+
// We want the next non-comment and non-space token after $token.
147+
// The first getNext() call starts at the current $idx, which is $token itself:
148+
// it returns $token and increases $idx by 1. The token now at $idx is not guaranteed
149+
// to be non-comment and non-space, which is why getNext() is called a second time.
150+
$list->getNext();
151+
$nextKeyword = $list->getNext();
130152

131-
if (count($subParser->errors)) {
132-
foreach ($subParser->errors as $error) {
133-
$parser->errors[] = $error;
134-
}
153+
if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) {
154+
$parser->error('Subquery of the CTE was expected.', $token);
155+
$list->idx = $idxBeforeGetNext;
156+
break;
157+
}
158+
159+
// Restore the index
160+
$list->idx = $idxBeforeGetNext;
161+
162+
++$list->idx;
163+
$subList = $this->getSubTokenList($list);
164+
if ($subList instanceof ParserException) {
165+
$parser->errors[] = $subList;
166+
break;
167+
}
168+
169+
$subParser = new Parser($subList);
170+
171+
if (count($subParser->errors)) {
172+
foreach ($subParser->errors as $error) {
173+
$parser->errors[] = $error;
135174
}
136175

137-
$this->withers[$wither]->statement = $subParser;
138-
continue;
176+
break;
139177
}
140178

141-
// There's another WITH expression to parse, go back to state=0
179+
$this->withers[$wither]->statement = $subParser;
180+
181+
$state = 4;
182+
} elseif ($state === 4) {
142183
if ($token->value === ',') {
143-
$list->idx++;
184+
// There's another WITH expression to parse, go back to state=0
144185
$state = 0;
145186
continue;
146187
}
147188

148-
// No more WITH expressions, we're done with this statement
189+
if (
190+
$token->type === Token::TYPE_KEYWORD && (
191+
$token->value === 'SELECT'
192+
|| $token->value === 'INSERT'
193+
|| $token->value === 'UPDATE'
194+
|| $token->value === 'DELETE'
195+
)
196+
) {
197+
$state = 5;
198+
--$list->idx;
199+
continue;
200+
}
201+
202+
$parser->error('An expression was expected.', $token);
203+
break;
204+
} elseif ($state === 5) {
205+
/**
206+
* We need to parse all of the remaining tokens because, mostly, they are only the CTE expression,
207+
* which is usually a SELECT, INSERT, UPDATE, or DELETE statement.
208+
* e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3.
209+
* The issue is that, `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query
210+
* not the cte expression (SELECT col1 FROM cte) we need to determine the end of the expression
211+
* to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead.
212+
*/
213+
214+
// Index of the last parsed token by default would be the last token in the $list, because we're
215+
// assuming that all remaining tokens at state 5 are related to the expression.
216+
$idxOfLastParsedToken = $list->count - 1;
217+
// Index before search to be able to restore the index.
218+
$idxBeforeSearch = $list->idx;
219+
// Length of expression tokens is null by default, in order for the $subList to start
220+
// from $list->idx to the end of the $list.
221+
$lengthOfExpressionTokens = null;
222+
223+
if ($list->getNextOfTypeAndValue(Token::TYPE_KEYWORD, 'ON')) {
224+
// (-1) because getNextOfTypeAndValue returned ON and increased the index.
225+
$idxOfOn = $list->idx - 1;
226+
// We want to make sure that it's `ON DUPLICATE KEY UPDATE`
227+
$dubplicateToken = $list->getNext();
228+
$keyToken = $list->getNext();
229+
$updateToken = $list->getNext();
230+
if (
231+
$dubplicateToken && $dubplicateToken->keyword === 'DUPLICATE'
232+
&& ($keyToken && $keyToken->keyword === 'KEY')
233+
&& ($updateToken && $updateToken->keyword === 'UPDATE')
234+
) {
235+
// Index of the last parsed token will be the token before the ON Keyword
236+
$idxOfLastParsedToken = $idxOfOn - 1;
237+
// The length of the expression tokens would be the difference
238+
// between the first unrelated token `ON` and the idx
239+
// before skipping the CTE tokens.
240+
$lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch;
241+
}
242+
}
243+
244+
// Restore the index
245+
$list->idx = $idxBeforeSearch;
246+
247+
$subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens));
248+
$subParser = new Parser($subList);
249+
if (count($subParser->errors)) {
250+
foreach ($subParser->errors as $error) {
251+
$parser->errors[] = $error;
252+
}
253+
254+
break;
255+
}
256+
257+
$this->cteStatementParser = $subParser;
258+
259+
$list->idx = $idxOfLastParsedToken;
149260
break;
150261
}
151262
}
152263

264+
// 5 is the only valid end state
265+
if ($state !== 5) {
266+
/**
267+
* Token parsed at this moment.
268+
*
269+
* @var Token
270+
*/
271+
$token = $list->tokens[$list->idx];
272+
273+
$parser->error('Unexpected end of the WITH CTE.', $token);
274+
}
275+
153276
--$list->idx;
154277
}
155278

@@ -165,6 +288,14 @@ public function build()
165288
$str .= WithKeyword::build($wither);
166289
}
167290

291+
$str .= ' ';
292+
293+
if ($this->cteStatementParser) {
294+
foreach ($this->cteStatementParser->statements as $statement) {
295+
$str .= $statement->build();
296+
}
297+
}
298+
168299
return $str;
169300
}
170301

0 commit comments

Comments
 (0)