@@ -60,6 +60,13 @@ final class WithStatement extends Statement
6060 /** @var WithKeyword[] */
6161 public $ withers = [];
6262
63+ /**
64+ * Holds the parser of the statement that follows the CTE definitions.
65+ *
66+ * @var Parser|null
67+ */
68+ public $ cteStatementParser ;
69+
6370 /**
6471 * @param Parser $parser the instance that requests parsing
6572 * @param TokensList $list the list of tokens to be parsed
@@ -68,26 +75,34 @@ final class WithStatement extends Statement
6875 */
6976 public function parse (Parser $ parser , TokensList $ list )
7077 {
71- ++$ list ->idx ; // Skipping `WITH`.
72-
73- // parse any options if provided
74- $ this ->options = OptionsArray::parse ($ parser , $ list , static ::$ OPTIONS );
75- ++$ list ->idx ;
76-
7778 /**
7879 * The state of the parser.
7980 *
8081 * Below are the states of the parser.
8182 *
8283 * 0 ---------------- [ name ] -----------------> 1
83- * 1 -------------- [( columns )] AS ----------------> 2
84- * 2 ------------------ [ , ] --------------------> 0
84+ *
85+ * 1 ------------------ [ ( ] ------------------> 2
86+ *
87+ * 2 ------------------ [ AS ] -----------------> 3
88+ *
89+ * 3 ------------------ [ ( ] ------------------> 4
90+ *
91+ * 4 ------------------ [ , ] ------------------> 0
92+ *
93+ * 4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5
8594 *
8695 * @var int
8796 */
8897 $ state = 0 ;
8998 $ wither = null ;
9099
100+ ++$ list ->idx ; // Skipping `WITH`.
101+
102+ // parse any options if provided
103+ $ this ->options = OptionsArray::parse ($ parser , $ list , static ::$ OPTIONS );
104+ ++$ list ->idx ;
105+
91106 for (; $ list ->idx < $ list ->count ; ++$ list ->idx ) {
92107 /**
93108 * Token parsed at this moment.
@@ -99,57 +114,165 @@ public function parse(Parser $parser, TokensList $list)
99114 continue ;
100115 }
101116
102- if ($ token ->type === Token::TYPE_NONE ) {
117+ if ($ state === 0 ) {
118+ if ($ token ->type !== Token::TYPE_NONE ) {
119+ $ parser ->error ('The name of the CTE was expected. ' , $ token );
120+ break ;
121+ }
122+
103123 $ wither = $ token ->value ;
104124 $ this ->withers [$ wither ] = new WithKeyword ($ wither );
105125 $ state = 1 ;
106- continue ;
107- }
108-
109- if ($ state === 1 ) {
110- if ($ token ->value === '( ' ) {
126+ } elseif ($ state === 1 ) {
127+ if ($ token ->type === Token::TYPE_OPERATOR && $ token ->value === '( ' ) {
111128 $ this ->withers [$ wither ]->columns = Array2d::parse ($ parser , $ list );
112- continue ;
113- }
114-
115- if ($ token ->keyword === 'AS ' ) {
116- ++$ list ->idx ;
117129 $ state = 2 ;
118- continue ;
130+ } elseif ($ token ->type === Token::TYPE_KEYWORD && $ token ->keyword === 'AS ' ) {
131+ $ state = 3 ;
132+ } else {
133+ $ parser ->error ('Unexpected token. ' , $ token );
134+ break ;
119135 }
120136 } elseif ($ state === 2 ) {
121- if ($ token ->value === '( ' ) {
122- ++$ list ->idx ;
123- $ subList = $ this ->getSubTokenList ($ list );
124- if ($ subList instanceof ParserException) {
125- $ parser ->errors [] = $ subList ;
126- continue ;
127- }
137+ if (! ($ token ->type === Token::TYPE_KEYWORD && $ token ->keyword === 'AS ' )) {
138+ $ parser ->error ('AS keyword was expected. ' , $ token );
139+ break ;
140+ }
141+
142+ $ state = 3 ;
143+ } elseif ($ state === 3 ) {
144+ $ idxBeforeGetNext = $ list ->idx ;
128145
129- $ subParser = new Parser ($ subList );
146+ // We want the next non-comment, non-space token after $token.
147+ // The first getNext call starts at the current $idx (which is $token), returns it
148+ // and increases $idx by 1; the token now at $idx is not guaranteed to be non-comment
149+ // and non-space, which is why we call getNext a second time.
150+ $ list ->getNext ();
151+ $ nextKeyword = $ list ->getNext ();
130152
131- if (count ($ subParser ->errors )) {
132- foreach ($ subParser ->errors as $ error ) {
133- $ parser ->errors [] = $ error ;
134- }
153+ if (! ($ token ->value === '( ' && ($ nextKeyword && $ nextKeyword ->value === 'SELECT ' ))) {
154+ $ parser ->error ('Subquery of the CTE was expected. ' , $ token );
155+ $ list ->idx = $ idxBeforeGetNext ;
156+ break ;
157+ }
158+
159+ // Restore the index
160+ $ list ->idx = $ idxBeforeGetNext ;
161+
162+ ++$ list ->idx ;
163+ $ subList = $ this ->getSubTokenList ($ list );
164+ if ($ subList instanceof ParserException) {
165+ $ parser ->errors [] = $ subList ;
166+ break ;
167+ }
168+
169+ $ subParser = new Parser ($ subList );
170+
171+ if (count ($ subParser ->errors )) {
172+ foreach ($ subParser ->errors as $ error ) {
173+ $ parser ->errors [] = $ error ;
135174 }
136175
137- $ this ->withers [$ wither ]->statement = $ subParser ;
138- continue ;
176+ break ;
139177 }
140178
141- // There's another WITH expression to parse, go back to state=0
179+ $ this ->withers [$ wither ]->statement = $ subParser ;
180+
181+ $ state = 4 ;
182+ } elseif ($ state === 4 ) {
142183 if ($ token ->value === ', ' ) {
143- $ list -> idx ++;
184+ // There's another WITH expression to parse, go back to state=0
144185 $ state = 0 ;
145186 continue ;
146187 }
147188
148- // No more WITH expressions, we're done with this statement
189+ if (
190+ $ token ->type === Token::TYPE_KEYWORD && (
191+ $ token ->value === 'SELECT '
192+ || $ token ->value === 'INSERT '
193+ || $ token ->value === 'UPDATE '
194+ || $ token ->value === 'DELETE '
195+ )
196+ ) {
197+ $ state = 5 ;
198+ --$ list ->idx ;
199+ continue ;
200+ }
201+
202+ $ parser ->error ('An expression was expected. ' , $ token );
203+ break ;
204+ } elseif ($ state === 5 ) {
205+ /**
206+ * We need to parse all of the remaining tokens because, most of the time, they form only the CTE
207+ * expression, which is usually a SELECT, INSERT, UPDATE, or DELETE statement.
208+ * e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3.
209+ * The issue is that `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query,
210+ * not to the CTE expression (SELECT col1 FROM cte), so we need to determine the end of the expression
211+ * in order to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead.
212+ */
213+
214+ // Index of the last parsed token by default would be the last token in the $list, because we're
215+ // assuming that all remaining tokens at state 5 are related to the expression.
216+ $ idxOfLastParsedToken = $ list ->count - 1 ;
217+ // Index before search to be able to restore the index.
218+ $ idxBeforeSearch = $ list ->idx ;
219+ // Length of expression tokens is null by default, in order for the $subList to start
220+ // from $list->idx to the end of the $list.
221+ $ lengthOfExpressionTokens = null ;
222+
223+ if ($ list ->getNextOfTypeAndValue (Token::TYPE_KEYWORD , 'ON ' )) {
224+ // (-1) because getNextOfTypeAndValue returned ON and increased the index.
225+ $ idxOfOn = $ list ->idx - 1 ;
226+ // We want to make sure that it's `ON DUPLICATE KEY UPDATE`
227+ $ dubplicateToken = $ list ->getNext ();
228+ $ keyToken = $ list ->getNext ();
229+ $ updateToken = $ list ->getNext ();
230+ if (
231+ $ dubplicateToken && $ dubplicateToken ->keyword === 'DUPLICATE '
232+ && ($ keyToken && $ keyToken ->keyword === 'KEY ' )
233+ && ($ updateToken && $ updateToken ->keyword === 'UPDATE ' )
234+ ) {
235+ // Index of the last parsed token will be the token before the ON Keyword
236+ $ idxOfLastParsedToken = $ idxOfOn - 1 ;
237+ // The length of the expression tokens would be the difference
238+ // between the first unrelated token `ON` and the idx
239+ // before skipping the CTE tokens.
240+ $ lengthOfExpressionTokens = $ idxOfOn - $ idxBeforeSearch ;
241+ }
242+ }
243+
244+ // Restore the index
245+ $ list ->idx = $ idxBeforeSearch ;
246+
247+ $ subList = new TokensList (array_slice ($ list ->tokens , $ list ->idx , $ lengthOfExpressionTokens ));
248+ $ subParser = new Parser ($ subList );
249+ if (count ($ subParser ->errors )) {
250+ foreach ($ subParser ->errors as $ error ) {
251+ $ parser ->errors [] = $ error ;
252+ }
253+
254+ break ;
255+ }
256+
257+ $ this ->cteStatementParser = $ subParser ;
258+
259+ $ list ->idx = $ idxOfLastParsedToken ;
149260 break ;
150261 }
151262 }
152263
264+ // 5 is the only valid end state
265+ if ($ state !== 5 ) {
266+ /**
267+ * Token parsed at this moment.
268+ *
269+ * @var Token
270+ */
271+ $ token = $ list ->tokens [$ list ->idx ];
272+
273+ $ parser ->error ('Unexpected end of the WITH CTE. ' , $ token );
274+ }
275+
153276 --$ list ->idx ;
154277 }
155278
@@ -165,6 +288,14 @@ public function build()
165288 $ str .= WithKeyword::build ($ wither );
166289 }
167290
291+ $ str .= ' ' ;
292+
293+ if ($ this ->cteStatementParser ) {
294+ foreach ($ this ->cteStatementParser ->statements as $ statement ) {
295+ $ str .= $ statement ->build ();
296+ }
297+ }
298+
168299 return $ str ;
169300 }
170301
0 commit comments