Skip to content

Commit d8a3315

Browse files
authored
Update the PHP lexer (#1206)
This updates the PHP lexer to match the one in Pygments. Most notably, it adds support for [attributes](https://www.php.net/manual/en/language.attributes.overview.php). I've preserved the `_` handling in numbers, which is not available in Pygments, and I've also improved it a bit: octal and binary literals now accept `_` as well.
1 parent 353c35b commit d8a3315

1 file changed

Lines changed: 53 additions & 7 deletions

File tree

‎lexers/embedded/php.xml‎

Lines changed: 53 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@
5454
<rule pattern="\\([nrt&#34;$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})">
5555
<token type="LiteralStringEscape"/>
5656
</rule>
57-
<rule pattern="\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|-&gt;(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?">
57+
<rule pattern="\$(?:[_a-z]|[^\x00-\x7f])(?:\w|[^\x00-\x7f])*(\[\S+?\]|-&gt;(?:[_a-z]|[^\x00-\x7f])(?:\w|[^\x00-\x7f])*)?">
5858
<token type="LiteralStringInterpol"/>
5959
</rule>
6060
<rule pattern="(\{\$\{)(.*?)(\}\})">
@@ -82,12 +82,46 @@
8282
<token type="LiteralStringDouble"/>
8383
</rule>
8484
</state>
85+
<state name="variablevariable">
86+
<rule pattern="\}">
87+
<token type="NameVariable"/>
88+
<pop depth="1"/>
89+
</rule>
90+
<rule>
91+
<include state="root"/>
92+
</rule>
93+
</state>
94+
<state name="attribute">
95+
<rule pattern="\]">
96+
<token type="Punctuation"/>
97+
<pop depth="1"/>
98+
</rule>
99+
<rule pattern="\(">
100+
<token type="Punctuation"/>
101+
<push state="attributeparams"/>
102+
</rule>
103+
<rule pattern="(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*">
104+
<token type="NameDecorator"/>
105+
</rule>
106+
<rule>
107+
<include state="root"/>
108+
</rule>
109+
</state>
110+
<state name="attributeparams">
111+
<rule pattern="\)">
112+
<token type="Punctuation"/>
113+
<pop depth="1"/>
114+
</rule>
115+
<rule>
116+
<include state="root"/>
117+
</rule>
118+
</state>
85119
<state name="root">
86120
<rule pattern="\?&gt;">
87121
<token type="CommentPreproc"/>
88122
<pop depth="1"/>
89123
</rule>
90-
<rule pattern="(&lt;&lt;&lt;)([\&#39;&#34;]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)">
124+
<rule pattern="(&lt;&lt;&lt;)([\&#39;&#34;]?)((?:[_a-z]|[^\x00-\x7f])(?:\w|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)">
91125
<bygroups>
92126
<token type="LiteralString"/>
93127
<token type="LiteralString"/>
@@ -101,6 +135,10 @@
101135
<rule pattern="\s+">
102136
<token type="Text"/>
103137
</rule>
138+
<rule pattern="#\[">
139+
<token type="Punctuation"/>
140+
<push state="attribute"/>
141+
</rule>
104142
<rule pattern="#.*?\n">
105143
<token type="CommentSingle"/>
106144
</rule>
@@ -116,7 +154,7 @@
116154
<rule pattern="/\*.*?\*/">
117155
<token type="CommentMultiline"/>
118156
</rule>
119-
<rule pattern="(-&gt;|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)">
157+
<rule pattern="(-&gt;|::)(\s*)((?:[_a-z]|[^\x00-\x7f])(?:\w|[^\x00-\x7f])*)">
120158
<bygroups>
121159
<token type="Operator"/>
122160
<token type="Text"/>
@@ -132,6 +170,13 @@
132170
<rule pattern="[\[\]{}();,]+">
133171
<token type="Punctuation"/>
134172
</rule>
173+
<rule pattern="(new)(\s+)(class)\b">
174+
<bygroups>
175+
<token type="Keyword"/>
176+
<token type="Text"/>
177+
<token type="Keyword"/>
178+
</bygroups>
179+
</rule>
135180
<rule pattern="(class)(\s+)">
136181
<bygroups>
137182
<token type="Keyword"/>
@@ -161,7 +206,7 @@
161206
<token type="NameConstant"/>
162207
</bygroups>
163208
</rule>
164-
<rule pattern="(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b">
209+
<rule pattern="(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally|match)\b">
165210
<token type="Keyword"/>
166211
</rule>
167212
<rule pattern="(true|false|null)\b">
@@ -170,8 +215,9 @@
170215
<rule>
171216
<include state="magicconstants"/>
172217
</rule>
173-
<rule pattern="\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}">
218+
<rule pattern="\$\{">
174219
<token type="NameVariable"/>
220+
<push state="variablevariable"/>
175221
</rule>
176222
<rule pattern="\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*">
177223
<token type="NameVariable"/>
@@ -185,7 +231,7 @@
185231
<rule pattern="\d+e[+-]?[0-9]+">
186232
<token type="LiteralNumberFloat"/>
187233
</rule>
188-
<rule pattern="0[0-7]+">
234+
<rule pattern="0o?[0-7_]+">
189235
<token type="LiteralNumberOct"/>
190236
</rule>
191237
<rule pattern="0x[a-f0-9_]+">
@@ -194,7 +240,7 @@
194240
<rule pattern="\d[\d_]*">
195241
<token type="LiteralNumberInteger"/>
196242
</rule>
197-
<rule pattern="0b[01]+">
243+
<rule pattern="0b[01_]+">
198244
<token type="LiteralNumberBin"/>
199245
</rule>
200246
<rule pattern="&#39;([^&#39;\\]*(?:\\.[^&#39;\\]*)*)&#39;">

0 commit comments

Comments
 (0)