@@ -78,7 +78,8 @@ static const char _codes[26] =
7878};
7979
8080
81- #define ENCODE (c ) (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)
81+ /* Note: these macros require an uppercase letter input! */
82+ #define ENCODE (c ) (isalpha(c) ? _codes[((c) - 'A')] : 0)
8283
8384#define isvowel (c ) (ENCODE(c) & 1) /* AEIOU */
8485
@@ -101,16 +102,19 @@ static const char _codes[26] =
101102/* I suppose I could have been using a character pointer instead of
102103 * accessing the array directly... */
103104
105+ #define Convert_Raw (c ) toupper(c)
104106/* Look at the next letter in the word */
105- #define Next_Letter (toupper(word[w_idx+1]))
107+ #define Read_Raw_Next_Letter (word[w_idx+1])
108+ #define Read_Next_Letter (Convert_Raw(Read_Raw_Next_Letter))
106109/* Look at the current letter in the word */
107- #define Curr_Letter (toupper(word[w_idx]))
110+ #define Read_Raw_Curr_Letter (word[w_idx])
111+ #define Read_Curr_Letter (Convert_Raw(Read_Raw_Curr_Letter))
108112/* Go N letters back. */
109- #define Look_Back_Letter (n ) (w_idx >= n ? toupper (word[w_idx-n]) : '\0')
113+ #define Look_Back_Letter (n ) (w_idx >= n ? Convert_Raw (word[w_idx-n]) : '\0')
110114/* Previous letter. I dunno, should this return null on failure? */
111- #define Prev_Letter (Look_Back_Letter(1))
115+ #define Read_Prev_Letter (Look_Back_Letter(1))
112116/* Look two letters down. It makes sure you don't walk off the string. */
113- #define After_Next_Letter (Next_Letter != '\0' ? toupper (word[w_idx+2]) \
117+ #define Read_After_Next_Letter (Read_Raw_Next_Letter != '\0' ? Convert_Raw (word[w_idx+2]) \
114118 : '\0')
115119#define Look_Ahead_Letter (n ) (toupper(Lookahead((char *) word+w_idx, n)))
116120
@@ -119,15 +123,13 @@ static const char _codes[26] =
119123/* Lookahead: return the character how_far positions into word, or the terminating '\0' if the string ends before that. I probably could have just used strlen... */
120124static char Lookahead (char * word , int how_far )
121125{
122- char letter_ahead = '\0' ; /* null by default */
123126 int idx ;
124127 for (idx = 0 ; word [idx ] != '\0' && idx < how_far ; idx ++ );
125128 /* Edge forward in the string: the empty-bodied loop above stops at the terminator or after how_far steps, whichever comes first. */
126129
127- letter_ahead = word [idx ]; /* idx will be either == to how_far or
128- * at the end of the string
130+ return word [idx ]; /* idx will be either == to how_far or
131+ * at the end of the string where it will be null
129132 */
130- return letter_ahead ;
131133}
132134
133135
@@ -164,6 +166,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
164166 int w_idx = 0 ; /* point in the phonization we're at. */
165167 size_t p_idx = 0 ; /* end of the phoned phrase */
166168 size_t max_buffer_len = 0 ; /* maximum length of the destination buffer */
169+ char curr_letter ;
167170 ZEND_ASSERT (word != NULL );
168171 ZEND_ASSERT (max_phonemes >= 0 );
169172
@@ -179,18 +182,20 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
179182
180183/*-- The first phoneme has to be processed specially. --*/
181184 /* Find our first letter */
182- for (; !isalpha (Curr_Letter ); w_idx ++ ) {
185+ for (; !isalpha (curr_letter = Read_Raw_Curr_Letter ); w_idx ++ ) {
183186 /* On the off chance we were given nothing but crap... */
184- if (Curr_Letter == '\0' ) {
187+ if (curr_letter == '\0' ) {
185188 End_Phoned_Word ();
186189 return ;
187190 }
188191 }
189192
190- switch (Curr_Letter ) {
193+ curr_letter = Convert_Raw (curr_letter );
194+
195+ switch (curr_letter ) {
191196 /* AE becomes E */
192197 case 'A' :
193- if (Next_Letter == 'E' ) {
198+ if (Read_Next_Letter == 'E' ) {
194199 Phonize ('E' );
195200 w_idx += 2 ;
196201 }
@@ -204,24 +209,26 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
204209 case 'G' :
205210 case 'K' :
206211 case 'P' :
207- if (Next_Letter == 'N' ) {
212+ if (Read_Next_Letter == 'N' ) {
208213 Phonize ('N' );
209214 w_idx += 2 ;
210215 }
211216 break ;
212217 /* WH becomes W,
213218 WR becomes R
214219 W if followed by a vowel */
215- case 'W' :
216- if (Next_Letter == 'R' ) {
217- Phonize (Next_Letter );
220+ case 'W' : {
221+ char next_letter = Read_Next_Letter ;
222+ if (next_letter == 'R' ) {
223+ Phonize ('R' );
218224 w_idx += 2 ;
219- } else if (Next_Letter == 'H' || isvowel (Next_Letter )) {
225+ } else if (next_letter == 'H' || isvowel (next_letter )) {
220226 Phonize ('W' );
221227 w_idx += 2 ;
222228 }
223229 /* else ignore */
224230 break ;
231+ }
225232 /* X becomes S */
226233 case 'X' :
227234 Phonize ('S' );
@@ -236,7 +243,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
236243 case 'I' :
237244 case 'O' :
238245 case 'U' :
239- Phonize (Curr_Letter );
246+ Phonize (curr_letter );
240247 w_idx ++ ;
241248 break ;
242249 default :
@@ -247,7 +254,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
247254
248255
249256 /* On to the metaphoning */
250- for (; Curr_Letter != '\0' &&
257+ for (; ( curr_letter = Read_Raw_Curr_Letter ) != '\0' &&
251258 (max_phonemes == 0 || Phone_Len < (size_t )max_phonemes );
252259 w_idx ++ ) {
253260 /* How many letters to skip because an earlier encoding handled
@@ -263,18 +270,23 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
263270 */
264271
265272 /* Ignore non-alphas */
266- if (!isalpha (Curr_Letter ))
273+ if (!isalpha (curr_letter ))
267274 continue ;
268275
276+ curr_letter = Convert_Raw (curr_letter );
277+ /* Note: we can't cache curr_letter from the previous loop
278+ * because of the skip_letter variable. */
279+ char prev_letter = Read_Prev_Letter ;
280+
269281 /* Drop duplicates, except CC */
270- if (Curr_Letter == Prev_Letter &&
271- Curr_Letter != 'C' )
282+ if (curr_letter == prev_letter &&
283+ curr_letter != 'C' )
272284 continue ;
273285
274- switch (Curr_Letter ) {
286+ switch (curr_letter ) {
275287 /* B -> B unless in MB */
276288 case 'B' :
277- if (Prev_Letter != 'M' )
289+ if (prev_letter != 'M' )
278290 Phonize ('B' );
279291 break ;
280292 /* 'sh' if -CIA- or -CH, but not SCH, except SCHW.
@@ -283,20 +295,20 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
283295 * dropped if -SCI-, SCE-, -SCY- (handed in S)
284296 * else K
285297 */
286- case 'C' :
287- if ( MAKESOFT ( Next_Letter )) { /* C[IEY] */
288- if (After_Next_Letter == 'A' &&
289- Next_Letter == 'I' ) { /* CIA */
298+ case 'C' : {
299+ char next_letter = Read_Next_Letter ;
300+ if (MAKESOFT ( next_letter )) { /* C[IEY] */
301+ if ( next_letter == 'I' && Read_After_Next_Letter == 'A ' ) { /* CIA */
290302 Phonize (SH );
291303 }
292304 /* SC[IEY] */
293- else if (Prev_Letter == 'S' ) {
305+ else if (prev_letter == 'S' ) {
294306 /* Dropped */
295307 } else {
296308 Phonize ('S' );
297309 }
298- } else if (Next_Letter == 'H' ) {
299- if ((!traditional ) && (After_Next_Letter == 'R ' || Prev_Letter == 'S ' )) { /* Christ, School */
310+ } else if (next_letter == 'H' ) {
311+ if ((!traditional ) && (prev_letter == 'S ' || Read_After_Next_Letter == 'R ' )) { /* Christ, School */
300312 Phonize ('K' );
301313 } else {
302314 Phonize (SH );
@@ -306,12 +318,13 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
306318 Phonize ('K' );
307319 }
308320 break ;
321+ }
309322 /* J if in -DGE-, -DGI- or -DGY-
310323 * else T
311324 */
312325 case 'D' :
313- if (Next_Letter == 'G' &&
314- MAKESOFT (After_Next_Letter )) {
326+ if (Read_Next_Letter == 'G' &&
327+ MAKESOFT (Read_After_Next_Letter )) {
315328 Phonize ('J' );
316329 skip_letter ++ ;
317330 } else
@@ -323,47 +336,50 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
323336 * else J if in -GE-, -GI, -GY and not GG
324337 * else K
325338 */
326- case 'G' :
327- if (Next_Letter == 'H' ) {
339+ case 'G' : {
340+ char next_letter = Read_Next_Letter ;
341+ if (next_letter == 'H' ) {
328342 if (!(NOGHTOF (Look_Back_Letter (3 )) ||
329343 Look_Back_Letter (4 ) == 'H' )) {
330344 Phonize ('F' );
331345 skip_letter ++ ;
332346 } else {
333347 /* silent */
334348 }
335- } else if (Next_Letter == 'N' ) {
336- if (Isbreak (After_Next_Letter ) ||
337- (After_Next_Letter == 'E' &&
349+ } else if (next_letter == 'N' ) {
350+ char after_next_letter = Read_After_Next_Letter ;
351+ if (Isbreak (after_next_letter ) ||
352+ (after_next_letter == 'E' &&
338353 Look_Ahead_Letter (3 ) == 'D' )) {
339354 /* dropped */
340355 } else
341356 Phonize ('K' );
342- } else if (MAKESOFT (Next_Letter ) &&
343- Prev_Letter != 'G' ) {
357+ } else if (MAKESOFT (next_letter ) &&
358+ prev_letter != 'G' ) {
344359 Phonize ('J' );
345360 } else {
346361 Phonize ('K' );
347362 }
348363 break ;
364+ }
349365 /* H if before a vowel and not after C,G,P,S,T */
350366 case 'H' :
351- if (isvowel (Next_Letter ) &&
352- !AFFECTH (Prev_Letter ))
367+ if (isvowel (Read_Next_Letter ) &&
368+ !AFFECTH (prev_letter ))
353369 Phonize ('H' );
354370 break ;
355371 /* dropped if after C
356372 * else K
357373 */
358374 case 'K' :
359- if (Prev_Letter != 'C' )
375+ if (prev_letter != 'C' )
360376 Phonize ('K' );
361377 break ;
362378 /* F if before H
363379 * else P
364380 */
365381 case 'P' :
366- if (Next_Letter == 'H' ) {
382+ if (Read_Next_Letter == 'H' ) {
367383 Phonize ('F' );
368384 } else {
369385 Phonize ('P' );
@@ -377,44 +393,50 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
377393 /* 'sh' in -SH-, -SIO- or -SIA- or -SCHW-
378394 * else S
379395 */
380- case 'S' :
381- if (Next_Letter == 'I' &&
382- (After_Next_Letter == 'O' ||
383- After_Next_Letter == 'A' )) {
396+ case 'S' : {
397+ char next_letter = Read_Next_Letter ;
398+ char after_next_letter ;
399+ if (next_letter == 'I' &&
400+ ((after_next_letter = Read_After_Next_Letter ) == 'O' ||
401+ after_next_letter == 'A' )) {
384402 Phonize (SH );
385- } else if (Next_Letter == 'H' ) {
403+ } else if (next_letter == 'H' ) {
386404 Phonize (SH );
387405 skip_letter ++ ;
388- } else if ((!traditional ) && (Next_Letter == 'C' && Look_Ahead_Letter (2 ) == 'H' && Look_Ahead_Letter (3 ) == 'W' )) {
406+ } else if ((!traditional ) && (next_letter == 'C' && Look_Ahead_Letter (2 ) == 'H' && Look_Ahead_Letter (3 ) == 'W' )) {
389407 Phonize (SH );
390408 skip_letter += 2 ;
391409 } else {
392410 Phonize ('S' );
393411 }
394412 break ;
413+ }
395414 /* 'sh' in -TIA- or -TIO-
396415 * else 'th' before H
397416 * else T
398417 */
399- case 'T' :
400- if (Next_Letter == 'I' &&
401- (After_Next_Letter == 'O' ||
402- After_Next_Letter == 'A' )) {
418+ case 'T' : {
419+ char next_letter = Read_Next_Letter ;
420+ char after_next_letter ;
421+ if (next_letter == 'I' &&
422+ ((after_next_letter = Read_After_Next_Letter ) == 'O' ||
423+ after_next_letter == 'A' )) {
403424 Phonize (SH );
404- } else if (Next_Letter == 'H' ) {
425+ } else if (next_letter == 'H' ) {
405426 Phonize (TH );
406427 skip_letter ++ ;
407- } else if (!(Next_Letter == 'C' && After_Next_Letter == 'H' )) {
428+ } else if (!(next_letter == 'C' && Read_After_Next_Letter == 'H' )) {
408429 Phonize ('T' );
409430 }
410431 break ;
432+ }
411433 /* F */
412434 case 'V' :
413435 Phonize ('F' );
414436 break ;
415437 /* W before a vowel, else dropped */
416438 case 'W' :
417- if (isvowel (Next_Letter ))
439+ if (isvowel (Read_Next_Letter ))
418440 Phonize ('W' );
419441 break ;
420442 /* KS */
@@ -424,7 +446,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
424446 break ;
425447 /* Y if followed by a vowel */
426448 case 'Y' :
427- if (isvowel (Next_Letter ))
449+ if (isvowel (Read_Next_Letter ))
428450 Phonize ('Y' );
429451 break ;
430452 /* S */
@@ -438,7 +460,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
438460 case 'M' :
439461 case 'N' :
440462 case 'R' :
441- Phonize (Curr_Letter );
463+ Phonize (curr_letter );
442464 break ;
443465 default :
444466 /* nothing */
0 commit comments