php
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c
Lines changed: 0 additions & 147 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h
Lines changed: 0 additions & 22 deletions
diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c
Lines changed: 1 addition & 18 deletions
@@ -93,153 +93,6 @@
 #include "filters/mbfilter_singlebyte.h"
 #include "filters/mbfilter_utf8.h"
 
-#include "rare_cp_bitvec.h"
-
-/*
- * encoding detector
- */
-static int mbfl_estimate_encoding_likelihood(int input_cp, void *void_data)
-{
-	mbfl_encoding_detector_data *data = void_data;
-	unsigned int c = input_cp;
-
-	/* Receive wchars decoded from input string using candidate encoding.
-	 * If the string was invalid in the candidate encoding, we assume
-	 * it's the wrong one. Otherwise, give the candidate many 'demerits'
-	 * for each 'rare' codepoint found, a smaller number for each ASCII
-	 * punctuation character, and 1 for all other codepoints.
-	 *
-	 * The 'common' codepoints should cover the vast majority of
-	 * codepoints we are likely to see in practice, while only covering
-	 * a small minority of the entire Unicode encoding space. Why?
-	 * Well, if the test string happens to be valid in an incorrect
-	 * candidate encoding, the bogus codepoints which it decodes to will
-	 * be more or less random. By treating the majority of codepoints as
-	 * 'rare', we ensure that in almost all such cases, the bogus
-	 * codepoints will include plenty of 'rares', thus giving the
-	 * incorrect candidate encoding lots of demerits. See
-	 * common_codepoints.txt for the actual list used.
-	 *
-	 * So, why give extra demerits for ASCII punctuation characters? It's
-	 * because there are some text encodings, like UTF-7, HZ, and ISO-2022,
-	 * which deliberately only use bytes in the ASCII range. When
-	 * misinterpreted as ASCII/UTF-8, strings in these encodings will
-	 * have an unusually high number of ASCII punctuation characters.
-	 * So giving extra demerits for such characters will improve
-	 * detection accuracy for UTF-7 and similar encodings.
-	 *
-	 * Finally, why 1 demerit for all other characters? That penalizes
-	 * long strings, meaning we will tend to choose a candidate encoding
-	 * in which the test string decodes to a smaller number of
-	 * codepoints. That prevents single-byte encodings in which almost
-	 * every possible input byte decodes to a 'common' codepoint from
-	 * being favored too much. */
-	if (c == MBFL_BAD_INPUT) {
-		data->num_illegalchars++;
-	} else if (c > 0xFFFF) {
-		data->score += 40;
-	} else if (c >= 0x21 && c <= 0x2F) {
-		data->score += 6;
-	} else if ((rare_codepoint_bitvec[c >> 5] >> (c & 0x1F)) & 1) {
-		data->score += 30;
-	} else {
-		data->score += 1;
-	}
-	return 0;
-}
-
-mbfl_encoding_detector *mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict)
-{
-	if (!elistsz) {
-		return NULL;
-	}
-
-	mbfl_encoding_detector *identd = emalloc(sizeof(mbfl_encoding_detector));
-	identd->filter_list = ecalloc(elistsz, sizeof(mbfl_convert_filter*));
-	identd->filter_data = ecalloc(elistsz, sizeof(mbfl_encoding_detector_data));
-
-	int filter_list_size = 0;
-	for (int i = 0; i < elistsz; i++) {
-		mbfl_convert_filter *filter = mbfl_convert_filter_new(elist[i], &mbfl_encoding_wchar,
-			mbfl_estimate_encoding_likelihood, NULL, &identd->filter_data[filter_list_size]);
-		if (filter) {
-			identd->filter_list[filter_list_size++] = filter;
-		}
-	}
-	identd->filter_list_size = filter_list_size;
-	identd->strict = strict;
-	return identd;
-}
-
-void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
-{
-	for (int i = 0; i < identd->filter_list_size; i++) {
-		mbfl_convert_filter_delete(identd->filter_list[i]);
-	}
-	efree(identd->filter_list);
-	efree(identd->filter_data);
-	efree(identd);
-}
-
-int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string)
-{
-	int num = identd->filter_list_size;
-	size_t n = string->len;
-	unsigned char *p = string->val;
-	int bad = 0;
-
-	if (identd->strict) {
-		for (int i = 0; i < num; i++) {
-			mbfl_convert_filter *filter = identd->filter_list[i];
-			mbfl_encoding_detector_data *data = &identd->filter_data[i];
-			if (filter->from->check != NULL && !(filter->from->check)(p, n)) {
-				data->num_illegalchars++;
-			}
-		}
-	}
-
-	while (n--) {
-		for (int i = 0; i < num; i++) {
-			mbfl_convert_filter *filter = identd->filter_list[i];
-			mbfl_encoding_detector_data *data = &identd->filter_data[i];
-			if (!data->num_illegalchars) {
-				(*filter->filter_function)(*p, filter);
-				if (data->num_illegalchars) {
-					bad++;
-				}
-			}
-		}
-		if ((num - 1) <= bad && !identd->strict) {
-			return 1;
-		}
-		p++;
-	}
-
-	for (int i = 0; i < num; i++) {
-		mbfl_convert_filter *filter = identd->filter_list[i];
-		(filter->filter_flush)(filter);
-	}
-
-	return 0;
-}
-
-const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd)
-{
-	size_t best_score = SIZE_MAX; /* Low score is 'better' */
-	const mbfl_encoding *enc = NULL;
-
-	for (int i = 0; i < identd->filter_list_size; i++) {
-		mbfl_convert_filter *filter = identd->filter_list[i];
-		mbfl_encoding_detector_data *data = &identd->filter_data[i];
-		if (!data->num_illegalchars && data->score < best_score) {
-			enc = filter->from;
-			best_score = data->score;
-		}
-	}
-
-	return enc;
-}
-
 /*
  *  strcut
  */
 
@@ -125,28 +125,6 @@
 #define MIN(a,b) ((a)<(b)?(a):(b))
 #endif
 
-/*
- * encoding detector
- */
-typedef struct _mbfl_encoding_detector mbfl_encoding_detector;
-
-typedef struct {
-	size_t num_illegalchars;
-	size_t score;
-} mbfl_encoding_detector_data;
-
-struct _mbfl_encoding_detector {
-	mbfl_convert_filter **filter_list;
-	mbfl_encoding_detector_data *filter_data;
-	int filter_list_size;
-	int strict;
-};
-
-MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict);
-MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd);
-MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string);
-MBFLAPI extern const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd);
-
 /* Lengths -1 through -16 are reserved for error return values */
 static inline int mbfl_is_error(size_t len) {
 	return len >= (size_t) -16;
 
@@ -177,7 +177,6 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
 	size_t n, num = 1, *len_list = NULL;
 	size_t new_val_len;
 	const mbfl_encoding *from_encoding = NULL;
-	mbfl_encoding_detector *identd = NULL;
 
 	if (!res || *res == '\0') {
 		goto out;
@@ -235,23 +234,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i
 	} else if (info->num_from_encodings == 1) {
 		from_encoding = info->from_encodings[0];
 	} else {
-		/* auto detect */
-		from_encoding = NULL;
-		identd = mbfl_encoding_detector_new(info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection));
-		if (identd != NULL) {
-			n = 0;
-			while (n < num) {
-				mbfl_string string;
-				string.val = (unsigned char *)val_list[n];
-				string.len = len_list[n];
-				if (mbfl_encoding_detector_feed(identd, &string)) {
-					break;
-				}
-				n++;
-			}
-			from_encoding = mbfl_encoding_detector_judge(identd);
-			mbfl_encoding_detector_delete(identd);
-		}
+		from_encoding = mb_guess_encoding_for_strings((const unsigned char**)val_list, len_list, num, info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection));
 		if (!from_encoding) {
 			if (info->report_errors) {
 				php_error_docref(NULL, E_WARNING, "Unable to detect encoding");