Skip to content

Commit 1a512ee

Browse files
Move utf8_encode and utf8_decode to ext/standard
1 parent a5251f7 commit 1a512ee

File tree

11 files changed

+110
-90
lines changed

11 files changed

+110
-90
lines changed

ext/standard/basic_functions.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,6 +2465,14 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3)
24652465
ZEND_ARG_INFO(0, length)
24662466
ZEND_ARG_INFO(0, case_sensitivity)
24672467
ZEND_END_ARG_INFO()
2468+
/* Argument metadata for utf8_encode(): declares one argument named "data".
 * NOTE(review): the trailing 1 in the BEGIN macro is presumably the
 * required-argument count, and the 0 in ZEND_ARG_INFO presumably means
 * "not passed by reference" - confirm against the Zend arginfo macros. */
2469+
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
2470+
ZEND_ARG_INFO(0, data)
2471+
ZEND_END_ARG_INFO()
2472+
/* Argument metadata for utf8_decode(): declares one argument named "data".
 * NOTE(review): the trailing 1 in the BEGIN macro is presumably the
 * required-argument count, and the 0 in ZEND_ARG_INFO presumably means
 * "not passed by reference" - confirm against the Zend arginfo macros. */
2473+
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
2474+
ZEND_ARG_INFO(0, data)
2475+
ZEND_END_ARG_INFO()
24682476
/* }}} */
24692477
/* {{{ syslog.c */
24702478
#ifdef HAVE_SYSLOG_H
@@ -2764,6 +2772,8 @@ const zend_function_entry basic_functions[] = { /* {{{ */
27642772
PHP_FE(str_split, arginfo_str_split)
27652773
PHP_FE(strpbrk, arginfo_strpbrk)
27662774
PHP_FE(substr_compare, arginfo_substr_compare)
2775+
PHP_FE(utf8_encode, arginfo_utf8_encode)
2776+
PHP_FE(utf8_decode, arginfo_utf8_decode)
27672777

27682778
#ifdef HAVE_STRCOLL
27692779
PHP_FE(strcoll, arginfo_strcoll)

ext/standard/php_string.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ PHP_FUNCTION(str_word_count);
9393
PHP_FUNCTION(str_split);
9494
PHP_FUNCTION(strpbrk);
9595
PHP_FUNCTION(substr_compare);
96+
PHP_FUNCTION(utf8_encode);
97+
PHP_FUNCTION(utf8_decode);
9698
#ifdef HAVE_STRCOLL
9799
PHP_FUNCTION(strcoll);
98100
#endif

ext/standard/string.c

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@
6464

6565
/* For str_getcsv() support */
6666
#include "ext/standard/file.h"
67+
/* For php_next_utf8_char() */
68+
#include "ext/standard/html.h"
6769

6870
#define STR_PAD_LEFT 0
6971
#define STR_PAD_RIGHT 1
@@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare)
56535655
}
56545656
/* }}} */
56555657

5658+
/* {{{ */
/* Re-encode a byte string as UTF-8, treating every input byte as one
 * codepoint in the range 0x00-0xFF.
 *
 * s   - input bytes; read one at a time, exactly len of them.
 * len - number of input bytes.
 *
 * Returns a newly allocated zend_string. Bytes below 0x80 are copied
 * unchanged; every other byte becomes a two-byte sequence. The result is
 * NUL-terminated and its length field holds the number of bytes written. */
5659+
static zend_string *php_utf8_encode(const char *s, size_t len)
5660+
{
5661+
size_t pos = len;
5662+
zend_string *str;
5663+
unsigned char c;
5664+
/* Reserve the output buffer up front.
 * NOTE(review): the (len, 2, 0, 0) arguments presumably request len * 2
 * bytes with overflow checking, i.e. the worst case of two output bytes
 * per input byte - confirm against zend_string_safe_alloc(). */
5665+
str = zend_string_safe_alloc(len, 2, 0, 0);
/* The length field is reset and then used as the write cursor below. */
5666+
ZSTR_LEN(str) = 0;
5667+
while (pos > 0) {
5668+
/* The lower 256 codepoints of Unicode are identical to Latin-1,
5669+
* so we don't need to do any mapping here. */
5670+
c = (unsigned char)(*s);
5671+
if (c < 0x80) {
5672+
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
5673+
/* We only account for the single-byte and two-byte cases because
5674+
* we're only dealing with the first 256 Unicode codepoints. */
5675+
} else {
/* Two-byte form: the lead byte carries the top two bits of c, the
 * continuation byte carries the low six bits. */
5676+
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
5677+
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
5678+
}
5679+
pos--;
5680+
s++;
5681+
}
5682+
ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
/* Resize to the number of bytes actually written; this is done
 * unconditionally, even when the whole reservation was used. */
5683+
str = zend_string_truncate(str, ZSTR_LEN(str), 0);
5684+
return str;
5685+
}
5686+
/* }}} */
5687+
5688+
/* {{{ */
/* Convert a UTF-8 byte string to a single-byte string holding codepoints
 * 0x00-0xFF.
 *
 * s   - input bytes.
 * len - number of input bytes.
 *
 * Returns a newly allocated zend_string with one output byte per decoded
 * character. A character is written as '?' when the decoder reports
 * FAILURE or when its codepoint is above 0xFF. The result is
 * NUL-terminated and its length field holds the number of bytes written. */
5689+
static zend_string *php_utf8_decode(const char *s, size_t len)
5690+
{
5691+
size_t pos = 0;
5692+
unsigned int c;
5693+
zend_string *str;
5694+
/* Allocate len bytes of output; the loop writes one byte per iteration.
 * NOTE(review): this bound assumes php_next_utf8_char() advances pos by
 * at least one byte on every call, including on failure - confirm in
 * its definition (declared via ext/standard/html.h). */
5695+
str = zend_string_alloc(len, 0);
/* The length field is reset and then used as the write cursor below. */
5696+
ZSTR_LEN(str) = 0;
5697+
while (pos < len) {
/* NOTE(review): php_next_utf8_char() presumably decodes the character
 * starting at pos, moves pos past it and reports validity through
 * status - its definition is not visible here. */
5698+
int status = FAILURE;
5699+
c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
5700+
5701+
/* The lower 256 codepoints of Unicode are identical to Latin-1,
5702+
* so we don't need to do any mapping here beyond replacing non-Latin-1
5703+
* characters. */
5704+
if (status == FAILURE || c > 0xFFU) {
5705+
c = '?';
5706+
}
5707+
5708+
ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
5709+
}
5710+
ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
/* Resize only when fewer bytes were written than were allocated. */
5711+
if (ZSTR_LEN(str) < len) {
5712+
str = zend_string_truncate(str, ZSTR_LEN(str), 0);
5713+
}
5714+
5715+
return str;
5716+
}
5717+
/* }}} */
5718+
5719+
5720+
/* {{{ proto string utf8_encode(string data)
5721+
Encodes an ISO-8859-1 string to UTF-8 */
5722+
PHP_FUNCTION(utf8_encode)
5723+
{
5724+
char *arg;
5725+
size_t arg_len;
5726+
/* Parse a single string argument ("s") into arg/arg_len. On a parse
 * failure the function returns without calling the encoder.
 * NOTE(review): zend_parse_parameters() presumably reports the error
 * itself - confirm. */
5727+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
5728+
return;
5729+
}
5730+
/* Pass the bytes and their length to php_utf8_encode() and hand the
 * zend_string it returns to RETURN_STR. */
5731+
RETURN_STR(php_utf8_encode(arg, arg_len));
5732+
}
5733+
/* }}} */
5734+
5735+
/* {{{ proto string utf8_decode(string data)
5736+
Converts a UTF-8 encoded string to ISO-8859-1 */
5737+
PHP_FUNCTION(utf8_decode)
5738+
{
5739+
char *arg;
5740+
size_t arg_len;
5741+
/* Parse a single string argument ("s") into arg/arg_len. On a parse
 * failure the function returns without calling the decoder.
 * NOTE(review): zend_parse_parameters() presumably reports the error
 * itself - confirm. */
5742+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
5743+
return;
5744+
}
5745+
/* Pass the bytes and their length to php_utf8_decode() and hand the
 * zend_string it returns to RETURN_STR. */
5746+
RETURN_STR(php_utf8_decode(arg, arg_len));
5747+
}
5748+
/* }}} */
5749+
56565750
/*
56575751
* Local variables:
56585752
* tab-width: 4
Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
--TEST--
22
Bug #43957 (utf8_decode() bogus conversion on multibyte indicator near end of string)
3-
--SKIPIF--
4-
<?php
5-
require_once("skipif.inc");
6-
if (!extension_loaded('xml')) die ("skip xml extension not available");
7-
?>
83
--FILE--
94
<?php
105
echo utf8_decode('abc'.chr(0xe0));
Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
--TEST--
22
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
3-
--SKIPIF--
4-
<?php
5-
require_once("skipif.inc");
6-
if (!extension_loaded('xml')) die ("skip xml extension not available");
7-
?>
83
--FILE--
94
<?php
105

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
--TEST--
22
UTF-8<->ISO Latin 1 encoding/decoding test
3-
--SKIPIF--
4-
<?php include("skipif.inc"); ?>
53
--FILE--
64
<?php
75
printf("%s -> %s\n", urlencode("ć"), urlencode(utf8_encode("ć")));

ext/xml/tests/utf8_decode_error.phpt renamed to ext/standard/tests/strings/utf8_decode_error.phpt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
--TEST--
22
Test utf8_decode() function : error conditions
3-
--SKIPIF--
4-
<?php
5-
if (!extension_loaded("xml")) {
6-
print "skip - XML extension not loaded";
7-
}
8-
?>
93
--FILE--
104
<?php
115
/* Prototype : proto string utf8_decode(string data)
126
* Description: Converts a UTF-8 encoded string to ISO-8859-1
13-
* Source code: ext/xml/xml.c
7+
* Source code: ext/standard/string.c
148
* Alias to functions:
159
*/
1610

ext/xml/tests/utf8_decode_variation1.phpt renamed to ext/standard/tests/strings/utf8_decode_variation1.phpt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
--TEST--
22
Test utf8_decode() function : usage variations - different types for data
3-
--SKIPIF--
4-
<?php
5-
if (!extension_loaded("xml")) {
6-
print "skip - XML extension not loaded";
7-
}
8-
?>
93
--FILE--
104
<?php
115
/* Prototype : proto string utf8_decode(string data)
126
* Description: Converts a UTF-8 encoded string to ISO-8859-1
13-
* Source code: ext/xml/xml.c
7+
* Source code: ext/standard/string.c
148
* Alias to functions:
159
*/
1610

ext/xml/tests/utf8_encode_error.phpt renamed to ext/standard/tests/strings/utf8_encode_error.phpt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
--TEST--
22
Test utf8_encode() function : error conditions
3-
--SKIPIF--
4-
<?php
5-
if (!extension_loaded("xml")) {
6-
print "skip - XML extension not loaded";
7-
}
8-
?>
93
--FILE--
104
<?php
115
/* Prototype : proto string utf8_encode(string data)
126
* Description: Encodes an ISO-8859-1 string to UTF-8
13-
* Source code: ext/xml/xml.c
7+
* Source code: ext/standard/string.c
148
* Alias to functions:
159
*/
1610

ext/xml/tests/utf8_encode_variation1.phpt renamed to ext/standard/tests/strings/utf8_encode_variation1.phpt

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
--TEST--
22
Test utf8_encode() function : usage variations - <type here specifics of this variation>
3-
--SKIPIF--
4-
<?php
5-
if (!extension_loaded("xml")) {
6-
print "skip - XML extension not loaded";
7-
}
8-
?>
93
--FILE--
104
<?php
115
/* Prototype : proto string utf8_encode(string data)
126
* Description: Encodes an ISO-8859-1 string to UTF-8
13-
* Source code: ext/xml/xml.c
7+
* Source code: ext/standard/string.c
148
* Alias to functions:
159
*/
1610

0 commit comments

Comments
 (0)