Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,8 @@ PHP NEWS
. Fixed bug #73234 (Emulated statements let value dictate parameter type).
(Adam Baratz)

- XML:
. Moved utf8_encode() and utf8_decode() to the Standard extension. (Andrea)

<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>

3 changes: 3 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ PHP 7.2 UPGRADE NOTES
'Multiline' = "FirstLine\r\n SecondLine",
];

- XML:
. utf8_encode() and utf8_decode() have been moved to the Standard extension
as string functions.

========================================
6. New Functions
Expand Down
10 changes: 10 additions & 0 deletions ext/standard/basic_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2465,6 +2465,14 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3)
ZEND_ARG_INFO(0, length)
ZEND_ARG_INFO(0, case_sensitivity)
ZEND_END_ARG_INFO()

/* Argument description for utf8_encode(): one required argument named "data". */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
ZEND_ARG_INFO(0, data)
ZEND_END_ARG_INFO()

/* Argument description for utf8_decode(): one required argument named "data". */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
ZEND_ARG_INFO(0, data)
ZEND_END_ARG_INFO()
/* }}} */
/* {{{ syslog.c */
#ifdef HAVE_SYSLOG_H
Expand Down Expand Up @@ -2764,6 +2772,8 @@ const zend_function_entry basic_functions[] = { /* {{{ */
PHP_FE(str_split, arginfo_str_split)
PHP_FE(strpbrk, arginfo_strpbrk)
PHP_FE(substr_compare, arginfo_substr_compare)
PHP_FE(utf8_encode, arginfo_utf8_encode)
PHP_FE(utf8_decode, arginfo_utf8_decode)

#ifdef HAVE_STRCOLL
PHP_FE(strcoll, arginfo_strcoll)
Expand Down
2 changes: 2 additions & 0 deletions ext/standard/php_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ PHP_FUNCTION(str_word_count);
PHP_FUNCTION(str_split);
PHP_FUNCTION(strpbrk);
PHP_FUNCTION(substr_compare);
PHP_FUNCTION(utf8_encode);
PHP_FUNCTION(utf8_decode);
#ifdef HAVE_STRCOLL
PHP_FUNCTION(strcoll);
#endif
Expand Down
94 changes: 94 additions & 0 deletions ext/standard/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@

/* For str_getcsv() support */
#include "ext/standard/file.h"
/* For php_next_utf8_char() */
#include "ext/standard/html.h"

#define STR_PAD_LEFT 0
#define STR_PAD_RIGHT 1
Expand Down Expand Up @@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare)
}
/* }}} */

/* {{{ php_utf8_encode
 * Converts `len` bytes of ISO-8859-1 (Latin-1) text starting at `s` into
 * UTF-8 and returns the result as a freshly allocated, NUL-terminated
 * zend_string. Bytes below 0x80 are copied unchanged; every other byte is
 * written as a two-byte sequence. */
static zend_string *php_utf8_encode(const char *s, size_t len)
{
	const unsigned char *src = (const unsigned char *) s;
	zend_string *result;
	char *out;
	size_t i;

	/* Reserve two output bytes per input byte: that is the most any
	 * Latin-1 byte can expand to. The surplus is given back below. */
	result = zend_string_safe_alloc(len, 2, 0, 0);
	out = ZSTR_VAL(result);

	for (i = 0; i < len; i++) {
		/* Latin-1 byte values are the Unicode codepoints U+0000..U+00FF,
		 * so the byte itself is the codepoint to encode. */
		unsigned char byte = src[i];

		if (byte >= 0x80) {
			/* Two-byte form: 110xxxxx 10xxxxxx. Longer forms can never
			 * be needed for codepoints below 256. */
			*out++ = (0xc0 | (byte >> 6));
			*out++ = (0x80 | (byte & 0x3f));
		} else {
			/* Plain ASCII is encoded as itself. */
			*out++ = (char) byte;
		}
	}
	*out = '\0';

	/* Record how much was actually produced, then shrink the buffer to fit. */
	ZSTR_LEN(result) = (size_t) (out - ZSTR_VAL(result));
	return zend_string_truncate(result, ZSTR_LEN(result), 0);
}
/* }}} */

/* {{{ php_utf8_decode
 * Converts `len` bytes of UTF-8 text starting at `s` into ISO-8859-1
 * (Latin-1) and returns the result as a freshly allocated, NUL-terminated
 * zend_string. Each decoding step that reports failure, and each codepoint
 * above U+00FF, produces a single '?' in the output. */
static zend_string *php_utf8_decode(const char *s, size_t len)
{
	const unsigned char *input = (const unsigned char *) s;
	/* The output buffer is sized to the input length; the loop below emits
	 * exactly one byte per decoding step. */
	zend_string *result = zend_string_alloc(len, 0);
	size_t cursor = 0;
	size_t produced = 0;

	while (cursor < len) {
		int status = FAILURE;
		/* php_next_utf8_char() moves `cursor` forward and reports through
		 * `status` whether a codepoint was decoded. */
		unsigned int codepoint = php_next_utf8_char(input, (size_t) len, &cursor, &status);

		/* Codepoints 0..255 coincide with Latin-1, so they are stored
		 * directly; anything else is replaced. */
		ZSTR_VAL(result)[produced++] =
			(status == FAILURE || codepoint > 0xFFU) ? '?' : codepoint;
	}

	ZSTR_VAL(result)[produced] = '\0';
	ZSTR_LEN(result) = produced;

	/* Give back the unused tail when multi-byte sequences shrank the text. */
	if (produced < len) {
		result = zend_string_truncate(result, produced, 0);
	}

	return result;
}
/* }}} */


/* {{{ proto string utf8_encode(string data)
Encodes an ISO-8859-1 string to UTF-8 */
PHP_FUNCTION(utf8_encode)
{
char *arg;
size_t arg_len;

if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
return;
}

RETURN_STR(php_utf8_encode(arg, arg_len));
}
/* }}} */

/* {{{ proto string utf8_decode(string data)
Converts a UTF-8 encoded string to ISO-8859-1 */
PHP_FUNCTION(utf8_decode)
{
char *arg;
size_t arg_len;

if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
return;
}

RETURN_STR(php_utf8_decode(arg, arg_len));
}
/* }}} */

/*
* Local variables:
* tab-width: 4
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
--TEST--
Bug #43957 (utf8_decode() bogus conversion on multibyte indicator near end of string)
--SKIPIF--
<?php
require_once("skipif.inc");
if (!extension_loaded('xml')) die ("skip xml extension not available");
?>
--FILE--
<?php
echo utf8_decode('abc'.chr(0xe0));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
--TEST--
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
--SKIPIF--
<?php
require_once("skipif.inc");
if (!extension_loaded('xml')) die ("skip xml extension not available");
?>
--FILE--
<?php

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
--TEST--
UTF-8<->ISO Latin 1 encoding/decoding test
--SKIPIF--
<?php include("skipif.inc"); ?>
--FILE--
<?php
printf("%s -> %s\n", urlencode("�"), urlencode(utf8_encode("�")));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
--TEST--
Test utf8_decode() function : error conditions
--SKIPIF--
<?php
if (!extension_loaded("xml")) {
print "skip - XML extension not loaded";
}
?>
--FILE--
<?php
/* Prototype : proto string utf8_decode(string data)
* Description: Converts a UTF-8 encoded string to ISO-8859-1
* Source code: ext/xml/xml.c
* Source code: ext/standard/string.c
* Alias to functions:
*/

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
--TEST--
Test utf8_decode() function : usage variations - different types for data
--SKIPIF--
<?php
if (!extension_loaded("xml")) {
print "skip - XML extension not loaded";
}
?>
--FILE--
<?php
/* Prototype : proto string utf8_decode(string data)
* Description: Converts a UTF-8 encoded string to ISO-8859-1
* Source code: ext/xml/xml.c
* Source code: ext/standard/string.c
* Alias to functions:
*/

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
--TEST--
Test utf8_encode() function : error conditions
--SKIPIF--
<?php
if (!extension_loaded("xml")) {
print "skip - XML extension not loaded";
}
?>
--FILE--
<?php
/* Prototype : proto string utf8_encode(string data)
* Description: Encodes an ISO-8859-1 string to UTF-8
* Source code: ext/xml/xml.c
* Source code: ext/standard/string.c
* Alias to functions:
*/

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
--TEST--
Test utf8_encode() function : usage variations - <type here specifics of this variation>
--SKIPIF--
<?php
if (!extension_loaded("xml")) {
print "skip - XML extension not loaded";
}
?>
--FILE--
<?php
/* Prototype : proto string utf8_encode(string data)
* Description: Encodes an ISO-8859-1 string to UTF-8
* Source code: ext/xml/xml.c
* Source code: ext/standard/string.c
* Alias to functions:
*/

Expand Down
50 changes: 0 additions & 50 deletions ext/xml/xml.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,6 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_get_option, 0, 0, 2)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()

/* Argument description for utf8_encode(): one required argument named "data". */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
ZEND_ARG_INFO(0, data)
ZEND_END_ARG_INFO()

/* Argument description for utf8_decode(): one required argument named "data". */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
ZEND_ARG_INFO(0, data)
ZEND_END_ARG_INFO()

const zend_function_entry xml_functions[] = {
PHP_FE(xml_parser_create, arginfo_xml_parser_create)
PHP_FE(xml_parser_create_ns, arginfo_xml_parser_create_ns)
Expand All @@ -243,8 +235,6 @@ const zend_function_entry xml_functions[] = {
PHP_FE(xml_parser_free, arginfo_xml_parser_free)
PHP_FE(xml_parser_set_option, arginfo_xml_parser_set_option)
PHP_FE(xml_parser_get_option, arginfo_xml_parser_get_option)
PHP_FE(utf8_encode, arginfo_utf8_encode)
PHP_FE(utf8_decode, arginfo_utf8_decode)
PHP_FE_END
};

Expand Down Expand Up @@ -1667,46 +1657,6 @@ PHP_FUNCTION(xml_parser_get_option)
}
/* }}} */

/* {{{ proto string utf8_encode(string data)
   Encodes an ISO-8859-1 string to UTF-8.
   Hands the argument to xml_utf8_encode() with "ISO-8859-1" as the source
   encoding; returns FALSE when that helper yields NULL. */
PHP_FUNCTION(utf8_encode)
{
	char *data;
	size_t data_len;
	zend_string *result;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &data, &data_len) == FAILURE) {
		return;
	}

	result = xml_utf8_encode(data, data_len, (XML_Char*)"ISO-8859-1");
	if (result != NULL) {
		RETURN_STR(result);
	}

	/* A NULL result from the helper is reported to the caller as FALSE. */
	RETURN_FALSE;
}
/* }}} */

/* {{{ proto string utf8_decode(string data)
   Converts a UTF-8 encoded string to ISO-8859-1.
   Hands the argument to xml_utf8_decode() with "ISO-8859-1" as the target
   encoding; returns FALSE when that helper yields NULL. */
PHP_FUNCTION(utf8_decode)
{
	char *data;
	size_t data_len;
	zend_string *result;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &data, &data_len) == FAILURE) {
		return;
	}

	result = xml_utf8_decode((XML_Char*)data, data_len, (XML_Char*)"ISO-8859-1");
	if (result != NULL) {
		RETURN_STR(result);
	}

	/* A NULL result from the helper is reported to the caller as FALSE. */
	RETURN_FALSE;
}
/* }}} */

#endif

/*
Expand Down