内容字号:默认大号超大号

段落设置:段首缩进取消段首缩进

字体设置:切换到微软雅黑切换到宋体

PHP中trim 会导致乱码的原因

2017-10-24 23:14 出处:投稿 人气: 评论(0

关于PHP中trim 会导致乱码的原因,下面小编给大家详细介绍:


运行以下代码:

// rtrim() compares single bytes, so the 3-byte mask "、" is treated as three
// independent bytes rather than as one character.
$tag = "互联网产品、";
$text = rtrim($tag, "、");
print_r($text);

我们可能以为会得到的结果是互联网产品,实际结果却是互联网产�(末尾出现了乱码)。为什么会这样呢?

科普

PHP 里使用 mb_ 前缀的都是多字节函数,参见 http://php.net/manual/zh/ref.mbstring.php

比如

// ASCII text: one byte per character, so both functions report the same length.
$str = "abcd";
print_r(strlen($str)."\n"); // 4
print_r(mb_strlen($str)."\n"); // 4

// strlen() counts bytes, mb_strlen() counts characters: 9 bytes for 3 characters
// (assumes the script and the mbstring internal encoding are both UTF-8).
$str = "周梦康";
print_r(strlen($str)."\n"); // 9
print_r(mb_strlen($str)."\n"); // 3

mb_系列函数是以“多个字节组成的一个字符”为颗粒度来操作的,不带mb_则是按实际的字节数来操作的。

原理

trim 函数文档

string trim ( string $str [, string $character_mask = " \t\n\r\0\x0B" ] )

该函数不是多字节函数,也就是说,汉字这样的多字节字符,会拿其头或尾的单字节来和后面的$character_mask对应的char数组进行匹配,如果在后面的数组中,则删掉,继续匹配。比如:

// The mask is a set of bytes: leading 'b' and 'c' are in "abc" and are removed;
// trimming stops at 'd', the first byte that is not in the mask.
echo ltrim("bcdf","abc"); // df

如下面的 demo 中的函数 string_print_char 所示:
“、”由 0xe3 0x80 0x81 三字节组成,
“品”由 0xe5 0x93 0x81 三字节组成。
所以在执行 rtrim 的时候,通过逐字节比对,不仅会去掉“、”的三个字节,还会把“品”末尾的 0x81 一并去掉,导致了最后出现了乱码。

源码探究

查看 PHP7 的源码,然后提炼出下面的小 demo ,方便大家一起学习,其实PHP源码的学习并不难,每天进步一点点。

//
//  main.c
//  trim
//
//  Created by 周梦康 on 2017/10/18.
//  Copyright © 2017年 周梦康. All rights reserved.
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Prints each byte of str as two hex digits followed by a tab, then a newline. */
void string_print_char(char *str);
/* Clears the 256-entry mask, then sets mask[b] = 1 for every byte b in input[0..len). */
void php_charmask(unsigned char *input, size_t len, char *mask);
/* Byte-wise trims; each returns a malloc()ed buffer that the caller must free(). */
char *ltrim(char *str,char *character_mask);
char *rtrim(char *str,char *character_mask);


/*
 * Demo driver: shows a plain ASCII ltrim, dumps the bytes of "品" and "、",
 * then reproduces the corrupted output of a byte-wise rtrim on multibyte text.
 * Always returns 0.
 */
int main(int argc, char const *argv[])
{
    char *trimmed;

    (void)argc;
    (void)argv;

    trimmed = ltrim("bcdf","abc");
    if (trimmed != NULL) {
        printf("%s\n", trimmed); // df
        free(trimmed);           /* ltrim() hands ownership of the buffer to us */
    }

    string_print_char("品"); // e5    93    81
    string_print_char("、"); // e3    80    81

    /* The trailing 0x81 of "品" is also in the mask, so it gets stripped too. */
    trimmed = rtrim("互联网产品、","、");
    if (trimmed != NULL) {
        printf("%s\n", trimmed);
        free(trimmed);
    }

    return 0;
}

/*
 * Byte-wise left trim, mirroring PHP's ltrim() with an explicit mask.
 *
 * str            - NUL-terminated input; not modified.
 * character_mask - NUL-terminated set of bytes to strip from the front.
 *
 * Returns a newly malloc()ed, NUL-terminated copy of str without the leading
 * bytes found in character_mask, or NULL if the allocation fails. The caller
 * owns the result and must free() it.
 */
char *ltrim(char *str,char *character_mask)
{
    char *res;
    char mask[256];
    size_t i;
    size_t trimmed = 0;

    size_t len = strlen(str);

    php_charmask((unsigned char*)character_mask, strlen(character_mask), mask);

    /* Count leading bytes that are in the mask; stop at the first one that is not. */
    for (i = 0; i < len; i++) {
        if (mask[(unsigned char)str[i]]) {
            trimmed++;
        } else {
            break;
        }
    }

    len -= trimmed;
    str += trimmed;

    res = malloc(len + 1);
    if (res == NULL) {
        return NULL;
    }
    memcpy(res, str, len);
    res[len] = '\0'; /* memcpy copies only len bytes, so terminate explicitly */

    return res;
}

/*
 * Byte-wise right trim, mirroring PHP's rtrim() with an explicit mask.
 *
 * str            - NUL-terminated input; not modified.
 * character_mask - NUL-terminated set of bytes to strip from the end.
 *
 * Returns a newly malloc()ed, NUL-terminated copy of str without the trailing
 * bytes found in character_mask, or NULL if the allocation fails. The caller
 * owns the result and must free() it.
 *
 * Matching is per byte, so a multibyte character whose last byte is in the
 * mask loses that byte and the result is no longer valid text.
 */
char *rtrim(char *str,char *character_mask)
{
    char *res;
    char mask[256];
    size_t i;

    size_t len = strlen(str);

    php_charmask((unsigned char*)character_mask, strlen(character_mask), mask);

    if (len > 0) {
        i = len - 1;
        /* i is unsigned, so "i >= 0" could never end the loop; test before decrementing. */
        do {
            if (mask[(unsigned char)str[i]]) {
                len--;
            } else {
                break;
            }
        } while (i-- != 0);
    }

    res = malloc(len + 1);
    if (res == NULL) {
        return NULL;
    }
    memcpy(res, str, len);
    res[len] = '\0'; /* memcpy copies only len bytes, so terminate explicitly */

    return res;
}

/*
 * Prints every byte of the NUL-terminated string str as two lowercase hex
 * digits followed by a tab, then ends the line with a newline.
 */
void string_print_char(char *str)
{
    size_t l = strlen(str);

    /* size_t index: avoids comparing a signed int against the unsigned length. */
    for (size_t i = 0; i < l; i++) {
        printf("%02hhx\t", (unsigned char)str[i]);
    }

    printf("\n");
}

/*
 * Builds a byte lookup table from a trim mask.
 *
 * input - bytes that should be trimmed.
 * len   - number of bytes in input.
 * mask  - output table of at least 256 entries; cleared first, then
 *         mask[b] is set to 1 for every byte b that occurs in input.
 */
void php_charmask(unsigned char *input, size_t len, char *mask)
{
    unsigned char *end;
    unsigned char c;

    memset(mask, 0, 256);

    for (end = input+len; input < end; input++) {
        c = *input;
        mask[c] = 1;
    }
}

如果觉得 demo 还不够清晰的,复制下来,自己执行一次吧~
C 语言基础较差的同学也不用担心,我准备后面专门写一个PHP小白学习 C 语言的系列入门短文哈。

解决方案

那么我们就依葫芦画瓢,用 php 本身的多字节函数来实现下吧:

// PHP 8.4 ships a native mb_rtrim(); only define this fallback when it is
// missing, otherwise the declaration is a fatal "cannot redeclare" error.
if (!function_exists('mb_rtrim')) {
    /**
     * Multibyte-aware rtrim(): strips trailing characters (not bytes) of
     * $string that occur in $trim.
     *
     * @param string      $string   Input string.
     * @param string      $trim     Characters to strip; each character is one mask entry.
     * @param string|null $encoding Character encoding; null uses mb_internal_encoding().
     *
     * @return string $string without the trailing characters found in $trim.
     */
    function mb_rtrim($string, $trim, $encoding = null)
    {
        if ($encoding === null) {
            $encoding = mb_internal_encoding();
        }

        // Split the mask into whole characters instead of bytes.
        $mask = [];
        $trimLength = mb_strlen($trim, $encoding);
        for ($i = 0; $i < $trimLength; $i++) {
            $mask[] = mb_substr($trim, $i, 1, $encoding);
        }

        // Walk backwards one character at a time while the last one is in the mask.
        $len = mb_strlen($string, $encoding);
        while ($len > 0 && in_array(mb_substr($string, $len - 1, 1, $encoding), $mask, true)) {
            $len--;
        }

        return mb_substr($string, 0, $len, $encoding);
    }
}

// Set the internal encoding, then trim by character instead of by byte.
mb_internal_encoding("UTF-8");
$tag = "互联网产品、";
$encoding = mb_internal_encoding();
print_r(mb_rtrim($tag, "、",$encoding));

当然你也可以使用正则来做。通过上面对这些函数的学习,单字节函数和多字节函数的区别,你学会了吗?

PHP7 相关源码

/* trim(): mode 3 sets both bits tested by php_trim (1 = left, 2 = right). */
PHP_FUNCTION(trim)
{
    php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
}
/* rtrim(): mode 2 selects only the right-hand (mode & 2) pass of php_trim. */
PHP_FUNCTION(rtrim)
{
    php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
}
/* ltrim(): mode 1 selects only the left-hand (mode & 1) pass of php_trim. */
PHP_FUNCTION(ltrim)
{
    php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
static?void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
????zend_string *str;
????zend_string *what = NULL;
?
????ZEND_PARSE_PARAMETERS_START(1, 2)
????????Z_PARAM_STR(str)
????????Z_PARAM_OPTIONAL
????????Z_PARAM_STR(what)
????ZEND_PARSE_PARAMETERS_END();
?
????ZVAL_STR(return_value, php_trim(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
}
/*
 * Byte-wise trim of a zend_string.
 *
 * str      - string to trim.
 * what     - bytes to strip, or NULL to strip the default set
 *            (' ', '\n', '\r', '\t', '\v', '\0').
 * what_len - number of bytes in `what`.
 * mode     - bit 1 trims the left side, bit 2 trims the right side.
 *
 * Every comparison is on single bytes: a multibyte character passed in
 * `what` is handled as several independent bytes.
 *
 * Returns zend_string_copy(str) when nothing was trimmed, otherwise a new
 * string created with zend_string_init() from the remaining range.
 */
PHPAPI zend_string *php_trim(zend_string *str, char *what, size_t what_len, int mode)
{
    const char *c = ZSTR_VAL(str);
    size_t len = ZSTR_LEN(str);
    register size_t i;
    size_t trimmed = 0;
    char mask[256];

    if (what) {
        if (what_len == 1) {
            /* Single-byte mask: compare directly, no lookup table needed. */
            char p = *what;
            if (mode & 1) {
                for (i = 0; i < len; i++) {
                    if (c[i] == p) {
                        trimmed++;
                    } else {
                        break;
                    }
                }
                len -= trimmed;
                c += trimmed;
            }
            if (mode & 2) {
                if (len > 0) {
                    i = len - 1;
                    do {
                        if (c[i] == p) {
                            len--;
                        } else {
                            break;
                        }
                    } while (i-- != 0);
                }
            }
        } else {
            /* Multi-byte mask: build a 256-entry table, then test each byte against it. */
            php_charmask((unsigned char*)what, what_len, mask);

            if (mode & 1) {
                for (i = 0; i < len; i++) {
                    if (mask[(unsigned char)c[i]]) {
                        trimmed++;
                    } else {
                        break;
                    }
                }
                len -= trimmed;
                c += trimmed;
            }
            if (mode & 2) {
                if (len > 0) {
                    i = len - 1;
                    do {
                        if (mask[(unsigned char)c[i]]) {
                            len--;
                        } else {
                            break;
                        }
                    } while (i-- != 0);
                }
            }
        }
    } else {
        /* No mask given: strip the default whitespace/NUL set. */
        if (mode & 1) {
            for (i = 0; i < len; i++) {
                if ((unsigned char)c[i] <= ' ' &&
                    (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')) {
                    trimmed++;
                } else {
                    break;
                }
            }
            len -= trimmed;
            c += trimmed;
        }
        if (mode & 2) {
            if (len > 0) {
                i = len - 1;
                do {
                    if ((unsigned char)c[i] <= ' ' &&
                        (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')) {
                        len--;
                    } else {
                        break;
                    }
                } while (i-- != 0);
            }
        }
    }

    /* Unchanged length means nothing was trimmed on either side. */
    if (ZSTR_LEN(str) == len) {
        return zend_string_copy(str);
    } else {
        return zend_string_init(c, len, 0);
    }
}
/* {{{ php_charmask
?* Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
?* it needs to be incrementing.
?* Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
?*/
static?inline int php_charmask(unsigned char *input, size_t len, char *mask)
{
????unsigned char *end;
????unsigned char c;
????int result = SUCCESS;
?
????memset(mask, 0, 256);
????for?(end?= input+len; input end; input++) {
????????c=*input;
????????if?((input+3 end) && input[1] ==?'.'?&& input[2] ==?'.'
????????????????&& input[3] >= c) {
????????????memset(mask+c, 1, input[3] - c + 1);
????????????input+=3;
????????}?else?if?((input+1 end) && input[0] ==?'.'?&& input[1] ==?'.') {
????????????/* Error, try to be as helpful as possible:
???????????????(a range ending/starting with '.' won't be captured here) */
????????????if?(end-len >= input) {?/* there was no 'left' char */
????????????????php_error_docref(NULL, E_WARNING,?"Invalid '..'-range, no character to the left of '..'");
????????????????result = FAILURE;
????????????????continue;
????????????}
????????????if?(input+2 >=?end) {?/* there is no 'right' char */
????????????????php_error_docref(NULL, E_WARNING,?"Invalid '..'-range, no character to the right of '..'");
????????????????result = FAILURE;
????????????????continue;
????????????}
????????????if?(input[-1] > input[2]) {?/* wrong order */
????????????????php_error_docref(NULL, E_WARNING,?"Invalid '..'-range, '..'-range needs to be incrementing");
????????????????result = FAILURE;
????????????????continue;
????????????}
????????????/* FIXME: better error (a..b..c is the only left possibility?) */
????????????php_error_docref(NULL, E_WARNING,?"Invalid '..'-range");
????????????result = FAILURE;
????????????continue;
????????}?else?{
????????????mask[c]=1;
????????}
????}
????return?result;
}
/* }}} */

相关栏目

相关文章



分享给小伙伴们:

评论

发表评论愿您的每句评论,都能给大家的生活添色彩,带来共鸣,带来思索,带来快乐。

签名: 验证码: 点击我更换图片

评论列表

    © 2002-2017 dngsw.cn www.yabo2009.net高手网 版权所有

    粤ICP备13005586号-3