mirror of
https://github.com/retailcrm/Fetch.git
synced 2024-11-28 21:46:02 +03:00
autofix utf8 strings
This commit is contained in:
parent
e8c4ea7985
commit
82d18754bc
@ -577,6 +577,10 @@ class Message
|
|||||||
$to = strtolower($to);
|
$to = strtolower($to);
|
||||||
|
|
||||||
if ($from === $to) {
|
if ($from === $to) {
|
||||||
|
if ($to === 'utf-8') {
|
||||||
|
return UTF8::fix($text);
|
||||||
|
}
|
||||||
|
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
54
src/Fetch/UTF8.php
Normal file
54
src/Fetch/UTF8.php
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Fetch;
|
||||||
|
|
||||||
|
final class UTF8
{
    /**
     * Replaces every byte that is not part of a well-formed UTF-8 sequence with "?".
     *
     * ASCII bytes and well-formed multi-byte sequences are copied through unchanged.
     * Each offending byte (stray continuation byte, invalid lead byte, lead byte of a
     * truncated, overlong, surrogate or out-of-range sequence) is replaced by exactly
     * one "?", after which scanning resumes at the very next byte.
     *
     * @param mixed $text Value to sanitise; non-string values are returned untouched.
     *
     * @return mixed The sanitised string, or the original value when it is not a string.
     */
    public static function fix($text)
    {
        if (!is_string($text)) {
            return $text;
        }

        $buf = '';
        $max = strlen($text);

        for ($i = 0; $i < $max; $i++) {
            // Square-bracket offsets: the `$text{$i}` form was removed in PHP 8.0.
            $byte = $text[$i];

            if (ord($byte) < 0x80) {
                // Plain ASCII never needs conversion.
                $buf .= $byte;
                continue;
            }

            $length = self::sequenceLength($text, $i, $max);

            if ($length === 0) {
                // Only the offending byte is replaced; the following bytes are
                // re-examined on their own in the next iterations.
                $buf .= '?';
                continue;
            }

            $buf .= substr($text, $i, $length);
            $i += $length - 1;
        }

        return $buf;
    }

    /**
     * Returns the byte length (2, 3 or 4) of the well-formed UTF-8 sequence that
     * starts at $offset, or 0 when the bytes at $offset do not form one.
     *
     * @param string $text   Raw byte string being scanned.
     * @param int    $offset Index of the candidate lead byte (must be < $max).
     * @param int    $max    Length of $text in bytes.
     *
     * @return int
     */
    private static function sequenceLength($text, $offset, $max)
    {
        $lead = ord($text[$offset]);

        // $low/$high bound the SECOND byte; narrowing them for specific lead bytes
        // rejects overlong forms, UTF-16 surrogates and code points above U+10FFFF.
        if ($lead >= 0xC2 && $lead <= 0xDF) {
            // 0xC0 and 0xC1 could only start overlong encodings of ASCII.
            $length = 2;
            $low = 0x80;
            $high = 0xBF;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $length = 3;
            $low = $lead === 0xE0 ? 0xA0 : 0x80;  // E0 80..9F would be overlong
            $high = $lead === 0xED ? 0x9F : 0xBF; // ED A0..BF would be a surrogate
        } elseif ($lead >= 0xF0 && $lead <= 0xF4) {
            $length = 4;
            $low = $lead === 0xF0 ? 0x90 : 0x80;  // F0 80..8F would be overlong
            $high = $lead === 0xF4 ? 0x8F : 0xBF; // F4 90..BF would exceed U+10FFFF
        } else {
            // Stray continuation byte (80..BF) or a byte that can never lead a sequence.
            return 0;
        }

        if ($offset + $length > $max) {
            // Sequence is cut off by the end of the string.
            return 0;
        }

        $second = ord($text[$offset + 1]);
        if ($second < $low || $second > $high) {
            return 0;
        }

        for ($k = 2; $k < $length; $k++) {
            $next = ord($text[$offset + $k]);
            if ($next < 0x80 || $next > 0xBF) {
                return 0;
            }
        }

        return $length;
    }
}
|
@ -34,6 +34,10 @@ class MIMETest extends \PHPUnit_Framework_TestCase
|
|||||||
. PHP_EOL .
|
. PHP_EOL .
|
||||||
'=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='
|
'=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='
|
||||||
),
|
),
|
||||||
|
array(
|
||||||
|
'?????? ?????.pdf',
|
||||||
|
'=?UTF-8?B?' .base64_encode("\xCF\xF0\xE8\xEC\xE5\xF0 \xEF\xEB\xE0\xED\xE0\x2E\x70\x64\x66") . '?=',
|
||||||
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user