From 82d18754bc58edbdb0ed4835bc0a34e2eb8f4eb6 Mon Sep 17 00:00:00 2001 From: Sergey Linnik Date: Mon, 30 Jan 2017 19:59:18 +0300 Subject: [PATCH] autofix utf8 strings --- src/Fetch/Message.php | 4 +++ src/Fetch/UTF8.php | 54 +++++++++++++++++++++++++++++++++++ tests/Fetch/Test/MIMETest.php | 4 +++ 3 files changed, 62 insertions(+) create mode 100644 src/Fetch/UTF8.php diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index dccb0dc..dc76535 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -577,6 +577,10 @@ class Message $to = strtolower($to); if ($from === $to) { + if ($to === 'utf-8') { + return UTF8::fix($text); + } + return $text; } diff --git a/src/Fetch/UTF8.php b/src/Fetch/UTF8.php new file mode 100644 index 0000000..87b0869 --- /dev/null +++ b/src/Fetch/UTF8.php @@ -0,0 +1,54 @@ += "\xc0") { //Should be converted to UTF8, if it's not UTF8 already + $c2 = $i + 1 >= $max ? "\x00" : $text[$i + 1]; + $c3 = $i + 2 >= $max ? "\x00" : $text[$i + 2]; + $c4 = $i + 3 >= $max ? "\x00" : $text[$i + 3]; + + if ($c1 >= "\xc0" & $c1 <= "\xdf") { //looks like 2 bytes UTF8 + if ($c2 >= "\x80" && $c2 <= "\xbf") { //yeah, almost sure it's UTF8 already + $buf .= $c1 . $c2; + $i++; + } else { //not valid UTF8. Convert it. + $buf .= '?'; + } + } elseif ($c1 >= "\xe0" & $c1 <= "\xef") { //looks like 3 bytes UTF8 + if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { //yeah, almost sure it's UTF8 already + $buf .= $c1 . $c2 . $c3; + $i += 2; + } else { //not valid UTF8. Convert it. + $buf .= '?'; + } + } elseif ($c1 >= "\xf0" & $c1 <= "\xf7") { //looks like 4 bytes UTF8 + if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { //yeah, almost sure it's UTF8 already + $buf .= $c1 . $c2 . $c3 . $c4; + $i += 3; + } else { //not valid UTF8. Convert it. + $buf .= '?'; + } + } else { //doesn't look like UTF8, but should be converted + $buf .= '?'; + } + } elseif (($c1 & "\xc0") === "\x80") { // needs conversion + $buf .= '?'; + } else { // it doesn't need conversion + $buf .= $c1; + } + } + + return $buf; + } +} diff --git a/tests/Fetch/Test/MIMETest.php b/tests/Fetch/Test/MIMETest.php index f49a517..3fc8c26 100644 --- a/tests/Fetch/Test/MIMETest.php +++ b/tests/Fetch/Test/MIMETest.php @@ -34,6 +34,10 @@ class MIMETest extends \PHPUnit_Framework_TestCase . PHP_EOL . '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=' ), + array( + '?????? ?????.pdf', + '=?UTF-8?B?' .base64_encode("\xCF\xF0\xE8\xEC\xE5\xF0 \xEF\xEB\xE0\xED\xE0\x2E\x70\x64\x66") . '?=', + ), ); }