From f2722da2b30dc9c11372c2df3de60521fada7a6b Mon Sep 17 00:00:00 2001 From: Sergey Linnik Date: Fri, 28 Aug 2015 16:44:16 +0300 Subject: [PATCH] Generalize charset converting --- src/Fetch/MIME.php | 2 +- src/Fetch/Message.php | 63 +++++++++++++++++++++----------- tests/Fetch/Test/MessageTest.php | 15 ++++++++ 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/src/Fetch/MIME.php b/src/Fetch/MIME.php index b63d72b..c83c9f3 100644 --- a/src/Fetch/MIME.php +++ b/src/Fetch/MIME.php @@ -37,7 +37,7 @@ final class MIME foreach (imap_mime_header_decode($text) as $word) { $ch = 'default' === $word->charset ? 'ascii' : $word->charset; - $result .= iconv($ch, $targetCharset, $word->text); + $result .= Message::charsetConvert($word->text, $ch, $targetCharset) ?: $word->text; } return $result; diff --git a/src/Fetch/Message.php b/src/Fetch/Message.php index e382678..b72f690 100755 --- a/src/Fetch/Message.php +++ b/src/Fetch/Message.php @@ -520,27 +520,8 @@ class Message $messageBody = self::decode($messageBody, $structure->encoding); - if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { - $mb_converted = false; - if (function_exists('mb_convert_encoding')) { - if (!in_array($parameters['charset'], mb_list_encodings())) { - if ($structure->encoding === 0) { - $parameters['charset'] = 'US-ASCII'; - } else { - $parameters['charset'] = 'UTF-8'; - } - } - - $messageBody = @mb_convert_encoding($messageBody, self::$charset, $parameters['charset']); - $mb_converted = true; - } - if (!$mb_converted) { - $messageBodyConv = @iconv($parameters['charset'], self::$charset . self::$charsetFlag, $messageBody); - - if ($messageBodyConv !== false) { - $messageBody = $messageBodyConv; - } - } + if (!empty($parameters['charset'])) { + $messageBody = self::charsetConvert($messageBody, $parameters['charset'], self::$charset) ?: $messageBody; } if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) { @@ -575,6 +556,46 @@ class Message } } + /** + * @param string $text + * @param string $from + * @param string $to + * + * @return string|null + */ + public static function charsetConvert($text, $from, $to = null) + { + if (!$text) { + return ''; + } + + if (null === $to) { + $to = self::$charset; + } + + $from = strtolower($from); + $to = strtolower($to); + + if ($from === $to) { + return $text; + } + + $converted = null; + if (!$converted && function_exists('mb_convert_encoding') && @mb_check_encoding($text, $from)) { + $converted = @mb_convert_encoding($text, $to, $from); + } + + if (!$converted && function_exists('iconv')) { + $converted = @iconv($from, $to . self::$charsetFlag, $text); + } + + if ($converted) { + return $converted; + } + + return null; + } + /** * This function takes in the message data and encoding type and returns the decoded data. * diff --git a/tests/Fetch/Test/MessageTest.php b/tests/Fetch/Test/MessageTest.php index 0cb9a76..fb4de09 100644 --- a/tests/Fetch/Test/MessageTest.php +++ b/tests/Fetch/Test/MessageTest.php @@ -240,6 +240,21 @@ class MessageTest extends \PHPUnit_Framework_TestCase $this->assertEquals($sentFolderNumStart + 1, $server->numMessages(), 'Message moved into Sent Folder.'); } + public function testCharsetConvert() + { + $this->assertSame('Привет', Message::charsetConvert( + implode(array_map('chr', array(0xF0, 0xD2, 0xC9, 0xD7, 0xC5, 0xD4))), + 'koi8-r', + 'utf-8' + )); + + $this->assertSame('test', Message::charsetConvert('test', 'unk1', 'unk1'), 'Same charsets not try converting'); + $this->assertSame('', Message::charsetConvert('', 'unk1', 'unk1'), 'Empty text not try converting'); + + $this->assertSame(null, Message::charsetConvert('test', 'unk1', 'utf-8'), 'Null when source charset is unknown'); + $this->assertSame(null, Message::charsetConvert('test', 'utf-8', 'unk1'), 'Null when destination charset is unknown'); + } + public function testDecode() { $quotedPrintableDecoded = "Now's the time for all folk to come to the aid of their country.";