Generalize charset conversion

This commit is contained in:
Sergey Linnik 2015-08-28 16:44:16 +03:00
parent 3fbee968a6
commit f2722da2b3
3 changed files with 58 additions and 22 deletions

View File

@ -37,7 +37,7 @@ final class MIME
foreach (imap_mime_header_decode($text) as $word) { foreach (imap_mime_header_decode($text) as $word) {
$ch = 'default' === $word->charset ? 'ascii' : $word->charset; $ch = 'default' === $word->charset ? 'ascii' : $word->charset;
$result .= iconv($ch, $targetCharset, $word->text); $result .= Message::charsetConvert($word->text, $ch, $targetCharset) ?: $word->text;
} }
return $result; return $result;

View File

@ -520,27 +520,8 @@ class Message
$messageBody = self::decode($messageBody, $structure->encoding); $messageBody = self::decode($messageBody, $structure->encoding);
if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) { if (!empty($parameters['charset'])) {
$mb_converted = false; $messageBody = self::charsetConvert($messageBody, $parameters['charset'], self::$charset) ?: $messageBody;
if (function_exists('mb_convert_encoding')) {
if (!in_array($parameters['charset'], mb_list_encodings())) {
if ($structure->encoding === 0) {
$parameters['charset'] = 'US-ASCII';
} else {
$parameters['charset'] = 'UTF-8';
}
}
$messageBody = @mb_convert_encoding($messageBody, self::$charset, $parameters['charset']);
$mb_converted = true;
}
if (!$mb_converted) {
$messageBodyConv = @iconv($parameters['charset'], self::$charset . self::$charsetFlag, $messageBody);
if ($messageBodyConv !== false) {
$messageBody = $messageBodyConv;
}
}
} }
if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) { if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) {
@ -575,6 +556,46 @@ class Message
} }
} }
/**
 * Converts text from one character set to another.
 *
 * mbstring is tried first, but only when it reports the input as valid in
 * the source charset; iconv is used as the fallback. Charset names are
 * lower-cased before they are compared or handed to the converters.
 *
 * @param string      $text text to convert
 * @param string      $from name of the charset $text is currently in
 * @param string|null $to   name of the target charset; self::$charset is used when null
 *
 * @return string|null '' for empty input, $text unchanged when both charsets
 *                     are the same, the converted text on success, or null
 *                     when no available converter produced a result
 */
public static function charsetConvert($text, $from, $to = null)
{
    // Compare against '' explicitly: a loose !$text test would also discard
    // the perfectly valid one-character string "0".
    if (null === $text || '' === (string) $text) {
        return '';
    }

    if (null === $to) {
        $to = self::$charset;
    }

    $from = strtolower($from);
    $to = strtolower($to);

    if ($from === $to) {
        return $text;
    }

    $converted = false;

    if (function_exists('mb_convert_encoding')) {
        try {
            if (@mb_check_encoding($text, $from)) {
                $converted = @mb_convert_encoding($text, $to, $from);
            }
        } catch (\ValueError $e) {
            // PHP 8+ throws for a charset mbstring does not know instead of
            // emitting a suppressible warning; treat it as "not converted"
            // so the iconv fallback below still gets its chance.
            $converted = false;
        }
    }

    // A non-empty input that converts to '' is treated as a failure, exactly
    // as before; only the literal "0" result is no longer mistaken for one.
    if ((!is_string($converted) || '' === $converted) && function_exists('iconv')) {
        // self::$charsetFlag is appended to the target name as an iconv
        // suffix (presumably //TRANSLIT or //IGNORE - confirm at its declaration).
        $converted = @iconv($from, $to . self::$charsetFlag, $text);
    }

    if (is_string($converted) && '' !== $converted) {
        return $converted;
    }

    return null;
}
/** /**
* This function takes in the message data and encoding type and returns the decoded data. * This function takes in the message data and encoding type and returns the decoded data.
* *

View File

@ -240,6 +240,21 @@ class MessageTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($sentFolderNumStart + 1, $server->numMessages(), 'Message moved into Sent Folder.'); $this->assertEquals($sentFolderNumStart + 1, $server->numMessages(), 'Message moved into Sent Folder.');
} }
/**
 * Exercises Message::charsetConvert(): a real KOI8-R to UTF-8 conversion,
 * the two short-circuit paths (identical charsets, empty text) and the
 * null result for an unknown source or destination charset.
 */
public function testCharsetConvert()
{
    // The six bytes below are the word "Привет" in KOI8-R.
    $koi8Bytes = "\xF0\xD2\xC9\xD7\xC5\xD4";
    $fromKoi8 = Message::charsetConvert($koi8Bytes, 'koi8-r', 'utf-8');
    $this->assertSame('Привет', $fromKoi8);

    // Identical source and target names must return the input untouched,
    // even though the charset itself does not exist.
    $sameCharset = Message::charsetConvert('test', 'unk1', 'unk1');
    $this->assertSame('test', $sameCharset, 'Same charsets not try converting');

    $emptyText = Message::charsetConvert('', 'unk1', 'unk1');
    $this->assertSame('', $emptyText, 'Empty text not try converting');

    $unknownSource = Message::charsetConvert('test', 'unk1', 'utf-8');
    $this->assertSame(null, $unknownSource, 'Null when source charset is unknown');

    $unknownTarget = Message::charsetConvert('test', 'utf-8', 'unk1');
    $this->assertSame(null, $unknownTarget, 'Null when destination charset is unknown');
}
public function testDecode() public function testDecode()
{ {
$quotedPrintableDecoded = "Now's the time for all folk to come to the aid of their country."; $quotedPrintableDecoded = "Now's the time for all folk to come to the aid of their country.";