GH-554 - Whitespace after toRichTextObject()

This commit is contained in:
MarkBaker 2015-07-12 23:16:41 +01:00
parent 7ced78b0be
commit 879f86c235
3 changed files with 77 additions and 79 deletions

View File

@ -3,7 +3,7 @@
class PHPExcel_Helper_HTML class PHPExcel_Helper_HTML
{ {
protected static $colourMap = array( protected static $colourMap = array(
'aliceblue' => 'f0f8ff', 'aliceblue' => 'f0f8ff',
'antiquewhite' => 'faebd7', 'antiquewhite' => 'faebd7',
'antiquewhite1' => 'ffefdb', 'antiquewhite1' => 'ffefdb',
'antiquewhite2' => 'eedfcc', 'antiquewhite2' => 'eedfcc',
@ -526,12 +526,12 @@ class PHPExcel_Helper_HTML
protected $size; protected $size;
protected $color; protected $color;
protected $bold = false; protected $bold = false;
protected $italic = false; protected $italic = false;
protected $underline = false; protected $underline = false;
protected $superscript = false; protected $superscript = false;
protected $subscript = false; protected $subscript = false;
protected $strikethrough = false; protected $strikethrough = false;
protected $startTagCallbacks = array( protected $startTagCallbacks = array(
'font' => 'startFontTag', 'font' => 'startFontTag',
@ -573,8 +573,7 @@ class PHPExcel_Helper_HTML
protected $richTextObject; protected $richTextObject;
protected function initialise() protected function initialise() {
{
$this->face = $this->size = $this->color = null; $this->face = $this->size = $this->color = null;
$this->bold = $this->italic = $this->underline = $this->superscript = $this->subscript = $this->strikethrough = false; $this->bold = $this->italic = $this->underline = $this->superscript = $this->subscript = $this->strikethrough = false;
@ -583,30 +582,44 @@ class PHPExcel_Helper_HTML
$this->stringData = ''; $this->stringData = '';
} }
public function toRichTextObject($html) public function toRichTextObject($html) {
{
$this->initialise(); $this->initialise();
// Create a new DOM object // Create a new DOM object
$dom = new domDocument; $dom = new domDocument;
// Load the HTML file into the DOM object // Load the HTML file into the DOM object
// Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup
$loaded = @$dom->loadHTML($html); $loaded = @$dom->loadHTML($html);
// Discard excess white space // Discard excess white space
$dom->preserveWhiteSpace = false; $dom->preserveWhiteSpace = false;
$this->richTextObject = new PHPExcel_RichText(); $this->richTextObject = new PHPExcel_RichText();;
$this->parseElements($dom); $this->parseElements($dom);
// Clean any further spurious whitespace
$this->cleanWhitespace();
return $this->richTextObject; return $this->richTextObject;
} }
protected function buildTextRun() protected function cleanWhitespace() {
{ foreach($this->richTextObject->getRichTextElements() as $key => $element) {
$text = $this->stringData; $text = $element->getText();
if (trim($text) === '') { // Trim any leading spaces on the first run
return; if ($key == 0) {
$text = ltrim($text);
}
// Trim any spaces immediately after a line break
$text = preg_replace('/\n */mu', "\n", $text);
$element->setText($text);
} }
}
protected function buildTextRun() {
$text = $this->stringData;
if (trim($text) === '')
return;
$richtextRun = $this->richTextObject->createTextRun($this->stringData); $richtextRun = $this->richTextObject->createTextRun($this->stringData);
if ($this->face) { if ($this->face) {
@ -616,7 +629,7 @@ class PHPExcel_Helper_HTML
$richtextRun->getFont()->setSize($this->size); $richtextRun->getFont()->setSize($this->size);
} }
if ($this->color) { if ($this->color) {
$richtextRun->getFont()->setColor(new PHPExcel_Style_Color('ff' . $this->color)); $richtextRun->getFont()->setColor( new PHPExcel_Style_Color( 'ff' . $this->color ) );
} }
if ($this->bold) { if ($this->bold) {
$richtextRun->getFont()->setBold(true); $richtextRun->getFont()->setBold(true);
@ -639,22 +652,19 @@ class PHPExcel_Helper_HTML
$this->stringData = ''; $this->stringData = '';
} }
protected function rgbToColour($rgb) protected function rgbToColour($rgb) {
{
preg_match_all('/\d+/', $rgb, $values); preg_match_all('/\d+/', $rgb, $values);
foreach ($values[0] as &$value) { foreach($values[0] as &$value) {
$value = str_pad(dechex($value), 2, '0', STR_PAD_LEFT); $value = str_pad(dechex($value), 2, '0', STR_PAD_LEFT);
} }
return implode($values[0]); return implode($values[0]);
} }
protected function colourNameLookup($rgb) protected function colourNameLookup($rgb) {
{
return self::$colourMap[$rgb]; return self::$colourMap[$rgb];
} }
protected function startFontTag($tag) protected function startFontTag($tag) {
{
foreach ($tag->attributes as $attribute) { foreach ($tag->attributes as $attribute) {
$attributeName = strtolower($attribute->name); $attributeName = strtolower($attribute->name);
$attributeValue = $attribute->value; $attributeValue = $attribute->value;
@ -662,7 +672,7 @@ class PHPExcel_Helper_HTML
if ($attributeName == 'color') { if ($attributeName == 'color') {
if (preg_match('/rgb\s*\(/', $attributeValue)) { if (preg_match('/rgb\s*\(/', $attributeValue)) {
$this->$attributeName = $this->rgbToColour($attributeValue); $this->$attributeName = $this->rgbToColour($attributeValue);
} elseif (strpos(trim($attributeValue), '#') === 0) { } elseif(strpos(trim($attributeValue), '#') === 0) {
$this->$attributeName = ltrim($attributeValue, '#'); $this->$attributeName = ltrim($attributeValue, '#');
} else { } else {
$this->$attributeName = $this->colourNameLookup($attributeValue); $this->$attributeName = $this->colourNameLookup($attributeValue);
@ -673,85 +683,69 @@ class PHPExcel_Helper_HTML
} }
} }
protected function endFontTag() protected function endFontTag() {
{
$this->face = $this->size = $this->color = null; $this->face = $this->size = $this->color = null;
} }
protected function startBoldTag() protected function startBoldTag() {
{
$this->bold = true; $this->bold = true;
} }
protected function endBoldTag() protected function endBoldTag() {
{
$this->bold = false; $this->bold = false;
} }
protected function startItalicTag() protected function startItalicTag() {
{
$this->italic = true; $this->italic = true;
} }
protected function endItalicTag() protected function endItalicTag() {
{
$this->italic = false; $this->italic = false;
} }
protected function startUnderlineTag() protected function startUnderlineTag() {
{
$this->underline = true; $this->underline = true;
} }
protected function endUnderlineTag() protected function endUnderlineTag() {
{
$this->underline = false; $this->underline = false;
} }
protected function startSubscriptTag() protected function startSubscriptTag() {
{
$this->subscript = true; $this->subscript = true;
} }
protected function endSubscriptTag() protected function endSubscriptTag() {
{
$this->subscript = false; $this->subscript = false;
} }
protected function startSuperscriptTag() protected function startSuperscriptTag() {
{
$this->superscript = true; $this->superscript = true;
} }
protected function endSuperscriptTag() protected function endSuperscriptTag() {
{
$this->superscript = false; $this->superscript = false;
} }
protected function startStrikethruTag() protected function startStrikethruTag() {
{
$this->strikethrough = true; $this->strikethrough = true;
} }
protected function endStrikethruTag() protected function endStrikethruTag() {
{
$this->strikethrough = false; $this->strikethrough = false;
} }
protected function breakTag() protected function breakTag() {
{ $this->stringData .= "\n";
$this->stringData .= PHP_EOL;
} }
protected function parseTextNode(DOMText $textNode) protected function parseTextNode(DOMText $textNode) {
{ $domText = preg_replace('/\s+/u', ' ', str_replace(["\r", "\n"], ' ', $textNode->nodeValue));
$domText = preg_replace('/\s+/u', ' ', ltrim($textNode->nodeValue));
$this->stringData .= $domText; $this->stringData .= $domText;
$this->buildTextRun(); $this->buildTextRun();
} }
protected function handleCallback($element, $callbackTag, $callbacks) protected function handleCallback($element, $callbackTag, $callbacks) {
{
if (isset($callbacks[$callbackTag])) { if (isset($callbacks[$callbackTag])) {
$elementHandler = $callbacks[$callbackTag]; $elementHandler = $callbacks[$callbackTag];
if (method_exists($this, $elementHandler)) { if (method_exists($this, $elementHandler)) {
@ -760,22 +754,20 @@ class PHPExcel_Helper_HTML
} }
} }
protected function parseElementNode(DOMElement $element) protected function parseElementNode(DOMElement $element) {
{
$callbackTag = strtolower($element->nodeName); $callbackTag = strtolower($element->nodeName);
$this->stack[] = $callbackTag; $this->stack[] = $callbackTag;
$this->handleCallback($element, $callbackTag, $this->startTagCallbacks); $this->handleCallback($element, $callbackTag, $this->startTagCallbacks);
$this->parseElements($element); $this->parseElements($element);
$this->stringData .= ' '; // $this->stringData .= ' ';
array_pop($this->stack); array_pop($this->stack);
$this->handleCallback($element, $callbackTag, $this->endTagCallbacks); $this->handleCallback($element, $callbackTag, $this->endTagCallbacks);
} }
protected function parseElements(DOMNode $element) protected function parseElements(DOMNode $element) {
{
foreach ($element->childNodes as $child) { foreach ($element->childNodes as $child) {
if ($child instanceof DOMText) { if ($child instanceof DOMText) {
$this->parseTextNode($child); $this->parseTextNode($child);

View File

@ -2,7 +2,7 @@
/** /**
* PHPExcel * PHPExcel
* *
* Copyright (c) 2006 - 2015 PHPExcel * Copyright (C) 2006 - 2014 PHPExcel
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
@ -20,7 +20,7 @@
* *
* @category PHPExcel * @category PHPExcel
* @package PHPExcel * @package PHPExcel
* @copyright Copyright (c) 2006 - 2015 PHPExcel (http://www.codeplex.com/PHPExcel) * @copyright Copyright (c) 2006 - 2014 PHPExcel (http://www.codeplex.com/PHPExcel)
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
* @version ##VERSION##, ##DATE## * @version ##VERSION##, ##DATE##
*/ */
@ -55,7 +55,7 @@ $objPHPExcel->getProperties()->setCreator("Maarten Balliauw")
// Add some data // Add some data
echo date('H:i:s') , " Add some data" , EOL; echo date('H:i:s') , " Add some data" , EOL;
$html1='<font color="#0000ff"> $html1 = '<font color="#0000ff">
<h1 align="center">My very first example of rich text<br />generated from html markup</h1> <h1 align="center">My very first example of rich text<br />generated from html markup</h1>
<p> <p>
<font size="14" COLOR="rgb(0,255,128)"> <font size="14" COLOR="rgb(0,255,128)">
@ -64,11 +64,11 @@ while this block uses an <u>underline</u>.
</font> </font>
</p> </p>
<p align="right"><font size="9" color="red"> <p align="right"><font size="9" color="red">
I want to eat <ins><del>healthy food</del><strong>pizza</strong></ins>. I want to eat <ins><del>healthy food</del> <strong>pizza</strong></ins>.
</font> </font>
'; ';
$html2='<p> $html2 = '<p>
<font color="#ff0000"> <font color="#ff0000">
100&deg;C is a hot temperature 100&deg;C is a hot temperature
</font> </font>
@ -78,9 +78,11 @@ $html2='<p>
</font> </font>
</p>'; </p>';
$html3='2<sup>3</sup> equals 8'; $html3 = '2<sup>3</sup> equals 8';
$html4='H<sub>2</sub>SO<sub>4</sub> is the chemical formula for Sulphuric acid'; $html4 = 'H<sub>2</sub>SO<sub>4</sub> is the chemical formula for Sulphuric acid';
$html5 = '<strong>bold</strong>, <em>italic</em>, <strong><em>bold+italic</em></strong>';
$wizard = new PHPExcel_Helper_HTML; $wizard = new PHPExcel_Helper_HTML;
@ -97,7 +99,7 @@ $objPHPExcel->getActiveSheet()->getStyle('A1')
$richText = $wizard->toRichTextObject($html2); $richText = $wizard->toRichTextObject($html2);
$objPHPExcel->setActiveSheetIndex(0) $objPHPExcel->getActiveSheet()
->setCellValue('A2', $richText); ->setCellValue('A2', $richText);
$objPHPExcel->getActiveSheet()->getRowDimension(1)->setRowHeight(-1); $objPHPExcel->getActiveSheet()->getRowDimension(1)->setRowHeight(-1);
@ -105,12 +107,15 @@ $objPHPExcel->getActiveSheet()->getStyle('A2')
->getAlignment() ->getAlignment()
->setWrapText(true); ->setWrapText(true);
$objPHPExcel->setActiveSheetIndex(0) $objPHPExcel->getActiveSheet()
->setCellValue('A3', $wizard->toRichTextObject($html3)); ->setCellValue('A3', $wizard->toRichTextObject($html3));
$objPHPExcel->setActiveSheetIndex(0) $objPHPExcel->getActiveSheet()
->setCellValue('A4', $wizard->toRichTextObject($html4)); ->setCellValue('A4', $wizard->toRichTextObject($html4));
$objPHPExcel->getActiveSheet()
->setCellValue('A5', $wizard->toRichTextObject($html5));
// Rename worksheet // Rename worksheet
echo date('H:i:s') , " Rename worksheet" , EOL; echo date('H:i:s') , " Rename worksheet" , EOL;

View File

@ -27,6 +27,7 @@ Planned for 1.8.2
- Bugfix: (MBaker) - Fix to getCell() method when cell reference includes a worksheet reference - Bugfix: (MBaker) - Fix to getCell() method when cell reference includes a worksheet reference
- Bugfix: (ncrypthic) Work Item GH-570 - Ignore inlineStr type if formula element exists - Bugfix: (ncrypthic) Work Item GH-570 - Ignore inlineStr type if formula element exists
- General: (umpirsky) Work Item GH-548 - Optimize vlookup() sort - General: (umpirsky) Work Item GH-548 - Optimize vlookup() sort
- Bugfix: (MBaker) Work Item GH-554 - Whitespace after toRichTextObject()
2015-04-30 (v1.8.1): 2015-04-30 (v1.8.1):