From 879f86c235846350b96a5337a2f1323d87140614 Mon Sep 17 00:00:00 2001 From: MarkBaker Date: Sun, 12 Jul 2015 23:16:41 +0100 Subject: [PATCH] GH-554 - Whitespace after toRichTextObject() --- Classes/PHPExcel/Helper/HTML.php | 128 +++++++++++++++---------------- Examples/42richText.php | 27 ++++--- changelog.txt | 1 + 3 files changed, 77 insertions(+), 79 deletions(-) diff --git a/Classes/PHPExcel/Helper/HTML.php b/Classes/PHPExcel/Helper/HTML.php index 28bf6b1..5b6ecfe 100644 --- a/Classes/PHPExcel/Helper/HTML.php +++ b/Classes/PHPExcel/Helper/HTML.php @@ -3,7 +3,7 @@ class PHPExcel_Helper_HTML { protected static $colourMap = array( - 'aliceblue' => 'f0f8ff', + 'aliceblue' => 'f0f8ff', 'antiquewhite' => 'faebd7', 'antiquewhite1' => 'ffefdb', 'antiquewhite2' => 'eedfcc', @@ -526,12 +526,12 @@ class PHPExcel_Helper_HTML protected $size; protected $color; - protected $bold = false; - protected $italic = false; - protected $underline = false; - protected $superscript = false; - protected $subscript = false; - protected $strikethrough = false; + protected $bold = false; + protected $italic = false; + protected $underline = false; + protected $superscript = false; + protected $subscript = false; + protected $strikethrough = false; protected $startTagCallbacks = array( 'font' => 'startFontTag', @@ -573,8 +573,7 @@ class PHPExcel_Helper_HTML protected $richTextObject; - protected function initialise() - { + protected function initialise() { $this->face = $this->size = $this->color = null; $this->bold = $this->italic = $this->underline = $this->superscript = $this->subscript = $this->strikethrough = false; @@ -583,30 +582,44 @@ class PHPExcel_Helper_HTML $this->stringData = ''; } - public function toRichTextObject($html) - { + public function toRichTextObject($html) { $this->initialise(); - // Create a new DOM object + // Create a new DOM object $dom = new domDocument; - // Load the HTML file into the DOM object + // Load the HTML file into the DOM object // Note the use of error suppression, because typically this will be an html fragment, so not fully valid markup $loaded = @$dom->loadHTML($html); - // Discard excess white space + // Discard excess white space $dom->preserveWhiteSpace = false; - $this->richTextObject = new PHPExcel_RichText(); + $this->richTextObject = new PHPExcel_RichText();; $this->parseElements($dom); + + // Clean any further spurious whitespace + $this->cleanWhitespace(); + return $this->richTextObject; } - protected function buildTextRun() - { - $text = $this->stringData; - if (trim($text) === '') { - return; + protected function cleanWhitespace() { + foreach($this->richTextObject->getRichTextElements() as $key => $element) { + $text = $element->getText(); + // Trim any leading spaces on the first run + if ($key == 0) { + $text = ltrim($text); + } + // Trim any spaces immediately after a line break + $text = preg_replace('/\n */mu', "\n", $text); + $element->setText($text); } + } + + protected function buildTextRun() { + $text = $this->stringData; + if (trim($text) === '') + return; $richtextRun = $this->richTextObject->createTextRun($this->stringData); if ($this->face) { @@ -616,7 +629,7 @@ class PHPExcel_Helper_HTML $richtextRun->getFont()->setSize($this->size); } if ($this->color) { - $richtextRun->getFont()->setColor(new PHPExcel_Style_Color('ff' . $this->color)); + $richtextRun->getFont()->setColor( new PHPExcel_Style_Color( 'ff' . $this->color ) ); } if ($this->bold) { $richtextRun->getFont()->setBold(true); @@ -639,22 +652,19 @@ class PHPExcel_Helper_HTML $this->stringData = ''; } - protected function rgbToColour($rgb) - { + protected function rgbToColour($rgb) { preg_match_all('/\d+/', $rgb, $values); - foreach ($values[0] as &$value) { + foreach($values[0] as &$value) { $value = str_pad(dechex($value), 2, '0', STR_PAD_LEFT); } return implode($values[0]); } - protected function colourNameLookup($rgb) - { + protected function colourNameLookup($rgb) { return self::$colourMap[$rgb]; } - protected function startFontTag($tag) - { + protected function startFontTag($tag) { foreach ($tag->attributes as $attribute) { $attributeName = strtolower($attribute->name); $attributeValue = $attribute->value; @@ -662,7 +672,7 @@ class PHPExcel_Helper_HTML if ($attributeName == 'color') { if (preg_match('/rgb\s*\(/', $attributeValue)) { $this->$attributeName = $this->rgbToColour($attributeValue); - } elseif (strpos(trim($attributeValue), '#') === 0) { + } elseif(strpos(trim($attributeValue), '#') === 0) { $this->$attributeName = ltrim($attributeValue, '#'); } else { $this->$attributeName = $this->colourNameLookup($attributeValue); @@ -673,85 +683,69 @@ class PHPExcel_Helper_HTML } } - protected function endFontTag() - { + protected function endFontTag() { $this->face = $this->size = $this->color = null; } - protected function startBoldTag() - { + protected function startBoldTag() { $this->bold = true; } - protected function endBoldTag() - { + protected function endBoldTag() { $this->bold = false; } - protected function startItalicTag() - { + protected function startItalicTag() { $this->italic = true; } - protected function endItalicTag() - { + protected function endItalicTag() { $this->italic = false; } - protected function startUnderlineTag() - { + protected function startUnderlineTag() { $this->underline = true; } - protected function endUnderlineTag() - { + protected function endUnderlineTag() { $this->underline = false; } - protected function startSubscriptTag() - { + protected function startSubscriptTag() { $this->subscript = true; } - protected function endSubscriptTag() - { + protected function endSubscriptTag() { $this->subscript = false; } - protected function startSuperscriptTag() - { + protected function startSuperscriptTag() { $this->superscript = true; } - protected function endSuperscriptTag() - { + protected function endSuperscriptTag() { $this->superscript = false; } - protected function startStrikethruTag() - { + protected function startStrikethruTag() { $this->strikethrough = true; } - protected function endStrikethruTag() - { + protected function endStrikethruTag() { $this->strikethrough = false; } - protected function breakTag() - { - $this->stringData .= PHP_EOL; + protected function breakTag() { + $this->stringData .= "\n"; } - protected function parseTextNode(DOMText $textNode) - { - $domText = preg_replace('/\s+/u', ' ', ltrim($textNode->nodeValue)); + protected function parseTextNode(DOMText $textNode) { + $domText = preg_replace('/\s+/u', ' ', str_replace(["\r", "\n"], ' ', $textNode->nodeValue)); $this->stringData .= $domText; $this->buildTextRun(); } - protected function handleCallback($element, $callbackTag, $callbacks) - { + protected function handleCallback($element, $callbackTag, $callbacks) { if (isset($callbacks[$callbackTag])) { $elementHandler = $callbacks[$callbackTag]; if (method_exists($this, $elementHandler)) { @@ -760,22 +754,20 @@ class PHPExcel_Helper_HTML } } - protected function parseElementNode(DOMElement $element) - { + protected function parseElementNode(DOMElement $element) { $callbackTag = strtolower($element->nodeName); $this->stack[] = $callbackTag; $this->handleCallback($element, $callbackTag, $this->startTagCallbacks); $this->parseElements($element); - $this->stringData .= ' '; +// $this->stringData .= ' '; array_pop($this->stack); $this->handleCallback($element, $callbackTag, $this->endTagCallbacks); } - protected function parseElements(DOMNode $element) - { + protected function parseElements(DOMNode $element) { foreach ($element->childNodes as $child) { if ($child instanceof DOMText) { $this->parseTextNode($child); diff --git a/Examples/42richText.php b/Examples/42richText.php index b8f994e..32bc401 100644 --- a/Examples/42richText.php +++ b/Examples/42richText.php @@ -2,7 +2,7 @@ /** * PHPExcel * - * Copyright (c) 2006 - 2015 PHPExcel + * Copyright (C) 2006 - 2014 PHPExcel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -20,7 +20,7 @@ * * @category PHPExcel * @package PHPExcel - * @copyright Copyright (c) 2006 - 2015 PHPExcel (http://www.codeplex.com/PHPExcel) + * @copyright Copyright (c) 2006 - 2014 PHPExcel (http://www.codeplex.com/PHPExcel) * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL * @version ##VERSION##, ##DATE## */ @@ -55,7 +55,7 @@ $objPHPExcel->getProperties()->setCreator("Maarten Balliauw") // Add some data echo date('H:i:s') , " Add some data" , EOL; -$html1=' +$html1 = '

My very first example of rich text
generated from html markup

@@ -64,11 +64,11 @@ while this block uses an underline.

-I want to eat healthy foodpizza. +I want to eat healthy food pizza. '; -$html2='

+$html2 = '

100°C is a hot temperature @@ -78,9 +78,11 @@ $html2='

'; -$html3='23 equals 8'; +$html3 = '23 equals 8'; -$html4='H2SO4 is the chemical formula for Sulphuric acid'; +$html4 = 'H2SO4 is the chemical formula for Sulphuric acid'; + +$html5 = 'bold, italic, bold+italic'; $wizard = new PHPExcel_Helper_HTML; @@ -97,7 +99,7 @@ $objPHPExcel->getActiveSheet()->getStyle('A1') $richText = $wizard->toRichTextObject($html2); -$objPHPExcel->setActiveSheetIndex(0) +$objPHPExcel->getActiveSheet() ->setCellValue('A2', $richText); $objPHPExcel->getActiveSheet()->getRowDimension(1)->setRowHeight(-1); @@ -105,12 +107,15 @@ $objPHPExcel->getActiveSheet()->getStyle('A2') ->getAlignment() ->setWrapText(true); -$objPHPExcel->setActiveSheetIndex(0) - ->setCellValue('A3', $wizard->toRichTextObject($html3)); +$objPHPExcel->getActiveSheet() + ->setCellValue('A3', $wizard->toRichTextObject($html3)); -$objPHPExcel->setActiveSheetIndex(0) +$objPHPExcel->getActiveSheet() ->setCellValue('A4', $wizard->toRichTextObject($html4)); +$objPHPExcel->getActiveSheet() + ->setCellValue('A5', $wizard->toRichTextObject($html5)); + // Rename worksheet echo date('H:i:s') , " Rename worksheet" , EOL; diff --git a/changelog.txt b/changelog.txt index 2f455c2..eaefe2a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -27,6 +27,7 @@ Planned for 1.8.2 - Bugfix: (MBaker) - Fix to getCell() method when cell reference includes a worksheet reference - Bugfix: (ncrypthic) Work Item GH-570 - Ignore inlineStr type if formula element exists - General: (umpirsky) Work Item GH-548 - Optimize vlookup() sort +- Bugfix: (MBaker) Work Item GH-554 - Whitespace after toRichTextObject() 2015-04-30 (v1.8.1):