From 492cc4fb27b6ac104de3773f320ef72a12b7d878 Mon Sep 17 00:00:00 2001 From: Mark Baker Date: Mon, 30 Aug 2010 09:09:12 +0000 Subject: [PATCH] Feature: Support for Extended Workbook Properties in Excel2007, Excel5 and OOCalc Readers; Feature: Support for User-defined Workbook Properties in Excel2007 and OOCalc Readers git-svn-id: https://phpexcel.svn.codeplex.com/svn/trunk@60080 2327b42d-5241-43d6-9e2a-de5ac946f064 --- Classes/PHPExcel/Reader/Excel2007.php | 13 +- Classes/PHPExcel/Reader/Excel5.php | 249 +++++++++++++++++++++++--- Classes/PHPExcel/Reader/OOCalc.php | 33 ++++ Classes/PHPExcel/Shared/OLERead.php | 57 ++++++ changelog.txt | 4 +- 5 files changed, 324 insertions(+), 32 deletions(-) diff --git a/Classes/PHPExcel/Reader/Excel2007.php b/Classes/PHPExcel/Reader/Excel2007.php index 69e7ece..6186218 100644 --- a/Classes/PHPExcel/Reader/Excel2007.php +++ b/Classes/PHPExcel/Reader/Excel2007.php @@ -345,8 +345,19 @@ class PHPExcel_Reader_Excel2007 implements PHPExcel_Reader_IReader case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties": $xmlCore = simplexml_load_string($this->_getFromZipArchive($zip, "{$rel['Target']}")); if (is_object($xmlCore)) { - $xmlCore->registerXPathNamespace("vt", "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"); $docProps = $excel->getProperties(); + foreach ($xmlCore as $xmlProperty) { + $cellDataOfficeAttributes = $xmlProperty->attributes(); + if (isset($cellDataOfficeAttributes['name'])) { + $propertyName = (string) $cellDataOfficeAttributes['name']; + $cellDataOfficeChildren = $xmlProperty->children('http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes'); + $attributeType = $cellDataOfficeChildren->getName(); + $attributeValue = (string) $cellDataOfficeChildren->{$attributeType}; + $attributeValue = PHPExcel_DocumentProperties::convertProperty($attributeValue,$attributeType); + $attributeType = 
PHPExcel_DocumentProperties::convertPropertyType($attributeType); + $docProps->setCustomProperty($propertyName,$attributeValue,$attributeType); + } + } } break; diff --git a/Classes/PHPExcel/Reader/Excel5.php b/Classes/PHPExcel/Reader/Excel5.php index 96cca80..6e38d81 100644 --- a/Classes/PHPExcel/Reader/Excel5.php +++ b/Classes/PHPExcel/Reader/Excel5.php @@ -192,6 +192,13 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader */ private $_summaryInformation; + /** + * Extended Summary Information stream data. + * + * @var string + */ + private $_documentSummaryInformation; + /** * Workbook stream data. (Includes workbook globals substream as well as sheet substreams) * @@ -508,6 +515,9 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader // Read the summary information stream (containing meta data) $this->_readSummaryInformation(); + // Read the Additional document summary information stream (containing application-specific meta data) + $this->_readDocumentSummaryInformation(); + // total byte size of Excel data (workbook global substream + sheet substreams) $this->_dataSize = strlen($this->_data); @@ -930,6 +940,9 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader // Get summary information data $this->_summaryInformation = $ole->getSummaryInformation(); + + // Get additional document summary information data + $this->_documentSummaryInformation = $ole->getDocumentSummaryInformation(); } /** @@ -947,31 +960,34 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader // offset: 6; size: 2; OS indicator // offset: 8; size: 16 // offset: 24; size: 4; section count + $secCount = $this->_GetInt4d($this->_summaryInformation, 24); // offset: 28; size: 16; first section's class id: e0 85 9f f2 f9 4f 68 10 ab 91 08 00 2b 27 b3 d9 // offset: 44; size: 4 + $secOffset = $this->_GetInt4d($this->_summaryInformation, 44); // section header - // offset: 48; size: 4; section length - $secLength = 
$this->_GetInt4d($this->_summaryInformation, 48); + // offset: $secOffset; size: 4; section length + $secLength = $this->_GetInt4d($this->_summaryInformation, $secOffset); - // offset: 52; size: 4; property count - $countProperties = $this->_GetInt4d($this->_summaryInformation, 52); + // offset: $secOffset+4; size: 4; property count + $countProperties = $this->_GetInt4d($this->_summaryInformation, $secOffset+4); // initialize code page (used to resolve string values) $codePage = 'CP1252'; - // offset: 56; size: var + // offset: ($secOffset+8); size: var // loop through property decarations and properties for ($i = 0; $i < $countProperties; ++$i) { - // offset: 56 + 8 * $i; size: 4; property ID - $id = $this->_GetInt4d($this->_summaryInformation, 56 + 8 * $i); + // offset: ($secOffset+8) + (8 * $i); size: 4; property ID + $id = $this->_GetInt4d($this->_summaryInformation, ($secOffset+8) + (8 * $i)); - // offset: 60 + 8 * $i; size: 4; offset from beginning of section (48) - $offset = $this->_GetInt4d($this->_summaryInformation, 60 + 8 * $i); + // Use value of property id as appropriate + // offset: ($secOffset+12) + (8 * $i); size: 4; offset from beginning of section (48) + $offset = $this->_GetInt4d($this->_summaryInformation, ($secOffset+12) + (8 * $i)); - $type = $this->_GetInt4d($this->_summaryInformation, 48 + $offset); + $type = $this->_GetInt4d($this->_summaryInformation, $secOffset + $offset); // initialize property value $value = null; @@ -979,11 +995,11 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader // extract property value based on property type switch ($type) { case 0x02: // 2 byte signed integer - $value = $this->_GetInt2d($this->_summaryInformation, 52 + $offset); + $value = $this->_GetInt2d($this->_summaryInformation, $secOffset + 4 + $offset); break; case 0x03: // 4 byte signed integer - $value = $this->_GetInt4d($this->_summaryInformation, 52 + $offset); + $value = $this->_GetInt4d($this->_summaryInformation, $secOffset + 4 + 
$offset); break; case 0x13: // 4 byte unsigned integer @@ -991,15 +1007,15 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader break; case 0x1E: // null-terminated string prepended by dword string length - $byteLength = $this->_GetInt4d($this->_summaryInformation, 52 + $offset); - $value = substr($this->_summaryInformation, 56 + $offset, $byteLength); + $byteLength = $this->_GetInt4d($this->_summaryInformation, $secOffset + 4 + $offset); + $value = substr($this->_summaryInformation, $secOffset + 8 + $offset, $byteLength); $value = PHPExcel_Shared_String::ConvertEncoding($value, 'UTF-8', $codePage); $value = rtrim($value); break; case 0x40: // Filetime (64-bit value representing the number of 100-nanosecond intervals since January 1, 1601) // PHP-time - $value = PHPExcel_Shared_OLE::OLE2LocalDate(substr($this->_summaryInformation, 52 + $offset, 8)); + $value = PHPExcel_Shared_OLE::OLE2LocalDate(substr($this->_summaryInformation, $secOffset + 4 + $offset, 8)); break; case 0x47: // Clipboard format @@ -1007,51 +1023,225 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader break; } - // Use value of property id as appropriate switch ($id) { - case 0x01: // Code Page + case 0x01: // Code Page $codePage = PHPExcel_Shared_CodePage::NumberToName($value); break; - case 0x02: // Title + case 0x02: // Title $this->_phpExcel->getProperties()->setTitle($value); break; - case 0x03: // Subject + case 0x03: // Subject $this->_phpExcel->getProperties()->setSubject($value); break; - case 0x04: // Author (Creator) + case 0x04: // Author (Creator) $this->_phpExcel->getProperties()->setCreator($value); break; - case 0x05: // Keywords + case 0x05: // Keywords $this->_phpExcel->getProperties()->setKeywords($value); break; - case 0x06: // Comments (Description) + case 0x06: // Comments (Description) $this->_phpExcel->getProperties()->setDescription($value); break; - case 0x08: // Last Saved By (LastModifiedBy) + case 0x07: // Template + // Not supported by 
PHPExcel + break; + + case 0x08: // Last Saved By (LastModifiedBy) $this->_phpExcel->getProperties()->setLastModifiedBy($value); break; - case 0x09: // Revision - // not supported by PHPExcel + case 0x09: // Revision + // Not supported by PHPExcel break; - case 0x0C: // Created + case 0x0A: // Total Editing Time + // Not supported by PHPExcel + break; + + case 0x0B: // Last Printed + // Not supported by PHPExcel + break; + + case 0x0C: // Created Date/Time $this->_phpExcel->getProperties()->setCreated($value); break; - case 0x0D: // Modified + case 0x0D: // Modified Date/Time $this->_phpExcel->getProperties()->setModified($value); break; - case 0x12: // Name of creating application - // not supported by PHPExcel + case 0x0E: // Number of Pages + // Not supported by PHPExcel break; + + case 0x0F: // Number of Words + // Not supported by PHPExcel + break; + + case 0x10: // Number of Characters + // Not supported by PHPExcel + break; + + case 0x11: // Thumbnail + // Not supported by PHPExcel + break; + + case 0x12: // Name of creating application + // Not supported by PHPExcel + break; + + case 0x13: // Security + // Not supported by PHPExcel + break; + + } + } + } + + /** + * Read additional document summary information + */ + private function _readDocumentSummaryInformation() + { + if (!isset($this->_documentSummaryInformation)) { + return; + } + + // offset: 0; size: 2; must be 0xFE 0xFF (UTF-16 LE byte order mark) + // offset: 2; size: 2; + // offset: 4; size: 2; OS version + // offset: 6; size: 2; OS indicator + // offset: 8; size: 16 + // offset: 24; size: 4; section count + $secCount = $this->_GetInt4d($this->_documentSummaryInformation, 24); + + // offset: 28; size: 16; first section's class id: 02 d5 cd d5 9c 2e 1b 10 93 97 08 00 2b 2c f9 ae + // offset: 44; size: 4; first section offset + $secOffset = $this->_GetInt4d($this->_documentSummaryInformation, 44); + + // section header + // offset: $secOffset; size: 4; section length + $secLength = 
$this->_GetInt4d($this->_documentSummaryInformation, $secOffset); + + // offset: $secOffset+4; size: 4; property count + $countProperties = $this->_GetInt4d($this->_documentSummaryInformation, $secOffset+4); + + // initialize code page (used to resolve string values) + $codePage = 'CP1252'; + + // offset: ($secOffset+8); size: var + // loop through property declarations and properties + for ($i = 0; $i < $countProperties; ++$i) { + // offset: ($secOffset+8) + (8 * $i); size: 4; property ID + $id = $this->_GetInt4d($this->_documentSummaryInformation, ($secOffset+8) + (8 * $i)); + + // Use value of property id as appropriate + // offset: ($secOffset+12) + (8 * $i); size: 4; property offset, relative to the start of the section ($secOffset) + $offset = $this->_GetInt4d($this->_documentSummaryInformation, ($secOffset+12) + (8 * $i)); + + $type = $this->_GetInt4d($this->_documentSummaryInformation, $secOffset + $offset); + + // initialize property value + $value = null; + + // extract property value based on property type + switch ($type) { + case 0x02: // 2 byte signed integer + $value = $this->_GetInt2d($this->_documentSummaryInformation, $secOffset + 4 + $offset); + break; + + case 0x03: // 4 byte signed integer + $value = $this->_GetInt4d($this->_documentSummaryInformation, $secOffset + 4 + $offset); + break; + + case 0x13: // 4 byte unsigned integer + // not needed yet, fix later if necessary + break; + + case 0x1E: // null-terminated string prepended by dword string length + $byteLength = $this->_GetInt4d($this->_documentSummaryInformation, $secOffset + 4 + $offset); + $value = substr($this->_documentSummaryInformation, $secOffset + 8 + $offset, $byteLength); + $value = PHPExcel_Shared_String::ConvertEncoding($value, 'UTF-8', $codePage); + $value = rtrim($value); + break; + + case 0x40: // Filetime (64-bit value representing the number of 100-nanosecond intervals since January 1, 1601) + // PHP-Time + $value = PHPExcel_Shared_OLE::OLE2LocalDate(substr($this->_documentSummaryInformation, $secOffset + 4 + 
$offset, 8)); + break; + + case 0x47: // Clipboard format + // not needed yet, fix later if necessary + break; + } + + switch ($id) { + case 0x02: // Category + $this->_phpExcel->getProperties()->setCategory($value); + break; + + case 0x03: // Presentation Target + // Not supported by PHPExcel + break; + + case 0x04: // Bytes + // Not supported by PHPExcel + break; + + case 0x05: // Lines + // Not supported by PHPExcel + break; + + case 0x06: // Paragraphs + // Not supported by PHPExcel + break; + + case 0x07: // Slides + // Not supported by PHPExcel + break; + + case 0x08: // Notes + // Not supported by PHPExcel + break; + + case 0x09: // Hidden Slides + // Not supported by PHPExcel + break; + + case 0x0A: // MM Clips + // Not supported by PHPExcel + break; + + case 0x0B: // Scale Crop + // Not supported by PHPExcel + break; + + case 0x0C: // Heading Pairs + // Not supported by PHPExcel + break; + + case 0x0D: // Titles of Parts + // Not supported by PHPExcel + break; + + case 0x0E: // Manager + $this->_phpExcel->getProperties()->setManager($value); + break; + + case 0x0F: // Company + $this->_phpExcel->getProperties()->setCompany($value); + break; + + case 0x10: // Links up-to-date + // Not supported by PHPExcel + break; + } } } @@ -6182,3 +6372,4 @@ class PHPExcel_Reader_Excel5 implements PHPExcel_Reader_IReader } } + diff --git a/Classes/PHPExcel/Reader/OOCalc.php b/Classes/PHPExcel/Reader/OOCalc.php index aa91692..df02722 100644 --- a/Classes/PHPExcel/Reader/OOCalc.php +++ b/Classes/PHPExcel/Reader/OOCalc.php @@ -297,9 +297,42 @@ class PHPExcel_Reader_OOCalc implements PHPExcel_Reader_IReader // echo '
'; // switch ($propertyName) { + case 'initial-creator' : + $docProps->setCreator($propertyValue); + break; case 'keyword' : $docProps->setKeywords($propertyValue); break; + case 'creation-date' : + $creationDate = strtotime($propertyValue); + $docProps->setCreated($creationDate); + break; + case 'user-defined' : + $propertyValueType = PHPExcel_DocumentProperties::PROPERTY_TYPE_STRING; + foreach ($propertyValueAttributes as $key => $value) { + if ($key == 'name') { + $propertyValueName = (string) $value; + } elseif($key == 'value-type') { + switch ($value) { + case 'date' : + $propertyValue = PHPExcel_DocumentProperties::convertProperty($propertyValue,'date'); + $propertyValueType = PHPExcel_DocumentProperties::PROPERTY_TYPE_DATE; + break; + case 'boolean' : + $propertyValue = PHPExcel_DocumentProperties::convertProperty($propertyValue,'bool'); + $propertyValueType = PHPExcel_DocumentProperties::PROPERTY_TYPE_BOOLEAN; + break; + case 'float' : + $propertyValue = PHPExcel_DocumentProperties::convertProperty($propertyValue,'r4'); + $propertyValueType = PHPExcel_DocumentProperties::PROPERTY_TYPE_FLOAT; + break; + default : + $propertyValueType = PHPExcel_DocumentProperties::PROPERTY_TYPE_STRING; + } + } + } + $docProps->setCustomProperty($propertyValueName,$propertyValue,$propertyValueType); + break; } } } diff --git a/Classes/PHPExcel/Shared/OLERead.php b/Classes/PHPExcel/Shared/OLERead.php index fdf802c..d0b7268 100644 --- a/Classes/PHPExcel/Shared/OLERead.php +++ b/Classes/PHPExcel/Shared/OLERead.php @@ -264,6 +264,58 @@ class PHPExcel_Shared_OLERead { } } + /** + * Extract binary stream data, additional document summary information + * + * @return string|null + */ + public function getDocumentSummaryInformation() + { + if (!isset($this->documentSummaryInformation)) { + return null; + } + + if ($this->props[$this->documentSummaryInformation]['size'] < self::SMALL_BLOCK_THRESHOLD){ + $rootdata = $this->_readData($this->props[$this->rootentry]['startBlock']); + + 
$streamData = ''; + $block = $this->props[$this->documentSummaryInformation]['startBlock']; + + $pos = 0; + while ($block != -2) { + $pos = $block * self::SMALL_BLOCK_SIZE; + $streamData .= substr($rootdata, $pos, self::SMALL_BLOCK_SIZE); + + $block = $this->smallBlockChain[$block]; + } + + return $streamData; + + + } else { + $numBlocks = $this->props[$this->documentSummaryInformation]['size'] / self::BIG_BLOCK_SIZE; + if ($this->props[$this->documentSummaryInformation]['size'] % self::BIG_BLOCK_SIZE != 0) { + ++$numBlocks; + } + + if ($numBlocks == 0) return ''; + + + $streamData = ''; + $block = $this->props[$this->documentSummaryInformation]['startBlock']; + + $pos = 0; + + while ($block != -2) { + $pos = ($block + 1) * self::BIG_BLOCK_SIZE; + $streamData .= substr($this->data, $pos, self::BIG_BLOCK_SIZE); + $block = $this->bigBlockChain[$block]; + } + + return $streamData; + } + } + /** * Read a standard stream (by joining sectors using information from SAT) * @@ -336,6 +388,11 @@ class PHPExcel_Shared_OLERead { $this->summaryInformation = count($this->props) - 1; } + // Additional Document Summary information + if ($name == chr(5) . 'DocumentSummaryInformation') { + $this->documentSummaryInformation = count($this->props) - 1; + } + $offset += self::PROPERTY_STORAGE_BLOCK_SIZE; } diff --git a/changelog.txt b/changelog.txt index bfcfa57..9aaa195 100644 --- a/changelog.txt +++ b/changelog.txt @@ -24,8 +24,8 @@ Fixed in SVN: -- Bugfix: (Progi1984) Workitem 7895 - Excel5 : Formula : Error constant -- Bugfix: (Progi1984) Workitem 7895 - Excel5 : Formula : Concatenation operator +- Feature: (MBaker) Support for Extended Workbook Properties in Excel2007, Excel5 and OOCalc Readers; support for User-defined Workbook Properties in Excel2007 and OOCalc Readers + 2010-08-26 (v1.7.4): - Bugfix: (Progi1984) Workitem 7895 - Excel5 : Formula : Power