MediaWiki
master
|
<?php
/**
 * Extraction of metadata (dimensions, title, description, embedded
 * <metadata> XML and an "animated" flag) from SVG files.
 */

/**
 * Static convenience wrapper around SVGReader.
 */
class SVGMetadataExtractor {
	/**
	 * Parse an SVG file and return the metadata collected by SVGReader.
	 *
	 * @param $filename String: path of the SVG file to read
	 * @return Array: metadata array as built by SVGReader
	 * @throws MWException propagated from SVGReader
	 */
	static function getMetadata( $filename ) {
		$svg = new SVGReader( $filename );
		return $svg->getMetadata();
	}
}

/**
 * Streaming (XMLReader based) SVG metadata reader.
 *
 * All parsing happens in the constructor; afterwards the result is
 * available through getMetadata().
 */
class SVGReader {
	const DEFAULT_WIDTH = 512;
	const DEFAULT_HEIGHT = 512;
	const NS_SVG = 'http://www.w3.org/2000/svg';

	/** @var XMLReader|null underlying pull parser */
	private $reader = null;

	/** @var bool when true, debug() forwards its messages to wfDebug() */
	private $mDebug = false;

	/** @var array metadata collected so far */
	private $metadata = array();

	/**
	 * Open the SVG file and parse it immediately.
	 *
	 * Files larger than $wgSVGMetadataCutoff bytes are only read up to that
	 * many bytes, so the parser may see a truncated (invalid) document.
	 *
	 * @param $source String: path of the SVG file
	 * @throws MWException if the file size cannot be determined, the file
	 *   cannot be read, or the root element is not <svg> in the SVG namespace
	 */
	function __construct( $source ) {
		global $wgSVGMetadataCutoff;
		$this->reader = new XMLReader();

		// Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
		$size = filesize( $source );
		if ( $size === false ) {
			throw new MWException( "Error getting filesize of SVG." );
		}

		if ( $size > $wgSVGMetadataCutoff ) {
			$this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." );
			// Read from the start of the file (offset 0). A negative offset
			// must not be used here: on PHP >= 7.1 it seeks relative to the
			// END of the file, which would return only the trailing byte(s).
			$contents = file_get_contents( $source, false, null, 0, $wgSVGMetadataCutoff );
			if ( $contents === false ) {
				throw new MWException( 'Error reading SVG file.' );
			}
			$this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}

		// Expand entities, since Adobe Illustrator uses them for xmlns
		// attributes (bug 31719). Note that libxml2 has some protection
		// against large recursive entity expansions so this is not as
		// insecure as it might appear to be.
		$this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$this->metadata['width'] = self::DEFAULT_WIDTH;
		$this->metadata['height'] = self::DEFAULT_HEIGHT;

		// The size in the units specified by the SVG file
		// (for the metadata box)
		// Per the SVG spec, if unspecified, default to '100%'
		$this->metadata['originalWidth'] = '100%';
		$this->metadata['originalHeight'] = '100%';

		// Because we cut off the end of the svg making an invalid one. Complicated
		// try catch thing to make sure warnings get restored. Seems like there should
		// be a better way.
		wfSuppressWarnings();
		try {
			$this->read();
		} catch ( Exception $e ) {
			// Note, if this happens, the width/height will be taken to be 0x0.
			// Should we consider it the default 512x512 instead?
			wfRestoreWarnings();
			throw $e;
		}
		wfRestoreWarnings();
	}

	/**
	 * @return Array: the metadata collected while parsing
	 */
	public function getMetadata() {
		return $this->metadata;
	}

	/**
	 * Walk the document: validate the root <svg> element, read its size
	 * attributes, then visit each top-level child looking for title,
	 * description, metadata and signs of animation/scripting.
	 *
	 * @throws MWException if the first element is not <svg> in the SVG namespace
	 * @return bool true on completion
	 */
	public function read() {
		$keepReading = $this->reader->read();

		/* Skip until first element */
		while ( $keepReading && $this->reader->nodeType != XMLReader::ELEMENT ) {
			$keepReading = $this->reader->read();
		}

		if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) {
			throw new MWException( "Expected <svg> tag, got " .
				$this->reader->localName . " in NS " . $this->reader->namespaceURI );
		}
		$this->debug( "<svg> tag is correct." );
		$this->handleSVGAttribs();

		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;
			$isSVG = ( $this->reader->namespaceURI == self::NS_SVG );

			$this->debug( "$tag" );

			if ( $isSVG && $tag == 'svg' && $type == XMLReader::END_ELEMENT && $this->reader->depth <= $exitDepth ) {
				break;
			} elseif ( $isSVG && $tag == 'title' ) {
				$this->readField( $tag, 'title' );
			} elseif ( $isSVG && $tag == 'desc' ) {
				$this->readField( $tag, 'description' );
			} elseif ( $isSVG && $tag == 'metadata' && $type == XMLReader::ELEMENT ) {
				// readXml() takes only the metadata key to store under.
				$this->readXml( 'metadata' );
			} elseif ( $isSVG && $tag == 'script' ) {
				// We normally do not allow scripted svgs.
				// However its possible to configure MW to let them
				// in, and such files should be considered animated.
				$this->metadata['animated'] = true;
			} elseif ( $tag !== '#text' ) {
				$this->debug( "Unhandled top-level XML tag $tag" );

				if ( !isset( $this->metadata['animated'] ) ) {
					// Recurse into children of current tag, looking for animation.
					$this->animateFilter( $tag );
				}
			}

			// Goto next element, which is sibling of current (Skip children).
			$keepReading = $this->reader->next();
		}

		$this->reader->close();

		return true;
	}

	/**
	 * Read the text content of the current element into a metadata field.
	 * Consumes nodes up to the matching SVG-namespace end tag; if several
	 * text nodes are encountered, the last one wins.
	 *
	 * @param $name String: local name of the element being read
	 * @param $metafield String|null: metadata key to store the text under;
	 *   nothing is read when empty
	 */
	private function readField( $name, $metafield = null ) {
		$this->debug( "Read field $metafield" );
		if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName == $name
				&& $this->reader->namespaceURI == self::NS_SVG
				&& $this->reader->nodeType == XMLReader::END_ELEMENT
			) {
				break;
			} elseif ( $this->reader->nodeType == XMLReader::TEXT ) {
				$this->metadata[$metafield] = trim( $this->reader->value );
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Store the inner XML of the current element in a metadata field.
	 *
	 * The cursor is intentionally left on the element: the loop in read()
	 * advances with next(), which skips this element's subtree. Advancing
	 * here as well would silently skip the node that follows the element.
	 *
	 * @param $metafield String|null: metadata key to store the XML under;
	 *   nothing is read when empty
	 * @throws MWException if XMLReader lacks readInnerXML()
	 */
	private function readXml( $metafield = null ) {
		$this->debug( "Read top level metadata" );
		if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		// TODO: find and store type of xml snippet. metadata['metadataType'] = "rdf"
		if ( method_exists( $this->reader, 'readInnerXML' ) ) {
			$this->metadata[$metafield] = trim( $this->reader->readInnerXML() );
		} else {
			throw new MWException( "The PHP XMLReader extension does not come with readInnerXML() method. Your libxml is probably out of date (need 2.6.20 or later)." );
		}
	}

	/**
	 * Scan the descendants of the current element for SVG animation or
	 * script elements and set metadata['animated'] when one is found.
	 *
	 * @param $name String: local name of the element whose subtree is scanned
	 */
	private function animateFilter( $name ) {
		$this->debug( "animate filter for tag $name" );
		if ( $this->reader->nodeType != XMLReader::ELEMENT ) {
			return;
		}
		if ( $this->reader->isEmptyElement ) {
			// No children and no end tag to wait for.
			return;
		}
		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName == $name && $this->reader->depth <= $exitDepth
				&& $this->reader->nodeType == XMLReader::END_ELEMENT
			) {
				break;
			} elseif ( $this->reader->namespaceURI == self::NS_SVG
				&& $this->reader->nodeType == XMLReader::ELEMENT
			) {
				switch ( $this->reader->localName ) {
					case 'script':
						// Normally we disallow files with
						// <script>, but its possible
						// to configure MW to disable
						// such checks.
					case 'animate':
					case 'set':
					case 'animateMotion':
					case 'animateColor':
					case 'animateTransform':
						$this->debug( "HOUSTON WE HAVE ANIMATION" );
						$this->metadata['animated'] = true;
						break;
				}
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Log an XML error. Despite the name this only logs; nothing is thrown.
	 *
	 * @param $err String: error description
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "SVGReader XML error: $err\n" );
	}

	/**
	 * Log a message via wfDebug(), but only when $mDebug is enabled.
	 *
	 * @param $data String: message
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "SVGReader: $data\n" );
		}
	}

	/**
	 * Unconditionally log a message via wfDebug().
	 *
	 * @param $data String: message
	 */
	private function warn( $data ) {
		wfDebug( "SVGReader: $data\n" );
	}

	/**
	 * Unconditionally log a message via wfDebug() with a "WARN" prefix.
	 * NOTE(review): the prefixes of warn() and notice() look swapped - confirm
	 * before relying on either.
	 *
	 * @param $data String: message
	 */
	private function notice( $data ) {
		wfDebug( "SVGReader WARN: $data\n" );
	}

	/**
	 * Derive pixel width/height from the width, height and viewBox
	 * attributes of the current (root <svg>) element and store them, along
	 * with the original attribute strings, in the metadata.
	 *
	 * A missing dimension is computed from the other one using the viewBox
	 * aspect ratio (1:1 when there is no usable viewBox).
	 */
	private function handleSVGAttribs() {
		$defaultWidth = self::DEFAULT_WIDTH;
		$defaultHeight = self::DEFAULT_HEIGHT;
		$aspect = 1.0;
		$width = null;
		$height = null;

		$viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
		if ( $viewBoxAttr ) {
			// min-x min-y width height, separated by whitespace and/or commas
			$viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
			if ( count( $viewBox ) == 4 ) {
				$viewWidth = self::scaleSVGUnit( $viewBox[2] );
				$viewHeight = self::scaleSVGUnit( $viewBox[3] );
				if ( $viewWidth > 0 && $viewHeight > 0 ) {
					$aspect = $viewWidth / $viewHeight;
					$defaultHeight = $defaultWidth / $aspect;
				}
			}
		}

		$widthAttr = $this->reader->getAttribute( 'width' );
		if ( $widthAttr ) {
			$width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
			$this->metadata['originalWidth'] = $widthAttr;
		}

		$heightAttr = $this->reader->getAttribute( 'height' );
		if ( $heightAttr ) {
			$height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
			$this->metadata['originalHeight'] = $heightAttr;
		}

		if ( !isset( $width ) && !isset( $height ) ) {
			$width = $defaultWidth;
			$height = $width / $aspect;
		} elseif ( isset( $width ) && !isset( $height ) ) {
			$height = $width / $aspect;
		} elseif ( isset( $height ) && !isset( $width ) ) {
			$width = $height * $aspect;
		}

		// Only overwrite the defaults set by the constructor with sane values.
		if ( $width > 0 && $height > 0 ) {
			$this->metadata['width'] = intval( round( $width ) );
			$this->metadata['height'] = intval( round( $height ) );
		}
	}

	/**
	 * Convert an SVG length string to pixels.
	 *
	 * Accepts an optionally signed number (with optional fraction and
	 * exponent, e.g. "12", ".5", "+1.5", "1e2"), optionally followed by
	 * whitespace and one of the units in the table below or '%'.
	 * Percentages are relative to $viewportSize. Anything that does not
	 * match is passed through floatval() and treated as pixels.
	 *
	 * @param $length String: SVG length, e.g. "10px", "2.5cm", "50%"
	 * @param $viewportSize Float: size percentages are relative to
	 * @return Float: length in pixels
	 */
	static function scaleSVGUnit( $length, $viewportSize = 512 ) {
		// Pixels per unit; absolute units are scaled so that 1in = 90px.
		static $unitLength = array(
			'px' => 1.0,
			'pt' => 1.25,
			'pc' => 15.0,
			'mm' => 3.543307,
			'cm' => 35.43307,
			'in' => 90.0,
			'em' => 16.0, // fake it?
			'ex' => 12.0, // fake it?
			'' => 1.0, // "User units" pixels by default
		);
		$matches = array();
		if ( preg_match(
			'/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*(em|ex|px|pt|pc|cm|mm|in|%|)\s*$/',
			$length,
			$matches
		) ) {
			$length = floatval( $matches[1] );
			$unit = $matches[2];
			if ( $unit == '%' ) {
				return $length * 0.01 * $viewportSize;
			} else {
				return $length * $unitLength[$unit];
			}
		} else {
			// Assume pixels
			return floatval( $length );
		}
	}
}