MediaWiki  master
HttpFunctions.php
Go to the documentation of this file.
00001 <?php
/**
 * Static convenience entry points for making HTTP requests.
 *
 * The actual work is delegated to MWHttpRequest::factory(), which picks a
 * backend according to Http::$httpEngine.
 */
class Http {
	/**
	 * Backend selector read by MWHttpRequest::factory(): 'curl', 'php', or
	 * false to let the factory auto-detect on first use.
	 */
	static $httpEngine = false;

	/**
	 * Perform an HTTP request and return the response body.
	 *
	 * @param $method String: HTTP method; upper-cased before use
	 * @param $url String: URL to request
	 * @param $options Array: options forwarded to MWHttpRequest::factory().
	 *   'method' is always overwritten with $method; 'timeout' is set to
	 *   'default' when the caller did not supply one.
	 * @return Mixed: the request content when the resulting status is OK,
	 *   false otherwise
	 */
	public static function request( $method, $url, $options = array() ) {
		wfDebug( "HTTP: $method: $url\n" );
		$options['method'] = strtoupper( $method );

		if ( !isset( $options['timeout'] ) ) {
			$options['timeout'] = 'default';
		}

		$req = MWHttpRequest::factory( $url, $options );
		$status = $req->execute();

		if ( $status->isOK() ) {
			return $req->getContent();
		}

		return false;
	}

	/**
	 * Shortcut for Http::request( 'GET', ... ).
	 *
	 * @param $url String
	 * @param $timeout Mixed: stored as $options['timeout'], replacing any
	 *   'timeout' key already present in $options
	 * @param $options Array
	 * @return Mixed: see Http::request()
	 */
	public static function get( $url, $timeout = 'default', $options = array() ) {
		$options['timeout'] = $timeout;
		return Http::request( 'GET', $url, $options );
	}

	/**
	 * Shortcut for Http::request( 'POST', ... ).
	 *
	 * @param $url String
	 * @param $options Array
	 * @return Mixed: see Http::request()
	 */
	public static function post( $url, $options = array() ) {
		return Http::request( 'POST', $url, $options );
	}

	/**
	 * Check whether the host of a URL, or any of its parent domains, is
	 * reported as a local virtual host by $wgConf->isLocalVHost().
	 *
	 * Always returns false in command line mode. Both http:// and https://
	 * URLs are examined; the host must be followed by '/' or ':'.
	 *
	 * @param $url String
	 * @return Boolean
	 */
	public static function isLocalURL( $url ) {
		global $wgCommandLineMode, $wgConf;

		if ( $wgCommandLineMode ) {
			return false;
		}

		// Extract host part
		$matches = array();
		if ( preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
			$host = $matches[1];
			// Split up dotwise and walk from the top-level label downwards, so
			// that "org", "example.org", "en.example.org" are tested in turn.
			$domainParts = array_reverse( explode( '.', $host ) );
			$countParts = count( $domainParts );

			$domain = '';
			for ( $i = 0; $i < $countParts; $i++ ) {
				$domainPart = $domainParts[$i];
				if ( $i == 0 ) {
					$domain = $domainPart;
				} else {
					$domain = $domainPart . '.' . $domain;
				}

				if ( $wgConf->isLocalVHost( $domain ) ) {
					return true;
				}
			}
		}

		return false;
	}

	/**
	 * Build the User-Agent string sent by default with outgoing requests.
	 *
	 * @return String: "MediaWiki/" followed by $wgVersion
	 */
	public static function userAgent() {
		global $wgVersion;
		return "MediaWiki/$wgVersion";
	}

	/**
	 * Check that a string looks like an absolute http:// or https:// URI
	 * with a non-empty authority part and no whitespace anywhere.
	 *
	 * This is a shape check only: it does not validate the host name or
	 * resolve anything.
	 *
	 * @param $uri Mixed: URI to check
	 * @return Boolean
	 */
	public static function isValidURI( $uri ) {
		// The D modifier stops "$" from matching before a trailing newline.
		return (bool)preg_match(
			'/^https?:\/\/[^\/\s]\S*$/D',
			$uri
		);
	}
}
00171 
/**
 * Base class for a single outgoing HTTP request.
 *
 * Holds the request configuration (URL, method, headers, proxy, cookies)
 * and the parsed response headers. Concrete backends extend this class and
 * implement the transfer itself in execute(); instances are obtained
 * through MWHttpRequest::factory().
 */
class MWHttpRequest {
	const SUPPORTS_FILE_POSTS = false;

	// Response body accumulated by read()
	protected $content;
	protected $timeout = 'default';
	// Set to true by execute() when the method is HEAD
	protected $headersOnly = null;
	protected $postData = null;
	protected $proxy = null;
	protected $noProxy = false;
	protected $sslVerifyHost = true;
	protected $sslVerifyCert = true;
	protected $caInfo = null;
	protected $method = "GET";
	// Request headers, keyed by header name exactly as passed to setHeader()
	protected $reqHeaders = array();
	protected $url;
	protected $parsedUrl;
	protected $callback;
	protected $maxRedirects = 5;
	protected $followRedirects = false;

	protected $cookieJar;

	// Raw response header lines, filled in by the backend
	protected $headerList = array();
	protected $respVersion = "0.9";
	protected $respStatus = "200 Ok";
	// Parsed response headers: lower-cased name => list of values
	protected $respHeaders = array();

	public $status;

	/**
	 * @param $url String: URL to request; expanded with wfExpandUrl()
	 * @param $options Array: recognised keys are 'timeout', 'userAgent',
	 *   'postData', 'proxy', 'noProxy', 'sslVerifyHost', 'caInfo', 'method',
	 *   'followRedirects', 'maxRedirects', 'sslVerifyCert' and 'callback'
	 */
	protected function __construct( $url, $options = array() ) {
		global $wgHTTPTimeout;

		$this->url = wfExpandUrl( $url, PROTO_HTTP );
		$this->parsedUrl = wfParseUrl( $this->url );

		if ( !$this->parsedUrl || !Http::isValidURI( $this->url ) ) {
			$this->status = Status::newFatal( 'http-invalid-url' );
		} else {
			$this->status = Status::newGood( 100 ); // continue
		}

		if ( isset( $options['timeout'] ) && $options['timeout'] != 'default' ) {
			$this->timeout = $options['timeout'];
		} else {
			$this->timeout = $wgHTTPTimeout;
		}
		if ( isset( $options['userAgent'] ) ) {
			$this->setUserAgent( $options['userAgent'] );
		}

		// Options that map one-to-one onto a member of the same name
		$members = array( "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
			"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" );

		foreach ( $members as $o ) {
			if ( isset( $options[$o] ) ) {
				// ensure that MWHttpRequest::method is always
				// uppercased. Bug 36137
				if ( $o == 'method' ) {
					$options[$o] = strtoupper( $options[$o] );
				}
				$this->$o = $options[$o];
			}
		}

		if ( $this->noProxy ) {
			$this->proxy = ''; // noProxy takes precedence
		}
	}

	/**
	 * Simple check for whether outgoing requests are possible at all:
	 * either the curl extension is loaded or allow_url_fopen is enabled.
	 *
	 * @return Boolean
	 */
	public static function canMakeRequests() {
		return function_exists( 'curl_init' ) || wfIniGetBool( 'allow_url_fopen' );
	}

	/**
	 * Create a request object for the backend named by Http::$httpEngine,
	 * auto-detecting the backend (curl if available, otherwise php) when
	 * none has been chosen yet.
	 *
	 * @param $url String
	 * @param $options Array|null: passed through to the backend constructor
	 * @return CurlHttpRequest|PhpHttpRequest
	 * @throws MWException when the selected backend is unavailable or the
	 *   setting is not one of 'curl' / 'php'
	 */
	public static function factory( $url, $options = null ) {
		if ( !Http::$httpEngine ) {
			Http::$httpEngine = function_exists( 'curl_init' ) ? 'curl' : 'php';
		} elseif ( Http::$httpEngine == 'curl' && !function_exists( 'curl_init' ) ) {
			throw new MWException( __METHOD__ . ': curl (http://php.net/curl) is not installed, but' .
				' Http::$httpEngine is set to "curl"' );
		}

		switch( Http::$httpEngine ) {
			case 'curl':
				return new CurlHttpRequest( $url, $options );
			case 'php':
				if ( !wfIniGetBool( 'allow_url_fopen' ) ) {
					throw new MWException( __METHOD__ . ': allow_url_fopen needs to be enabled for pure PHP' .
						' http requests to work. If possible, curl should be used instead. See http://php.net/curl.' );
				}
				return new PhpHttpRequest( $url, $options );
			default:
				throw new MWException( __METHOD__ . ': The setting of Http::$httpEngine is not valid.' );
		}
	}

	/**
	 * Get the body accumulated so far.
	 *
	 * @return String
	 */
	public function getContent() {
		return $this->content;
	}

	/**
	 * Set the data to send as the request body.
	 *
	 * @param $args Mixed: stored as-is in $postData
	 */
	public function setData( $args ) {
		$this->postData = $args;
	}

	/**
	 * Decide which proxy, if any, this request goes through.
	 *
	 * Precedence: an explicitly configured proxy is kept unless proxying
	 * is disabled; with proxying disabled or a local URL no proxy is used;
	 * otherwise $wgHTTPProxy and then the http_proxy environment variable
	 * are consulted.
	 */
	public function proxySetup() {
		global $wgHTTPProxy;

		// An explicit proxy is used as-is, unless proxies were turned off.
		if ( $this->proxy && !$this->noProxy ) {
			return;
		}

		// Otherwise fall back to the configured defaults. noProxy is tested
		// first so the local-URL lookup is skipped when it cannot matter.
		if ( $this->noProxy || Http::isLocalURL( $this->url ) ) {
			$this->proxy = '';
		} elseif ( $wgHTTPProxy ) {
			$this->proxy = $wgHTTPProxy;
		} elseif ( getenv( "http_proxy" ) ) {
			$this->proxy = getenv( "http_proxy" );
		}
	}

	/**
	 * Set the Referer request header.
	 *
	 * @param $url String
	 */
	public function setReferer( $url ) {
		$this->setHeader( 'Referer', $url );
	}

	/**
	 * Set the User-Agent request header.
	 *
	 * @param $UA String
	 */
	public function setUserAgent( $UA ) {
		$this->setHeader( 'User-Agent', $UA );
	}

	/**
	 * Set an arbitrary request header. The name is used verbatim as the
	 * array key, so names differing only in case are distinct entries.
	 *
	 * @param $name String
	 * @param $value String
	 */
	public function setHeader( $name, $value ) {
		// I feel like I should normalize the case here...
		$this->reqHeaders[$name] = $value;
	}

	/**
	 * Build the list of "Name: value" request header lines. When a cookie
	 * jar is attached, the Cookie header is (re)generated from it first.
	 *
	 * @return Array of strings
	 */
	public function getHeaderList() {
		$list = array();

		if ( $this->cookieJar ) {
			$this->reqHeaders['Cookie'] =
				$this->cookieJar->serializeToHttpRequest(
					$this->parsedUrl['path'],
					$this->parsedUrl['host']
				);
		}

		foreach ( $this->reqHeaders as $name => $value ) {
			$list[] = "$name: $value";
		}

		return $list;
	}

	/**
	 * Set the callback that receives response body data. By default
	 * execute() installs $this->read().
	 *
	 * @param $callback Callback: invoked as callback( $handle, $data )
	 * @throws MWException when $callback is not callable
	 */
	public function setCallback( $callback ) {
		if ( !is_callable( $callback ) ) {
			throw new MWException( 'Invalid MwHttpRequest callback' );
		}
		$this->callback = $callback;
	}

	/**
	 * Default body callback: append the chunk to the stored content.
	 *
	 * @param $fh Mixed: transfer handle (unused)
	 * @param $content String: chunk of response data
	 * @return Integer: number of bytes consumed
	 */
	public function read( $fh, $content ) {
		$this->content .= $content;
		return strlen( $content );
	}

	/**
	 * Prepare the request: reset the content, flag HEAD requests as
	 * headers-only, fill in default Referer and User-Agent headers, resolve
	 * the proxy and install the default body callback. Backends call this
	 * before performing the transfer.
	 */
	public function execute() {
		global $wgTitle;

		$this->content = "";

		if ( strtoupper( $this->method ) == "HEAD" ) {
			$this->headersOnly = true;
		}

		if ( is_object( $wgTitle ) && !isset( $this->reqHeaders['Referer'] ) ) {
			$this->setReferer( wfExpandUrl( $wgTitle->getFullURL(), PROTO_CURRENT ) );
		}

		$this->proxySetup(); // set up any proxy as needed

		if ( !$this->callback ) {
			$this->setCallback( array( $this, 'read' ) );
		}

		if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
			$this->setUserAgent( Http::userAgent() );
		}
	}

	/**
	 * Parse the raw lines in $headerList into $respVersion, $respStatus and
	 * $respHeaders (lower-cased name => list of values), then extract
	 * cookies. Lines starting with a space or tab are folded into the
	 * previous header's last value.
	 */
	protected function parseHeader() {
		$lastname = "";

		foreach ( $this->headerList as $header ) {
			if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
				$this->respVersion = $match[1];
				$this->respStatus = $match[2];
			} elseif ( preg_match( "#^[ \t]#", $header ) ) {
				// A continuation line with no preceding header has nothing
				// to attach to; ignore it.
				if ( isset( $this->respHeaders[$lastname] ) ) {
					$last = count( $this->respHeaders[$lastname] ) - 1;
					$this->respHeaders[$lastname][$last] .= "\r\n$header";
				}
			} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
				$this->respHeaders[strtolower( $match[1] )][] = $match[2];
				$lastname = strtolower( $match[1] );
			}
		}

		$this->parseCookies();
	}

	/**
	 * Mark the request status fatal ('http-bad-status') when the response
	 * status code is 400 or above. Parses the headers first if that has
	 * not produced anything yet.
	 */
	protected function setStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		if ( (int)$this->respStatus > 399 ) {
			// The reason phrase may be absent ("404"), so pad to two parts.
			list( $code, $message ) = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
			$this->status->fatal( "http-bad-status", $code, $message );
		}
	}

	/**
	 * Get the numeric response status code (the leading integer of the
	 * status text, e.g. 404 for "404 Not Found").
	 *
	 * @return Integer
	 */
	public function getStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return (int)$this->respStatus;
	}

	/**
	 * Tell whether the response status code is in the range 300-303.
	 *
	 * @return Boolean
	 */
	public function isRedirect() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$status = (int)$this->respStatus;

		if ( $status >= 300 && $status <= 303 ) {
			return true;
		}

		return false;
	}

	/**
	 * Get all parsed response headers.
	 *
	 * @return Array: lower-cased header name => list of values, in the
	 *   order they were received
	 */
	public function getResponseHeaders() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->respHeaders;
	}

	/**
	 * Get the last received value of a response header.
	 *
	 * @param $header String: header name, matched case-insensitively
	 * @return String|null: null when the header was not received
	 */
	public function getResponseHeader( $header ) {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		if ( isset( $this->respHeaders[strtolower ( $header ) ] ) ) {
			$v = $this->respHeaders[strtolower ( $header ) ];
			return $v[count( $v ) - 1];
		}

		return null;
	}

	/**
	 * Attach a cookie jar to this request.
	 *
	 * @param $jar CookieJar
	 */
	public function setCookieJar( $jar ) {
		$this->cookieJar = $jar;
	}

	/**
	 * Get the cookie jar in use, after making sure the response headers
	 * have been parsed.
	 *
	 * @return CookieJar
	 */
	public function getCookieJar() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->cookieJar;
	}

	/**
	 * Set a cookie on the request's cookie jar, creating the jar on first
	 * use. Arguments are passed straight to CookieJar::setCookie().
	 *
	 * @param $name String
	 * @param $value Mixed
	 * @param $attr Mixed
	 */
	public function setCookie( $name, $value = null, $attr = null ) {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		$this->cookieJar->setCookie( $name, $value, $attr );
	}

	/**
	 * Feed every received Set-Cookie header into the cookie jar (created
	 * on demand), using the host of the final URL.
	 */
	protected function parseCookies() {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		if ( isset( $this->respHeaders['set-cookie'] ) ) {
			$url = parse_url( $this->getFinalUrl() );
			foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
				$this->cookieJar->parseCookieResponseHeader( $cookie, $url['host'] );
			}
		}
	}

	/**
	 * Return the URL the request ended up at, based on the received
	 * Location headers.
	 *
	 * The last Location value is returned. If any Location value examined
	 * (scanning from the last backwards until one with a host is found)
	 * lacks a host, the last value is treated as relative and prefixed
	 * with the scheme and host of the nearest earlier absolute Location,
	 * or of the request URL. Without Location headers the request URL is
	 * returned.
	 *
	 * @return String
	 */
	public function getFinalUrl() {
		$headers = $this->getResponseHeaders();

		//return full url (fix for incorrect but handled relative location)
		if ( isset( $headers[ 'location' ] ) ) {
			$locations = $headers[ 'location' ];
			$domain = '';
			$foundRelativeURI = false;
			$countLocations = count( $locations );

			for ( $i = $countLocations - 1; $i >= 0; $i-- ) {
				$url = parse_url( $locations[ $i ] );

				if ( isset( $url[ 'host' ] ) ) {
					$domain = $url[ 'scheme' ] . '://' . $url[ 'host' ];
					break;  //found correct URI (with host)
				} else {
					$foundRelativeURI = true;
				}
			}

			if ( $foundRelativeURI ) {
				if ( $domain ) {
					return $domain . $locations[ $countLocations - 1 ];
				} else {
					$url = parse_url( $this->url );
					if ( isset( $url[ 'host' ] ) ) {
						return $url[ 'scheme' ] . '://' . $url[ 'host' ] . $locations[ $countLocations - 1 ];
					}
				}
			} else {
				return $locations[ $countLocations - 1 ];
			}
		}

		return $this->url;
	}

	/**
	 * Whether this backend is able to follow redirects. The base
	 * implementation always says yes; backends may override.
	 *
	 * @return Boolean
	 */
	public function canFollowRedirects() {
		return true;
	}
}
00675 
/**
 * MWHttpRequest implemented using the cURL extension.
 */
class CurlHttpRequest extends MWHttpRequest {
	const SUPPORTS_FILE_POSTS = true;

	/**
	 * cURL error numbers (as returned by curl_errno()) that have a
	 * dedicated status message; everything else is reported through
	 * 'http-curl-error'.
	 */
	static $curlMessageMap = array(
		6 => 'http-host-unreachable',
		28 => 'http-timed-out'
	);

	protected $curlOptions = array();
	// Raw response header text accumulated by readHeader()
	protected $headerText = "";

	/**
	 * Header callback handed to cURL: append the chunk to $headerText.
	 *
	 * @param $fh Mixed: cURL handle (unused)
	 * @param $content String: chunk of header data
	 * @return Integer: number of bytes consumed
	 */
	protected function readHeader( $fh, $content ) {
		$this->headerText .= $content;
		return strlen( $content );
	}

	/**
	 * Perform the request with cURL.
	 *
	 * @return Status: the request status; returned immediately, without
	 *   any transfer, when it is already not OK after the base preparation
	 * @throws MWException when the cURL options cannot be applied
	 */
	public function execute() {
		parent::execute();

		if ( !$this->status->isOK() ) {
			return $this->status;
		}

		$this->curlOptions[CURLOPT_PROXY] = $this->proxy;
		$this->curlOptions[CURLOPT_TIMEOUT] = $this->timeout;
		$this->curlOptions[CURLOPT_HTTP_VERSION] = CURL_HTTP_VERSION_1_0;
		$this->curlOptions[CURLOPT_WRITEFUNCTION] = $this->callback;
		$this->curlOptions[CURLOPT_HEADERFUNCTION] = array( $this, "readHeader" );
		$this->curlOptions[CURLOPT_MAXREDIRS] = $this->maxRedirects;
		$this->curlOptions[CURLOPT_ENCODING] = ""; # Enable compression

		/* not sure these two are actually necessary */
		if ( isset( $this->reqHeaders['Referer'] ) ) {
			$this->curlOptions[CURLOPT_REFERER] = $this->reqHeaders['Referer'];
		}
		$this->curlOptions[CURLOPT_USERAGENT] = $this->reqHeaders['User-Agent'];

		if ( isset( $this->sslVerifyHost ) ) {
			// CURLOPT_SSL_VERIFYHOST takes 2 (verify that the certificate
			// matches the host) or 0 (no check); a bare boolean true would
			// be cast to 1, which is not a supported "verify" value.
			$this->curlOptions[CURLOPT_SSL_VERIFYHOST] = $this->sslVerifyHost ? 2 : 0;
		}

		if ( isset( $this->sslVerifyCert ) ) {
			$this->curlOptions[CURLOPT_SSL_VERIFYPEER] = $this->sslVerifyCert;
		}

		if ( $this->caInfo ) {
			$this->curlOptions[CURLOPT_CAINFO] = $this->caInfo;
		}

		if ( $this->headersOnly ) {
			$this->curlOptions[CURLOPT_NOBODY] = true;
			$this->curlOptions[CURLOPT_HEADER] = true;
		} elseif ( $this->method == 'POST' ) {
			$this->curlOptions[CURLOPT_POST] = true;
			$this->curlOptions[CURLOPT_POSTFIELDS] = $this->postData;
			// Suppress 'Expect: 100-continue' header, as some servers
			// will reject it with a 417 and Curl won't auto retry
			// with HTTP 1.0 fallback
			$this->reqHeaders['Expect'] = '';
		} else {
			$this->curlOptions[CURLOPT_CUSTOMREQUEST] = $this->method;
		}

		$this->curlOptions[CURLOPT_HTTPHEADER] = $this->getHeaderList();

		$curlHandle = curl_init( $this->url );

		if ( !curl_setopt_array( $curlHandle, $this->curlOptions ) ) {
			// Release the handle before bailing out
			curl_close( $curlHandle );
			throw new MWException( "Error setting curl options." );
		}

		if ( $this->followRedirects && $this->canFollowRedirects() ) {
			wfSuppressWarnings();
			if ( ! curl_setopt( $curlHandle, CURLOPT_FOLLOWLOCATION, true ) ) {
				wfDebug( __METHOD__ . ": Couldn't set CURLOPT_FOLLOWLOCATION. " .
					"Probably safe_mode or open_basedir is set.\n" );
				// Continue the processing. If it were in curl_setopt_array,
				// processing would have halted on its entry
			}
			wfRestoreWarnings();
		}

		if ( false === curl_exec( $curlHandle ) ) {
			// The message map is keyed by numeric error code, so look it up
			// with curl_errno(); curl_error() only yields the message text.
			$code = curl_errno( $curlHandle );

			if ( isset( self::$curlMessageMap[$code] ) ) {
				$this->status->fatal( self::$curlMessageMap[$code] );
			} else {
				$this->status->fatal( 'http-curl-error', curl_error( $curlHandle ) );
			}
		} else {
			$this->headerList = explode( "\r\n", $this->headerText );
		}

		curl_close( $curlHandle );

		$this->parseHeader();
		$this->setStatus();

		return $this->status;
	}

	/**
	 * Whether redirects can be followed with this cURL installation: not
	 * when open_basedir or safe_mode is active, and not when the
	 * CURLOPT_REDIR_PROTOCOLS constant is unavailable.
	 *
	 * @return Boolean
	 */
	public function canFollowRedirects() {
		if ( strval( ini_get( 'open_basedir' ) ) !== '' || wfIniGetBool( 'safe_mode' ) ) {
			wfDebug( "Cannot follow redirects in safe mode\n" );
			return false;
		}

		if ( !defined( 'CURLOPT_REDIR_PROTOCOLS' ) ) {
			wfDebug( "Cannot follow redirects with libcurl < 7.19.4 due to CVE-2009-0037\n" );
			return false;
		}

		return true;
	}
}
00803 
/**
 * MWHttpRequest implemented using PHP's HTTP stream wrapper (fopen() with
 * a stream context).
 */
class PhpHttpRequest extends MWHttpRequest {

	/**
	 * Convert a proxy URL into the tcp://host:port form expected by the
	 * 'proxy' stream context option.
	 *
	 * @param $url String: proxy URL
	 * @return String
	 */
	protected function urlToTcp( $url ) {
		$parsedUrl = parse_url( $url );

		if ( isset( $parsedUrl['port'] ) ) {
			$port = $parsedUrl['port'];
		} else {
			// No explicit port in the URL: fall back to the scheme's
			// well-known port rather than emitting "tcp://host:".
			$isHttps = isset( $parsedUrl['scheme'] ) && $parsedUrl['scheme'] === 'https';
			$port = $isHttps ? 443 : 80;
		}

		return 'tcp://' . $parsedUrl['host'] . ':' . $port;
	}

	/**
	 * Perform the request through the HTTP stream wrapper, following
	 * redirects manually when enabled, and pass the body to the callback.
	 *
	 * @return Status: the request status; returned without opening any
	 *   connection when the URL or scheme was already rejected
	 */
	public function execute() {
		parent::execute();

		// Same early exit as the cURL backend: never open a URL that was
		// already rejected as invalid.
		if ( !$this->status->isOK() ) {
			return $this->status;
		}

		if ( is_array( $this->postData ) ) {
			$this->postData = wfArrayToCGI( $this->postData );
		}

		if ( $this->parsedUrl['scheme'] != 'http' &&
			$this->parsedUrl['scheme'] != 'https' ) {
			$this->status->fatal( 'http-invalid-scheme', $this->parsedUrl['scheme'] );
			return $this->status;
		}

		$this->reqHeaders['Accept'] = "*/*";
		if ( $this->method == 'POST' ) {
			// Required for HTTP 1.0 POSTs
			$this->reqHeaders['Content-Length'] = strlen( $this->postData );
			if ( !isset( $this->reqHeaders['Content-Type'] ) ) {
				$this->reqHeaders['Content-Type'] = "application/x-www-form-urlencoded";
			}
		}

		$options = array();
		if ( $this->proxy ) {
			$options['proxy'] = $this->urlToTcp( $this->proxy );
			$options['request_fulluri'] = true;
		}

		// Redirects are followed by the loop below, so the wrapper's own
		// limit is only set from the request configuration.
		if ( !$this->followRedirects ) {
			$options['max_redirects'] = 0;
		} else {
			$options['max_redirects'] = $this->maxRedirects;
		}

		$options['method'] = $this->method;
		$options['header'] = implode( "\r\n", $this->getHeaderList() );
		// Note that at some future point we may want to support
		// HTTP/1.1, but we'd have to write support for chunking
		// in version of PHP < 5.3.1
		$options['protocol_version'] = "1.0";

		// This is how we tell PHP we want to deal with 404s (for example) ourselves.
		// Only works on 5.2.10+
		$options['ignore_errors'] = true;

		if ( $this->postData ) {
			$options['content'] = $this->postData;
		}

		$options['timeout'] = $this->timeout;

		$context = stream_context_create( array( 'http' => $options ) );

		$this->headerList = array();
		$reqCount = 0;
		$url = $this->url;

		$result = array();

		do {
			$reqCount++;
			wfSuppressWarnings();
			$fh = fopen( $url, "r", false, $context );
			wfRestoreWarnings();

			if ( !$fh ) {
				break;
			}

			$result = stream_get_meta_data( $fh );
			$this->headerList = $result['wrapper_data'];
			$this->parseHeader();

			if ( !$this->followRedirects ) {
				break;
			}

			# Handle manual redirection
			if ( !$this->isRedirect() || $reqCount > $this->maxRedirects ) {
				break;
			}
			# Check security of URL
			$url = $this->getResponseHeader( "Location" );

			if ( !Http::isValidURI( $url ) ) {
				wfDebug( __METHOD__ . ": insecure redirection\n" );
				break;
			}
		} while ( true );

		$this->setStatus();

		if ( $fh === false ) {
			$this->status->fatal( 'http-request-error' );
			return $this->status;
		}

		if ( $result['timed_out'] ) {
			$this->status->fatal( 'http-timed-out', $this->url );
			return $this->status;
		}

		// If everything went OK, or we received some error code
		// get the response body content.
		if ( $this->status->isOK()
			|| (int)$this->respStatus >= 300 ) {
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 8192 );

				if ( $buf === false ) {
					$this->status->fatal( 'http-read-error' );
					break;
				}

				if ( strlen( $buf ) ) {
					call_user_func( $this->callback, $fh, $buf );
				}
			}
		}
		fclose( $fh );

		return $this->status;
	}
}