<?php
/**
 * base include file for SimpleTest
 * @package SimpleTest
 * @subpackage WebTester
 * @version $Id: url.php,v 1.22 2005/02/02 23:25:23 lastcraft Exp $
 */

/**#@+
 * include other SimpleTest class files
 */
require_once(dirname(__FILE__) . '/encoding.php');
/**#@-*/

/**
 * URL parser to replace parse_url() PHP function which
 * got broken in PHP 4.3.0. Adds some browser specific
 * functionality such as expandomatics.
 * Guesses a bit trying to separate the host from
 * the path.
 *
 * Parsing works by "chomping": each _chomp*() helper receives
 * the remaining URL by reference, removes the section it
 * recognises and returns that section.
 * @package SimpleTest
 * @subpackage WebTester
 */
class SimpleUrl
{
    protected $_scheme;
    protected $_username;
    protected $_password;
    protected $_host;
    protected $_port;
    protected $_path;
    protected $_request;
    protected $_fragment;
    protected $_target;

    /**
     * Constructor. Parses URL into sections. The order of
     * the chomp calls matters, as each one consumes the
     * front (or, for coordinates, the tail) of what is left.
     * @param string $url        Incoming URL.
     * @access public
     */
    public function __construct($url)
    {
        list($x, $y) = $this->_chompCoordinates($url);
        $this->_scheme = $this->_chompScheme($url);
        list($this->_username, $this->_password) = $this->_chompLogin($url);
        $this->_host = $this->_chompHost($url);
        $this->_port = false;
        // The host chunk still carries any ":port" suffix.
        if (preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
            $this->_host = $host_parts[1];
            $this->_port = (int)$host_parts[2];
        }
        $this->_path = $this->_chompPath($url);
        $this->_request = $this->_parseRequest($this->_chompRequest($url));
        $this->_request->setCoordinates($x, $y);
        // Whatever is left is either a fragment or nothing of interest.
        $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
        $this->_target = false;
    }

    /**
     * Old PHP 4 style constructor name, kept so that code
     * calling $this->SimpleUrl($url) explicitly still works.
     * @param string $url        Incoming URL.
     * @access public
     */
    public function SimpleUrl($url)
    {
        $this->__construct($url);
    }

    /**
     * Extracts the X, Y coordinate pair from an image map.
     * @param string $url   URL so far. The coordinates will be
     *                      removed.
     * @return array        X, Y as a pair of integers, or a
     *                      pair of false values if absent.
     * @access private
     */
    public function _chompCoordinates(&$url)
    {
        if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            $url = $matches[1];
            return array((int)$matches[2], (int)$matches[3]);
        }
        return array(false, false);
    }

    /**
     * Extracts the scheme part of an incoming URL.
     * @param string $url   URL so far. The scheme will be
     *                      removed, the "//" is left in place.
     * @return string       Scheme part or false.
     * @access private
     */
    public function _chompScheme(&$url)
    {
        if (preg_match('/(.*?):(\/\/)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return false;
    }

    /**
     * Extracts the username and password from the
     * incoming URL. The // prefix will be reattached
     * to the URL after the doublet is extracted.
     * @param string $url    URL so far. The username and
     *                       password are removed.
     * @return array         Two item list of username and
     *                       password. Will urldecode() them.
     *                       Missing items are false.
     * @access private
     */
    public function _chompLogin(&$url)
    {
        $prefix = '';
        if (preg_match('/(\/\/)(.*)/', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (preg_match('/(.*?)@(.*)/', $url, $matches)) {
            $url = $prefix . $matches[2];
            // Only the first colon separates the pair, so a
            // password containing ":" is kept whole.
            $parts = explode(':', $matches[1], 2);
            return array(
                    urldecode($parts[0]),
                    isset($parts[1]) ? urldecode($parts[1]) : false);
        }
        $url = $prefix . $url;
        return array(false, false);
    }

    /**
     * Extracts the host part of an incoming URL.
     * Includes the port number part. Will extract
     * the host if it starts with // or it has
     * a top level domain or it has at least two
     * dots.
     * @param string $url    URL so far. The host will be
     *                       removed.
     * @return string        Host part guess or false.
     * @access private
     */
    public function _chompHost(&$url)
    {
        if (preg_match('/(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
            $tlds = self::getAllTopLevelDomains();
            if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
        }
        return false;
    }

    /**
     * Extracts the path information from the incoming
     * URL. Strips this path from the URL.
     * @param string $url     URL so far. The path will be
     *                        removed.
     * @return string         Path part or an empty string.
     * @access private
     */
    public function _chompPath(&$url)
    {
        if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return ($matches[1] ? $matches[1] : '');
        }
        return '';
    }

    /**
     * Strips off the request data.
     * @param string $url  URL so far. The request will be
     *                     removed.
     * @return string      Raw request part without the
     *                     leading "?", or an empty string.
     * @access private
     */
    public function _chompRequest(&$url)
    {
        if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return '';
    }

    /**
     * Breaks the request down into an object. Pairs are
     * separated on "&"; values are urldecode()d and a
     * key without "=" is added with an empty value.
     * @param string $raw           Raw request.
     * @return SimpleFormEncoding   Parsed data.
     * @access private
     */
    public function _parseRequest($raw)
    {
        $request = new SimpleFormEncoding();
        foreach (explode('&', $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add($matches[1], urldecode($matches[2]));
            } elseif ($pair) {
                $request->add($pair, '');
            }
        }
        return $request;
    }

    /**
     * Accessor for protocol part.
     * @param string $default    Value to use if not present.
     * @return string            Scheme name, e.g "http".
     * @access public
     */
    public function getScheme($default = false)
    {
        return $this->_scheme ? $this->_scheme : $default;
    }

    /**
     * Accessor for user name.
     * @return string    Username preceding host.
     * @access public
     */
    public function getUsername()
    {
        return $this->_username;
    }

    /**
     * Accessor for password.
     * @return string    Password preceding host.
     * @access public
     */
    public function getPassword()
    {
        return $this->_password;
    }

    /**
     * Accessor for hostname. The port is held separately,
     * see getPort().
     * @param string $default    Value to use if not present.
     * @return string            Hostname only.
     * @access public
     */
    public function getHost($default = false)
    {
        return $this->_host ? $this->_host : $default;
    }

    /**
     * Accessor for top level domain.
     * @return string       Last part of host, or false if
     *                      the host has no dot in it.
     * @access public
     */
    public function getTld()
    {
        // pathinfo() treats the text after the last dot as
        // the "extension", which for a host name is the TLD.
        $path_parts = pathinfo($this->getHost());
        return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
    }

    /**
     * Accessor for port number.
     * @return integer    TCP/IP port number, or false if
     *                    none was given.
     * @access public
     */
    public function getPort()
    {
        return $this->_port;
    }

    /**
     * Accessor for path.
     * @return string   Full path including leading slash if implied.
     * @access public
     */
    public function getPath()
    {
        if (! $this->_path && $this->_host) {
            return '/';
        }
        return $this->_path;
    }

    /**
     * Accessor for page if any. This may be a
     * directory name if ambiguous.
     * @return string   Page name.
     * @access public
     */
    public function getPage()
    {
        if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     * Gets the path to the page.
     * @return string       Path less the page, or false if
     *                      the path contains no slash.
     * @access public
     */
    public function getBasePath()
    {
        if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     * Accessor for fragment at end of URL after the "#".
     * @return string    Part after "#", or false if none.
     * @access public
     */
    public function getFragment()
    {
        return $this->_fragment;
    }

    /**
     * Accessor for horizontal image coordinate.
     * @return integer        X value.
     * @access public
     */
    public function getX()
    {
        return $this->_request->getX();
    }

    /**
     * Accessor for vertical image coordinate.
     * @return integer        Y value.
     * @access public
     */
    public function getY()
    {
        return $this->_request->getY();
    }

    /**
     * Accessor for current request parameters
     * in URL string form
     * @return string   Form is string "?a=1&b=2", etc.
     *                  Empty string if there is no request.
     * @access public
     */
    public function getEncodedRequest()
    {
        $encoded = $this->_request->asString();
        if ($encoded) {
            // Guarantee exactly one leading "?" whether or
            // not the encoding already supplied one.
            return '?' . preg_replace('/^\?/', '', $encoded);
        }
        return '';
    }

    /**
     * Adds an additional parameter to the request.
     * @param string $key            Name of parameter.
     * @param string $value          Value as string.
     * @access public
     */
    public function addRequestParameter($key, $value)
    {
        $this->_request->add($key, $value);
    }

    /**
     * Adds additional parameters to the request.
     * @param hash/SimpleFormEncoding $parameters   Additional
     *                                              parameters.
     * @access public
     */
    public function addRequestParameters($parameters)
    {
        $this->_request->merge($parameters);
    }

    /**
     * Clears down all parameters.
     * @access public
     */
    public function clearRequest()
    {
        $this->_request = new SimpleFormEncoding();
    }

    /**
     * Sets image coordinates. Set to false to clear
     * them.
     * @param integer $x    Horizontal position.
     * @param integer $y    Vertical position.
     * @access public
     */
    public function setCoordinates($x = false, $y = false)
    {
        $this->_request->setCoordinates($x, $y);
    }

    /**
     * Gets the frame target if present. Although
     * not strictly part of the URL specification it
     * acts similarly to the browser.
     * @return boolean/string    Frame name or false if none.
     * @access public
     */
    public function getTarget()
    {
        return $this->_target;
    }

    /**
     * Attaches a frame target.
     * @param string $frame        Name of frame.
     * @access public
     */
    public function setTarget($frame)
    {
        $this->_target = $frame;
    }

    /**
     * Renders the URL back into a string. The scheme
     * defaults to "http" when a host is present, and the
     * port is rendered after the host when one was parsed.
     * The login is only rendered when both the username
     * and the password are set, and the path only when it
     * starts with a slash.
     * @return string        URL in canonical form.
     * @access public
     */
    public function asString()
    {
        $scheme = $identity = $host = $port = $path = $encoded = $fragment = '';
        if ($this->_username && $this->_password) {
            $identity = $this->_username . ':' . $this->_password . '@';
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme() ? $this->getScheme() : 'http';
            $host = $this->getHost();
            // Keep the port, otherwise "host:8080" would be
            // rendered as a different server.
            $port = $this->getPort() ? ':' . $this->getPort() : '';
        }
        if (substr($this->_path, 0, 1) == '/') {
            $path = $this->normalisePath($this->_path);
        }
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        return "$scheme://$identity$host$port$path$encoded$fragment";
    }

    /**
     * Replaces unknown sections to turn a relative
     * URL into an absolute one. The base URL can
     * be either a string or a SimpleUrl object.
     * @param string/SimpleUrl $base       Base URL.
     * @return SimpleUrl                   New absolute URL; this
     *                                     object is not modified.
     * @access public
     */
    public function makeAbsolute($base)
    {
        if (! is_object($base)) {
            $base = new SimpleUrl($base);
        }
        $scheme = $this->getScheme() ? $this->getScheme() : $base->getScheme();
        $host = $this->getHost() ? $this->getHost() : $base->getHost();
        $port = $this->_extractAbsolutePort($base);
        $path = $this->normalisePath($this->_extractAbsolutePath($base));
        $identity = $this->_getIdentity() ? $this->_getIdentity() . '@' : '';
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment");
    }

    /**
     * Extracts the port from the base URL if it's needed, but
     * not present, in the current URL. The base port is only
     * used when this URL has no host of its own.
     * @param SimpleUrl $base       Base URL.
     * @return string               Port as ":number" or an
     *                              empty string.
     * @access private
     */
    public function _extractAbsolutePort($base)
    {
        if ($this->getHost()) {
            return ($this->getPort() ? ':' . $this->getPort() : '');
        }
        return ($base->getPort() ? ':' . $base->getPort() : '');
    }

    /**
     * Replaces unknown sections of the path with base parts
     * to return a complete absolute one.
     * @param SimpleUrl $base       Base URL.
     * @return string               Absolute path.
     * @access private
     */
    public function _extractAbsolutePath($base)
    {
        if ($this->getHost()) {
            return $this->_path;
        }
        if (! $this->_isRelativePath($this->_path)) {
            return $this->_path;
        }
        if ($this->_path) {
            return $base->getBasePath() . $this->_path;
        }
        return $base->getPath();
    }

    /**
     * Simple test to see if a path part is relative.
     * @param string $path        Path to test.
     * @return boolean            True unless the path starts
     *                            with a "/".
     * @access private
     */
    public function _isRelativePath($path)
    {
        return (substr($path, 0, 1) != '/');
    }

    /**
     * Extracts the username and password for use in rendering
     * a URL.
     * @return string/boolean      Form of username:password or
     *                             false unless both are set.
     * @access private
     */
    public function _getIdentity()
    {
        if ($this->_username && $this->_password) {
            return $this->_username . ':' . $this->_password;
        }
        return false;
    }

    /**
     * Replaces . and .. sections of the path.
     * @param string $path    Unoptimised path.
     * @return string         Path with dots removed if possible.
     * @access public
     */
    public function normalisePath($path)
    {
        $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
        return preg_replace('|/\./|', '/', $path);
    }

    /**
     * A pipe separated list of all TLDs that result in two part
     * domain names.
     * @return string        Pipe separated list.
     * @access public
     * @static
     */
    public static function getAllTopLevelDomains()
    {
        return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
    }
}
?>