diff options
Diffstat (limited to 'test_tools/simpletest/url.php')
-rw-r--r-- | test_tools/simpletest/url.php | 508 |
1 files changed, 508 insertions, 0 deletions
diff --git a/test_tools/simpletest/url.php b/test_tools/simpletest/url.php new file mode 100644 index 00000000..03902ed0 --- /dev/null +++ b/test_tools/simpletest/url.php @@ -0,0 +1,508 @@ +<?php + /** + * base include file for SimpleTest + * @package SimpleTest + * @subpackage WebTester + * @version $Id: url.php,v 1.22 2005/02/02 23:25:23 lastcraft Exp $ + */ + + /**#@+ + * include other SimpleTest class files + */ + require_once(dirname(__FILE__) . '/encoding.php'); + /**#@-*/ + + /** + * URL parser to replace parse_url() PHP function which + * got broken in PHP 4.3.0. Adds some browser specific + * functionality such as expandomatics. + * Guesses a bit trying to separate the host from + * the path. + * @package SimpleTest + * @subpackage WebTester + */ + class SimpleUrl { + protected $_scheme; + protected $_username; + protected $_password; + protected $_host; + protected $_port; + protected $_path; + protected $_request; + protected $_fragment; + protected $_target; + + /** + * Constructor. Parses URL into sections. + * @param string $url Incoming URL. + * @access public + */ + function SimpleUrl($url) { + list($x, $y) = $this->_chompCoordinates($url); + $this->_scheme = $this->_chompScheme($url); + list($this->_username, $this->_password) = $this->_chompLogin($url); + $this->_host = $this->_chompHost($url); + $this->_port = false; + if (preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) { + $this->_host = $host_parts[1]; + $this->_port = (integer)$host_parts[2]; + } + $this->_path = $this->_chompPath($url); + $this->_request = $this->_parseRequest($this->_chompRequest($url)); + $this->_request->setCoordinates($x, $y); + $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false); + $this->_target = false; + } + + /** + * Extracts the X, Y coordinate pair from an image map. + * @param string $url URL so far. The coordinates will be + * removed. + * @return array X, Y as a pair of integers. + * @access private + */ + function _chompCoordinates($url) { + if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) { + $url = $matches[1]; + return array((integer)$matches[2], (integer)$matches[3]); + } + return array(false, false); + } + + /** + * Extracts the scheme part of an incoming URL. + * @param string $url URL so far. The scheme will be + * removed. + * @return string Scheme part or false. + * @access private + */ + function _chompScheme($url) { + if (preg_match('/(.*?):(\/\/)(.*)/', $url, $matches)) { + $url = $matches[2] . $matches[3]; + return $matches[1]; + } + return false; + } + + /** + * Extracts the username and password from the + * incoming URL. The // prefix will be reattached + * to the URL after the doublet is extracted. + * @param string $url URL so far. The username and + * password are removed. + * @return array Two item list of username and + * password. Will urldecode() them. + * @access private + */ + function _chompLogin($url) { + $prefix = ''; + if (preg_match('/(\/\/)(.*)/', $url, $matches)) { + $prefix = $matches[1]; + $url = $matches[2]; + } + if (preg_match('/(.*?)@(.*)/', $url, $matches)) { + $url = $prefix . $matches[2]; + $parts = split(":", $matches[1]); + return array( + urldecode($parts[0]), + isset($parts[1]) ? urldecode($parts[1]) : false); + } + $url = $prefix . $url; + return array(false, false); + } + + /** + * Extracts the host part of an incoming URL. + * Includes the port number part. Will extract + * the host if it starts with // or it has + * a top level domain or it has at least two + * dots. + * @param string $url URL so far. The host will be + * removed. + * @return string Host part guess or false. + * @access private + */ + function _chompHost($url) { + if (preg_match('/(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) { + $url = $matches[3]; + return $matches[2]; + } + if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) { + $tlds = SimpleUrl::getAllTopLevelDomains(); + if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) { + $url = $matches[2] . $matches[3]; + return $matches[1]; + } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) { + $url = $matches[2] . $matches[3]; + return $matches[1]; + } + } + return false; + } + + /** + * Extracts the path information from the incoming + * URL. Strips this path from the URL. + * @param string $url URL so far. The host will be + * removed. + * @return string Path part or '/'. + * @access private + */ + function _chompPath($url) { + if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) { + $url = $matches[2] . $matches[3]; + return ($matches[1] ? $matches[1] : ''); + } + return ''; + } + + /** + * Strips off the request data. + * @param string $url URL so far. The request will be + * removed. + * @return string Raw request part. + * @access private + */ + function _chompRequest($url) { + if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) { + $url = $matches[2] . $matches[3]; + return $matches[1]; + } + return ''; + } + + /** + * Breaks the request down into an object. + * @param string $raw Raw request. + * @return SimpleFormEncoding Parsed data. + * @access private + */ + function _parseRequest($raw) { + $request = new SimpleFormEncoding(); + foreach (split("&", $raw) as $pair) { + if (preg_match('/(.*?)=(.*)/', $pair, $matches)) { + $request->add($matches[1], urldecode($matches[2])); + } elseif ($pair) { + $request->add($pair, ''); + } + } + return $request; + } + + /** + * Accessor for protocol part. + * @param string $default Value to use if not present. + * @return string Scheme name, e.g "http". + * @access public + */ + function getScheme($default = false) { + return $this->_scheme ? $this->_scheme : $default; + } + + /** + * Accessor for user name. + * @return string Username preceding host. + * @access public + */ + function getUsername() { + return $this->_username; + } + + /** + * Accessor for password. + * @return string Password preceding host. + * @access public + */ + function getPassword() { + return $this->_password; + } + + /** + * Accessor for hostname and port. + * @param string $default Value to use if not present. + * @return string Hostname only. + * @access public + */ + function getHost($default = false) { + return $this->_host ? $this->_host : $default; + } + + /** + * Accessor for top level domain. + * @return string Last part of host. + * @access public + */ + function getTld() { + $path_parts = pathinfo($this->getHost()); + return (isset($path_parts['extension']) ? $path_parts['extension'] : false); + } + + /** + * Accessor for port number. + * @return integer TCP/IP port number. + * @access public + */ + function getPort() { + return $this->_port; + } + + /** + * Accessor for path. + * @return string Full path including leading slash if implied. + * @access public + */ + function getPath() { + if (! $this->_path && $this->_host) { + return '/'; + } + return $this->_path; + } + + /** + * Accessor for page if any. This may be a + * directory name if ambiguious. + * @return Page name. + * @access public + */ + function getPage() { + if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) { + return false; + } + return $matches[1]; + } + + /** + * Gets the path to the page. + * @return string Path less the page. + * @access public + */ + function getBasePath() { + if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) { + return false; + } + return $matches[1]; + } + + /** + * Accessor for fragment at end of URL after the "#". + * @return string Part after "#". + * @access public + */ + function getFragment() { + return $this->_fragment; + } + + /** + * Accessor for horizontal image coordinate. + * @return integer X value. + * @access public + */ + function getX() { + return $this->_request->getX(); + } + + /** + * Accessor for vertical image coordinate. + * @return integer Y value. + * @access public + */ + function getY() { + return $this->_request->getY(); + } + + /** + * Accessor for current request parameters + * in URL string form + * @return string Form is string "?a=1&b=2", etc. + * @access public + */ + function getEncodedRequest() { + $encoded = $this->_request->asString(); + if ($encoded) { + return '?' . preg_replace('/^\?/', '', $encoded); + } + return ''; + } + + /** + * Adds an additional parameter to the request. + * @param string $key Name of parameter. + * @param string $value Value as string. + * @access public + */ + function addRequestParameter($key, $value) { + $this->_request->add($key, $value); + } + + /** + * Adds additional parameters to the request. + * @param hash/SimpleFormEncoding $parameters Additional + * parameters. + * @access public + */ + function addRequestParameters($parameters) { + $this->_request->merge($parameters); + } + + /** + * Clears down all parameters. + * @access public + */ + function clearRequest() { + $this->_request = new SimpleFormEncoding(); + } + + /** + * Sets image coordinates. Set to flase to clear + * them. + * @param integer $x Horizontal position. + * @param integer $y Vertical position. + * @access public + */ + function setCoordinates($x = false, $y = false) { + $this->_request->setCoordinates($x, $y); + } + + /** + * Gets the frame target if present. Although + * not strictly part of the URL specification it + * acts as similarily to the browser. + * @return boolean/string Frame name or false if none. + * @access public + */ + function getTarget() { + return $this->_target; + } + + /** + * Attaches a frame target. + * @param string $frame Name of frame. + * @access public + */ + function setTarget($frame) { + $this->_target = $frame; + } + + /** + * Renders the URL back into a string. + * @return string URL in canonical form. + * @access public + */ + function asString() { + $scheme = $identity = $host = $path = $encoded = $fragment = ''; + if ($this->_username && $this->_password) { + $identity = $this->_username . ':' . $this->_password . '@'; + } + if ($this->getHost()) { + $scheme = $this->getScheme() ? $this->getScheme() : 'http'; + $host = $this->getHost(); + } + if (substr($this->_path, 0, 1) == '/') { + $path = $this->normalisePath($this->_path); + } + $encoded = $this->getEncodedRequest(); + $fragment = $this->getFragment() ? '#'. $this->getFragment() : ''; + return "$scheme://$identity$host$path$encoded$fragment"; + } + + /** + * Replaces unknown sections to turn a relative + * URL into an absolute one. The base URL can + * be either a string or a SimpleUrl object. + * @param string/SimpleUrl $base Base URL. + * @access public + */ + function makeAbsolute($base) { + if (! is_object($base)) { + $base = new SimpleUrl($base); + } + $scheme = $this->getScheme() ? $this->getScheme() : $base->getScheme(); + $host = $this->getHost() ? $this->getHost() : $base->getHost(); + $port = $this->_extractAbsolutePort($base); + $path = $this->normalisePath($this->_extractAbsolutePath($base)); + $identity = $this->_getIdentity() ? $this->_getIdentity() . '@' : ''; + $encoded = $this->getEncodedRequest(); + $fragment = $this->getFragment() ? '#'. $this->getFragment() : ''; + return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment"); + } + + /** + * Extracts the port from the base URL if it's needed, but + * not present, in the current URL. + * @param string/SimpleUrl $base Base URL. + * @param string Absolute port number. + * @access private + */ + function _extractAbsolutePort($base) { + if ($this->getHost()) { + return ($this->getPort() ? ':' . $this->getPort() : ''); + } + return ($base->getPort() ? ':' . $base->getPort() : ''); + } + + /** + * Replaces unknown sections of the path with base parts + * to return a complete absolute one. + * @param string/SimpleUrl $base Base URL. + * @param string Absolute path. + * @access private + */ + function _extractAbsolutePath($base) { + if ($this->getHost()) { + return $this->_path; + } + if (! $this->_isRelativePath($this->_path)) { + return $this->_path; + } + if ($this->_path) { + return $base->getBasePath() . $this->_path; + } + return $base->getPath(); + } + + /** + * Simple test to see if a path part is relative. + * @param string $path Path to test. + * @return boolean True if starts with a "/". + * @access private + */ + function _isRelativePath($path) { + return (substr($path, 0, 1) != '/'); + } + + /** + * Extracts the username and password for use in rendering + * a URL. + * @return string/boolean Form of username:password@ or false. + * @access private + */ + function _getIdentity() { + if ($this->_username && $this->_password) { + return $this->_username . ':' . $this->_password; + } + return false; + } + + /** + * Replaces . and .. sections of the path. + * @param string $path Unoptimised path. + * @return string Path with dots removed if possible. + * @access public + */ + function normalisePath($path) { + $path = preg_replace('|/[^/]+/\.\./|', '/', $path); + return preg_replace('|/\./|', '/', $path); + } + + /** + * A pipe seperated list of all TLDs that result in two part + * domain names. + * @return string Pipe separated list. + * @access public + * @static + */ + function getAllTopLevelDomains() { + return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum'; + } + } +?>
\ No newline at end of file |