281 lines
6.9 KiB
PHP
281 lines
6.9 KiB
PHP
<?php
|
|
|
|
/**
|
|
*
|
|
* Parse for URLS in the source text.
|
|
*
|
|
* @category Text
|
|
*
|
|
* @package Text_Wiki
|
|
*
|
|
* @author Paul M. Jones <pmjones@php.net>
|
|
*
|
|
* @license LGPL
|
|
*
|
|
* @version $Id: Url.php,v 1.3 2005/02/23 17:38:29 pmjones Exp $
|
|
*
|
|
*/
|
|
|
|
/**
|
|
*
|
|
* Parse for URLS in the source text.
|
|
*
|
|
* Various URL markings are supported: inline (the URL by itself),
|
|
* numbered or footnote reference (where the URL is enclosed in square
|
|
* brackets), and named reference (where the URL is enclosed in square
|
|
* brackets and has a name included inside the brackets). E.g.:
|
|
*
|
|
* inline -- http://example.com
|
|
* numbered -- [http://example.com]
|
|
* described -- [http://example.com Example Description]
|
|
*
|
|
* When rendering a URL token, this will convert URLs pointing to a .gif,
|
|
* .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
|
|
* format).
|
|
*
|
|
* Token options are:
|
|
*
|
|
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
|
|
*
|
|
* 'href' => the URL link href portion
|
|
*
|
|
* 'text' => the displayed text of the URL link
|
|
*
|
|
* @category Text
|
|
*
|
|
* @package Text_Wiki
|
|
*
|
|
* @author Paul M. Jones <pmjones@php.net>
|
|
*
|
|
*/
|
|
|
|
class Text_Wiki_Parse_Url extends Text_Wiki_Parse {
|
|
|
|
|
|
/**
|
|
*
|
|
* Keeps a running count of numbered-reference URLs.
|
|
*
|
|
* @access public
|
|
*
|
|
* @var int
|
|
*
|
|
*/
|
|
|
|
var $footnoteCount = 0;
|
|
|
|
|
|
/**
|
|
*
|
|
* URL schemes recognized by this rule.
|
|
*
|
|
* @access public
|
|
*
|
|
* @var array
|
|
*
|
|
*/
|
|
|
|
var $conf = array(
|
|
'schemes' => array(
|
|
'http://',
|
|
'https://',
|
|
'ftp://',
|
|
'gopher://',
|
|
'news://',
|
|
'mailto:'
|
|
)
|
|
);
|
|
|
|
|
|
/**
|
|
*
|
|
* Constructor.
|
|
*
|
|
* We override the constructor so we can comment the regex nicely.
|
|
*
|
|
* @access public
|
|
*
|
|
*/
|
|
|
|
function Text_Wiki_Parse_Url(&$obj)
|
|
{
|
|
parent::Text_Wiki_Parse($obj);
|
|
|
|
// convert the list of recognized schemes to a regex-safe string,
|
|
// where the pattern delim is a slash
|
|
$tmp = array();
|
|
$list = $this->getConf('schemes', array());
|
|
foreach ($list as $val) {
|
|
$tmp[] = preg_quote($val, '/');
|
|
}
|
|
$schemes = implode('|', $tmp);
|
|
|
|
// build the regex
|
|
$this->regex =
|
|
"($schemes)" . // allowed schemes
|
|
"(" . // start pattern
|
|
"[^ \\/\"\'{$this->wiki->delim}]*\\/" . // no spaces, backslashes, slashes, double-quotes, single quotes, or delimiters;
|
|
")*" . // end pattern
|
|
"[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" .
|
|
"[A-Za-z0-9\\/?=&~_]";
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* Find three different kinds of URLs in the source text.
|
|
*
|
|
* @access public
|
|
*
|
|
*/
|
|
|
|
function parse()
|
|
{
|
|
// -------------------------------------------------------------
|
|
//
|
|
// Described-reference (named) URLs.
|
|
//
|
|
|
|
// the regular expression for this kind of URL
|
|
$tmp_regex = '/\[(' . $this->regex . ') ([^\]]+)\]/';
|
|
|
|
// use a custom callback processing method to generate
|
|
// the replacement text for matches.
|
|
$this->wiki->source = preg_replace_callback(
|
|
$tmp_regex,
|
|
array(&$this, 'processDescr'),
|
|
$this->wiki->source
|
|
);
|
|
|
|
|
|
// -------------------------------------------------------------
|
|
//
|
|
// Numbered-reference (footnote-style) URLs.
|
|
//
|
|
|
|
// the regular expression for this kind of URL
|
|
$tmp_regex = '/\[(' . $this->regex . ')\]/U';
|
|
|
|
// use a custom callback processing method to generate
|
|
// the replacement text for matches.
|
|
$this->wiki->source = preg_replace_callback(
|
|
$tmp_regex,
|
|
array(&$this, 'processFootnote'),
|
|
$this->wiki->source
|
|
);
|
|
|
|
|
|
// -------------------------------------------------------------
|
|
//
|
|
// Normal inline URLs.
|
|
//
|
|
|
|
// the regular expression for this kind of URL
|
|
|
|
$tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';
|
|
|
|
// use the standard callback for inline URLs
|
|
$this->wiki->source = preg_replace_callback(
|
|
$tmp_regex,
|
|
array(&$this, 'process'),
|
|
$this->wiki->source
|
|
);
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* Process inline URLs.
|
|
*
|
|
* @param array &$matches
|
|
*
|
|
* @param array $matches An array of matches from the parse() method
|
|
* as generated by preg_replace_callback. $matches[0] is the full
|
|
* matched string, $matches[1] is the first matched pattern,
|
|
* $matches[2] is the second matched pattern, and so on.
|
|
*
|
|
* @return string The processed text replacement.
|
|
*
|
|
*/
|
|
|
|
function process(&$matches)
|
|
{
|
|
// set options
|
|
$options = array(
|
|
'type' => 'inline',
|
|
'href' => $matches[2],
|
|
'text' => $matches[2]
|
|
);
|
|
|
|
// tokenize
|
|
return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[5];
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* Process numbered (footnote) URLs.
|
|
*
|
|
* Token options are:
|
|
* @param array &$matches
|
|
*
|
|
* @param array $matches An array of matches from the parse() method
|
|
* as generated by preg_replace_callback. $matches[0] is the full
|
|
* matched string, $matches[1] is the first matched pattern,
|
|
* $matches[2] is the second matched pattern, and so on.
|
|
*
|
|
* @return string The processed text replacement.
|
|
*
|
|
*/
|
|
|
|
function processFootnote(&$matches)
|
|
{
|
|
// keep a running count for footnotes
|
|
$this->footnoteCount++;
|
|
|
|
// set options
|
|
$options = array(
|
|
'type' => 'footnote',
|
|
'href' => $matches[1],
|
|
'text' => $this->footnoteCount
|
|
);
|
|
|
|
// tokenize
|
|
return $this->wiki->addToken($this->rule, $options);
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* Process described-reference (named-reference) URLs.
|
|
*
|
|
* Token options are:
|
|
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
|
|
* 'href' => the URL link href portion
|
|
* 'text' => the displayed text of the URL link
|
|
*
|
|
* @param array &$matches
|
|
*
|
|
* @param array $matches An array of matches from the parse() method
|
|
* as generated by preg_replace_callback. $matches[0] is the full
|
|
* matched string, $matches[1] is the first matched pattern,
|
|
* $matches[2] is the second matched pattern, and so on.
|
|
*
|
|
* @return string The processed text replacement.
|
|
*
|
|
*/
|
|
|
|
function processDescr(&$matches)
|
|
{
|
|
// set options
|
|
$options = array(
|
|
'type' => 'descr',
|
|
'href' => $matches[1],
|
|
'text' => $matches[4]
|
|
);
|
|
|
|
// tokenize
|
|
return $this->wiki->addToken($this->rule, $options);
|
|
}
|
|
}
|
|
?>
|