348 lines
8.9 KiB
PHP
348 lines
8.9 KiB
PHP
|
<?php
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Parse for URLS in the source text.
|
||
|
*
|
||
|
* @category Text
|
||
|
*
|
||
|
* @package Text_Wiki
|
||
|
*
|
||
|
* @author Paul M. Jones <pmjones@php.net>
|
||
|
*
|
||
|
* @author Moritz Venn <moritz.venn@freaque.net>
|
||
|
*
|
||
|
* @license LGPL
|
||
|
*
|
||
|
* @version $Id: Url.php,v 1.1 2005/12/06 15:54:56 ritzmo Exp $
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Parse for URLS in the source text.
|
||
|
*
|
||
|
* Various URL markings are supported: inline (the URL by itself),
|
||
|
* inline (where the URL is enclosed in square brackets), and named
|
||
|
* reference (where the URL is enclosed in square brackets and has a
|
||
|
* name included inside the brackets). E.g.:
|
||
|
*
|
||
|
* inline -- http://example.com
|
||
|
* undescribed -- [http://example.com]
|
||
|
* described -- [http://example.com Example Description]
|
||
|
* described -- [http://www.example.com|Example Description]
|
||
|
*
|
||
|
* When rendering a URL token, this will convert URLs pointing to a .gif,
|
||
|
* .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
|
||
|
* format).
|
||
|
*
|
||
|
* Token options are:
|
||
|
*
|
||
|
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
|
||
|
*
|
||
|
* 'href' => the URL link href portion
|
||
|
*
|
||
|
* 'text' => the displayed text of the URL link
|
||
|
*
|
||
|
* @category Text
|
||
|
*
|
||
|
* @package Text_Wiki
|
||
|
*
|
||
|
* @author Paul M. Jones <pmjones@php.net>
|
||
|
*
|
||
|
* @author Moritz Venn <moritz.venn@freaque.net>
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
class Text_Wiki_Parse_Url extends Text_Wiki_Parse {
|
||
|
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Keeps a running count of numbered-reference URLs.
|
||
|
*
|
||
|
* @access public
|
||
|
*
|
||
|
* @var int
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
var $footnoteCount = 0;
|
||
|
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* URL schemes recognized by this rule.
|
||
|
*
|
||
|
* @access public
|
||
|
*
|
||
|
* @var array
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
var $conf = array(
|
||
|
'schemes' => array(
|
||
|
'http://',
|
||
|
'https://',
|
||
|
'ftp://',
|
||
|
'gopher://',
|
||
|
'news://',
|
||
|
'mailto:'
|
||
|
)
|
||
|
);
|
||
|
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Constructor.
|
||
|
*
|
||
|
* We override the constructor so we can comment the regex nicely.
|
||
|
*
|
||
|
* @access public
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
function Text_Wiki_Parse_Url(&$obj)
|
||
|
{
|
||
|
parent::Text_Wiki_Parse($obj);
|
||
|
|
||
|
// convert the list of recognized schemes to a regex-safe string,
|
||
|
// where the pattern delim is a slash
|
||
|
$tmp = array();
|
||
|
$list = $this->getConf('schemes', array());
|
||
|
foreach ($list as $val) {
|
||
|
$tmp[] = preg_quote($val, '/');
|
||
|
}
|
||
|
$schemes = implode('|', $tmp);
|
||
|
|
||
|
// build the regex
|
||
|
$this->regex =
|
||
|
"($schemes)" . // allowed schemes
|
||
|
"(" . // start pattern
|
||
|
"[^ \\/\"\'{$this->wiki->delim}]*\\/" . // no spaces, backslashes, slashes, double-quotes, single quotes, or delimiters;
|
||
|
")*" . // end pattern
|
||
|
"[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" .
|
||
|
"[A-Za-z0-9\\/?=&~_]";
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Find three different kinds of URLs in the source text.
|
||
|
*
|
||
|
* @access public
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
function parse()
|
||
|
{
|
||
|
// -------------------------------------------------------------
|
||
|
//
|
||
|
// Described-reference (named) URLs.
|
||
|
//
|
||
|
|
||
|
// the regular expression for this kind of URL
|
||
|
$tmp_regex = '/\[(' . $this->regex . ')[ |]([^\]]+)\]/';
|
||
|
|
||
|
// use a custom callback processing method to generate
|
||
|
// the replacement text for matches.
|
||
|
$this->wiki->source = preg_replace_callback(
|
||
|
$tmp_regex,
|
||
|
array(&$this, 'processDescr'),
|
||
|
$this->wiki->source
|
||
|
);
|
||
|
|
||
|
|
||
|
// -------------------------------------------------------------
|
||
|
//
|
||
|
// Unnamed-reference ('Ordinary'-style) URLs.
|
||
|
//
|
||
|
|
||
|
// the regular expression for this kind of URL
|
||
|
$tmp_regex = '/\[(' . $this->regex . ')\]/U';
|
||
|
|
||
|
// use a custom callback processing method to generate
|
||
|
// the replacement text for matches.
|
||
|
$this->wiki->source = preg_replace_callback(
|
||
|
$tmp_regex,
|
||
|
//array(&$this, 'processFootnote'),
|
||
|
array(&$this, 'processOrdinary'),
|
||
|
$this->wiki->source
|
||
|
);
|
||
|
|
||
|
|
||
|
// -------------------------------------------------------------
|
||
|
//
|
||
|
// Normal inline URLs.
|
||
|
//
|
||
|
|
||
|
// the regular expression for this kind of URL
|
||
|
|
||
|
$tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';
|
||
|
|
||
|
// use the standard callback for inline URLs
|
||
|
$this->wiki->source = preg_replace_callback(
|
||
|
$tmp_regex,
|
||
|
array(&$this, 'process'),
|
||
|
$this->wiki->source
|
||
|
);
|
||
|
|
||
|
|
||
|
//$tmp_regex = '/(^|[^A-Za-z])([a-zA-Z])(.*?)/';
|
||
|
$tmp_regex = '/(^|\s)([a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)($|\s)/';
|
||
|
|
||
|
// use the standard callback for inline URLs
|
||
|
$this->wiki->source = preg_replace_callback(
|
||
|
$tmp_regex,
|
||
|
array(&$this, 'processWithoutProtocol'),
|
||
|
$this->wiki->source
|
||
|
);
|
||
|
|
||
|
$tmp_regex = '/(^|\s|'.$this->wiki->delim.')<([a-zA-Z0-9\-\.%_\+\!\*\'\(\)\,]+@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)>(\s|'.$this->wiki->delim.'|$)/';
|
||
|
|
||
|
// use the standard callback for inline URLs
|
||
|
$this->wiki->source = preg_replace_callback(
|
||
|
$tmp_regex,
|
||
|
array(&$this, 'processInlineEmail'),
|
||
|
$this->wiki->source
|
||
|
);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Process inline URLs.
|
||
|
*
|
||
|
* @param array &$matches
|
||
|
*
|
||
|
* @param array $matches An array of matches from the parse() method
|
||
|
* as generated by preg_replace_callback. $matches[0] is the full
|
||
|
* matched string, $matches[1] is the first matched pattern,
|
||
|
* $matches[2] is the second matched pattern, and so on.
|
||
|
*
|
||
|
* @return string The processed text replacement.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
function process(&$matches)
|
||
|
{
|
||
|
// set options
|
||
|
$options = array(
|
||
|
'type' => 'inline',
|
||
|
'href' => $matches[2],
|
||
|
'text' => $matches[2]
|
||
|
);
|
||
|
|
||
|
// tokenize
|
||
|
return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[5];
|
||
|
}
|
||
|
|
||
|
function processWithoutProtocol(&$matches)
|
||
|
{
|
||
|
// set options
|
||
|
$options = array(
|
||
|
'type' => 'inline',
|
||
|
'href' => 'http://'.$matches[2],
|
||
|
'text' => $matches[2]
|
||
|
);
|
||
|
|
||
|
// tokenize
|
||
|
return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
|
||
|
}
|
||
|
|
||
|
function processInlineEmail(&$matches)
|
||
|
{
|
||
|
// set options
|
||
|
$options = array(
|
||
|
'type' => 'inline',
|
||
|
'href' => 'mailto://'.$matches[2],
|
||
|
'text' => $matches[2]
|
||
|
);
|
||
|
|
||
|
// tokenize
|
||
|
return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[4];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Process numbered (footnote) URLs.
|
||
|
*
|
||
|
* Token options are:
|
||
|
* @param array &$matches
|
||
|
*
|
||
|
* @param array $matches An array of matches from the parse() method
|
||
|
* as generated by preg_replace_callback. $matches[0] is the full
|
||
|
* matched string, $matches[1] is the first matched pattern,
|
||
|
* $matches[2] is the second matched pattern, and so on.
|
||
|
*
|
||
|
* @return string The processed text replacement.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
function processFootnote(&$matches)
|
||
|
{
|
||
|
// keep a running count for footnotes
|
||
|
$this->footnoteCount++;
|
||
|
|
||
|
// set options
|
||
|
$options = array(
|
||
|
'type' => 'footnote',
|
||
|
'href' => $matches[1],
|
||
|
'text' => $this->footnoteCount
|
||
|
);
|
||
|
|
||
|
// tokenize
|
||
|
return $this->wiki->addToken($this->rule, $options);
|
||
|
}
|
||
|
|
||
|
function processOrdinary(&$matches)
|
||
|
{
|
||
|
// keep a running count for footnotes
|
||
|
$this->footnoteCount++;
|
||
|
|
||
|
// set options
|
||
|
$options = array(
|
||
|
'type' => 'descr',
|
||
|
'href' => $matches[1],
|
||
|
'text' => $matches[1]
|
||
|
);
|
||
|
|
||
|
// tokenize
|
||
|
return $this->wiki->addToken($this->rule, $options);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
*
|
||
|
* Process described-reference (named-reference) URLs.
|
||
|
*
|
||
|
* Token options are:
|
||
|
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
|
||
|
* 'href' => the URL link href portion
|
||
|
* 'text' => the displayed text of the URL link
|
||
|
*
|
||
|
* @param array &$matches
|
||
|
*
|
||
|
* @param array $matches An array of matches from the parse() method
|
||
|
* as generated by preg_replace_callback. $matches[0] is the full
|
||
|
* matched string, $matches[1] is the first matched pattern,
|
||
|
* $matches[2] is the second matched pattern, and so on.
|
||
|
*
|
||
|
* @return string The processed text replacement.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
function processDescr(&$matches)
|
||
|
{
|
||
|
// set options
|
||
|
$options = array(
|
||
|
'type' => 'descr',
|
||
|
'href' => $matches[1],
|
||
|
'text' => $matches[4]
|
||
|
);
|
||
|
|
||
|
// tokenize
|
||
|
return $this->wiki->addToken($this->rule, $options);
|
||
|
}
|
||
|
}
|
||
|
?>
|