Source for file Standard.php

Documentation is available at Standard.php

  <?php
  /*
   *  $Id$
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   *
   * This software consists of voluntary contributions made by many individuals
   * and is licensed under the LGPL. For more information, see
   * <http://www.phpdoctrine.com>.
   */
  21.  
  /**
   * Doctrine_Search_Analyzer_Standard
   *
   * Turns a piece of text into a list of lowercase search terms: a fixed set
   * of punctuation characters is replaced by spaces, the text is split on
   * single spaces, and terms found in the stopword list are discarded.
   *
   * @author      Konsta Vesterinen <kvesteri@cc.hut.fi>
   * @package     Doctrine
   * @license     http://www.opensource.org/licenses/lgpl-license.php LGPL
   * @version     $Revision$
   * @category    Object Relational Mapping
   * @link        www.phpdoctrine.com
   * @since       1.0
   */
  class Doctrine_Search_Analyzer_Standard implements Doctrine_Search_Analyzer_Interface
  {
      /**
       * Lowercase terms that analyze() never returns.
       *
       * @var array list of strings
       */
      protected static $_stopwords = array(
          '0',
          '1',
          '2',
          '3',
          '4',
          '5',
          '6',
          '7',
          '8',
          '9',
          '10',
          'a',
          'about',
          'after',
          'all',
          'almost',
          'along',
          'also',
          'amp',
          'an',
          'and',
          'another',
          'any',
          'are',
          'area',
          'around',
          'as',
          'at',
          'available',
          'back',
          'be',
          'because',
          'been',
          'being',
          'best',
          'better',
          'big',
          'bit',
          'both',
          'but',
          'by',
          'c',
          'came',
          'can',
          'capable',
          'control',
          'could',
          'course',
          'd',
          'dan',
          'day',
          'decided',
          'did',
          'didn',
          'different',
          'div',
          'do',
          'doesn',
          'don',
          'down',
          'drive',
          'e',
          'each',
          'easily',
          'easy',
          'edition',
          'end',
          'enough',
          'even',
          'every',
          'example',
          'few',
          'find',
          'first',
          'for',
          'found',
          'from',
          'get',
          'go',
          'going',
          'good',
          'got',
          'gt',
          'had',
          'hard',
          'has',
          'have',
          'he',
          'her',
          'here',
          'how',
          'i',
          'if',
          'in',
          'into',
          'is',
          'isn',
          'it',
          'just',
          'know',
          'last',
          'left',
          'li',
          'like',
          'little',
          'll',
          'long',
          'look',
          'lot',
          'lt',
          'm',
          'made',
          'make',
          'many',
          'mb',
          'me',
          'menu',
          'might',
          'mm',
          'more',
          'most',
          'much',
          'my',
          'name',
          'nbsp',
          'need',
          'new',
          'no',
          'not',
          'now',
          'number',
          'of',
          'off',
          'old',
          'on',
          'one',
          'only',
          'or',
          'original',
          'other',
          'our',
          'out',
          'over',
          'part',
          'place',
          'point',
          'pretty',
          'probably',
          'problem',
          'put',
          'quite',
          'quot',
          'r',
          're',
          'really',
          'results',
          'right',
          's',
          'same',
          'saw',
          'see',
          'set',
          'several',
          'she',
          'sherree',
          'should',
          'since',
          'size',
          'small',
          'so',
          'some',
          'something',
          'special',
          'still',
          'stuff',
          'such',
          'sure',
          'system',
          't',
          'take',
          'than',
          'that',
          'the',
          'their',
          'them',
          'then',
          'there',
          'these',
          'they',
          'thing',
          'things',
          'think',
          'this',
          'those',
          'though',
          'through',
          'time',
          'to',
          'today',
          'together',
          'too',
          'took',
          'two',
          'up',
          'us',
          'use',
          'used',
          'using',
          've',
          'very',
          'want',
          'was',
          'way',
          'we',
          'well',
          'went',
          'were',
          'what',
          'when',
          'where',
          'which',
          'while',
          'white',
          'who',
          'will',
          'with',
          'would',
          'you',
          'your',
      );

      /**
       * Splits text into lowercase terms, skipping blanks and stopwords.
       *
       * @param string $text  the text to analyze
       * @return array        lowercase terms keyed by the index of the
       *                      space-separated piece they came from; keys are
       *                      therefore not necessarily consecutive
       */
      public function analyze($text)
      {
          // Replace punctuation with spaces. The pound sign is written as a
          // code point and matched in UTF-8 mode so that it is treated as one
          // character; a byte-wise class containing a multibyte character
          // would strip its individual bytes and corrupt other characters.
          $clean = preg_replace('/[.()&#!,?^\x{A3}@%{}+]/u', ' ', $text);
          if ($clean === null) {
              // preg_replace() returns null in UTF-8 mode when the subject is
              // not valid UTF-8; fall back to byte-wise matching, where \xA3
              // is the single-byte (ISO-8859-1) pound sign.
              $clean = preg_replace('/[.()&#!,?^\xA3@%{}+]/', ' ', $text);
          }

          // Single-pass collapse of double spaces. Any empty pieces that remain
          // are skipped in the loop; this step only affects the result keys.
          $clean = str_replace('  ', ' ', $clean);

          $ret = array();
          foreach (explode(' ', $clean) as $i => $term) {
              // Trim before the emptiness check so whitespace-only pieces
              // (tabs, newlines) are not returned as empty terms.
              $lower = strtolower(trim($term));
              if ($lower === '') {
                  continue;
              }

              // Strict comparison: a loose in_array() compares numeric strings
              // by value, so terms such as '007' or '1e1' would be mistaken
              // for the stopwords '7' and '10'.
              if (in_array($lower, self::$_stopwords, true)) {
                  continue;
              }

              $ret[$i] = $lower;
          }

          return $ret;
      }
  }