Coverage for Doctrine_Search_Analyzer_Standard

Back to coverage report

1 <?php
2 /*
3  *  $Id$
4  *
5  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
6  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
7  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
8  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
9  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
10  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
11  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
12  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
13  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
14  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
15  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16  *
17  * This software consists of voluntary contributions made by many individuals
18  * and is licensed under the LGPL. For more information, see
19  * <http://www.phpdoctrine.org>.
20  */
21
/**
 * Doctrine_Search_Analyzer_Standard
 *
 * Splits a text into lower-cased ASCII alphanumeric terms and removes the
 * terms that appear in a fixed stopword list.
 *
 * @package     Doctrine
 * @subpackage  Search
 * @author      Konsta Vesterinen <kvesteri@cc.hut.fi>
 * @license     http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version     $Revision$
 * @link        www.phpdoctrine.org
 * @since       1.0
 */
class Doctrine_Search_Analyzer_Standard implements Doctrine_Search_Analyzer_Interface
{
    /**
     * Lower-case terms that analyze() never returns.
     *
     * All entries are strings, including the numeric ones ('0' .. '10');
     * analyze() matches against this list with strict comparison.
     *
     * @var array
     */
    protected static $_stopwords = array(
        '0',
        '1',
        '2',
        '3',
        '4',
        '5',
        '6',
        '7',
        '8',
        '9',
        '10',
        'a',
        'about',
        'after',
        'all',
        'almost',
        'along',
        'also',
        'although',
        'amp',
        'an',
        'and',
        'another',
        'any',
        'are',
        'area',
        'arent',
        'around',
        'as',
        'at',
        'available',
        'back',
        'be',
        'because',
        'been',
        'before',
        'being',
        'best',
        'better',
        'big',
        'bit',
        'both',
        'but',
        'by',
        'c',
        'came',
        'can',
        'capable',
        'control',
        'could',
        'course',
        'd',
        'dan',
        'day',
        'decided',
        'did',
        'didn',
        'different',
        'div',
        'do',
        'doesn',
        'don',
        'down',
        'drive',
        'e',
        'each',
        'easily',
        'easy',
        'edition',
        'either',
        'end',
        'enough',
        'even',
        'every',
        'example',
        'few',
        'find',
        'first',
        'for',
        'found',
        'from',
        'get',
        'go',
        'going',
        'good',
        'got',
        'gt',
        'had',
        'hard',
        'has',
        'have',
        'he',
        'her',
        'here',
        'how',
        'i',
        'if',
        'in',
        'into',
        'is',
        'isn',
        'it',
        'just',
        'know',
        'last',
        'left',
        'li',
        'like',
        'little',
        'll',
        'long',
        'look',
        'lot',
        'lt',
        'm',
        'made',
        'make',
        'many',
        'mb',
        'me',
        'menu',
        'might',
        'mm',
        'more',
        'most',
        'much',
        'my',
        'name',
        'nbsp',
        'need',
        'new',
        'no',
        'not',
        'now',
        'number',
        'of',
        'off',
        'old',
        'on',
        'one',
        'only',
        'or',
        'original',
        'other',
        'our',
        'out',
        'over',
        'part',
        'place',
        'point',
        'pretty',
        'probably',
        'problem',
        'put',
        'quite',
        'quot',
        'r',
        're',
        'really',
        'results',
        'right',
        's',
        'same',
        'saw',
        'see',
        'set',
        'several',
        'she',
        'sherree',
        'should',
        'since',
        'size',
        'small',
        'so',
        'some',
        'something',
        'special',
        'still',
        'stuff',
        'such',
        'sure',
        'system',
        't',
        'take',
        'than',
        'that',
        'the',
        'their',
        'them',
        'then',
        'there',
        'these',
        'they',
        'thing',
        'things',
        'think',
        'this',
        'those',
        'though',
        'through',
        'time',
        'to',
        'today',
        'together',
        'too',
        'took',
        'two',
        'up',
        'us',
        'use',
        'used',
        'using',
        've',
        'very',
        'want',
        'was',
        'way',
        'we',
        'well',
        'went',
        'were',
        'what',
        'when',
        'where',
        'which',
        'while',
        'white',
        'who',
        'will',
        'with',
        'would',
        'yet',
        'you',
        'your',
        'yours'
    );

    /**
     * Breaks a text into lower-cased terms, skipping stopwords.
     *
     * Quote-like characters are removed outright (so "aren't" becomes
     * "arent"); every other character outside A-Z, a-z and 0-9 is turned
     * into a space before the text is split on single spaces.
     *
     * @param string $text  the text to analyze
     * @return array        the surviving terms in lower case; each key is the
     *                      term's index in the space-split text, so keys keep
     *                      their order but are not necessarily contiguous
     */
    public function analyze($text)
    {
        $text = preg_replace('/[\'`´"]/', '', $text);
        $text = preg_replace('/[^A-Za-z0-9]/', ' ', $text);
        $text = str_replace('  ', ' ', $text);

        $ret = array();

        foreach (explode(' ', $text) as $i => $term) {
            // Runs of separators leave empty pieces behind; they are not terms.
            if ($term === '') {
                continue;
            }

            $lower = strtolower($term);

            // Strict comparison is required: the list contains the numeric
            // strings '0'..'10', and a loose in_array() would also discard
            // numerically equal terms such as '007' or '1e1'.
            if (in_array($lower, self::$_stopwords, true)) {
                continue;
            }

            $ret[$i] = $lower;
        }

        return $ret;
    }
}