1 <?php
2
3 /*
4 * This file is part of the Icybee package.
5 *
6 * (c) Olivier Laviale <olivier.laviale@gmail.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
/**
 * Computes a heuristic spam score for a comment.
 *
 * The score starts at 0 and is adjusted by a series of independent checks
 * (number of links, blacklisted keywords, body length, URL length, typical
 * spam openers, author name, word count).
 *
 * Interpretation of the result, as documented by the original author:
 *
 * - score >= 1 : the message doesn't look like spam
 * - score == 0 : the message should be put to moderation
 * - score <  0 : the message is most certainly spam (the original comment
 *                read "< 10", which is presumably a typo)
 *
 * @param string $body     Text of the comment.
 * @param string $url      URL submitted with the comment.
 * @param string $author   Author name submitted with the comment.
 * @param array  $words    Extra keywords to penalize, in addition to the built-in list.
 *                         They are matched against the lower-cased body.
 * @param array  $starters Extra opening words to penalize, in addition to the built-in list.
 *
 * @return int The spam score.
 */
function wd_spamScore($body, $url, $author, $words=array(), $starters=array())
{
	$score = 0;

	#
	# put our body in lower case for checking
	#

	$body = strtolower($body);

	#
	# how many links are in the body ?
	#
	# The count is kept in its own variable because it is needed again for the
	# body length check below.
	#

	$links = max
	(
		substr_count($body, 'http://'),
		substr_count($body, 'href'),
		substr_count($body, 'ftp')
	);

	if ($links > 2)
	{
		#
		# more than 2 : -1 point per link
		#

		$score -= $links;
	}
	else
	{
		#
		# 2 or less : +2 points
		#

		$score += 2;
	}

	#
	# Keyword search : -1 point per occurrence
	#

	$words = array_merge
	(
		$words, array
		(
			'levitra', 'viagra', 'casino', 'porn', 'sex', 'tape'
		)
	);

	foreach ($words as $word)
	{
		$word = (string) $word;

		#
		# substr_count() rejects an empty needle, skip it
		#

		if ($word === '')
		{
			continue;
		}

		$score -= substr_count($body, $word);
	}

	#
	# now remove links
	#

	# html style: <a> </a>

	$body = preg_replace('#\<a\s.+\<\/a\>#', '', $body);

	# bb style: [url] [/url]

	$body = preg_replace('#\[url.+\/url\]#', '', $body);

	# remaining addresses: http://

	$body = preg_replace('#http://[^\s]+#', '', $body);

	#
	# how long is the body (once links are removed) ?
	#

	$length = strlen($body);

	if ($length > 20 && $links === 0)
	{
		#
		# More than 20 characters and there's no links : +2 points
		#

		$score += 2;
	}
	else if ($length < 20)
	{
		#
		# Less than 20 characters : -1 point
		#

		$score--;
	}

	#
	# URL length
	#

	if (strlen($url) > 32)
	{
		$score--;
	}

	#
	# Body starts with...
	#
	# array_merge() is used rather than the `+` operator: both arrays are
	# integer-indexed lists, and a union would drop the built-in starters
	# sharing an index with the user supplied ones.
	#

	$starters = array_merge
	(
		$starters, array
		(
			'interesting', 'sorry', 'nice', 'cool', 'hi'
		)
	);

	foreach ($starters as $word)
	{
		$pos = strpos($body, $word . ' ');

		#
		# only matches within the first characters of the body count
		#

		if ($pos === false || $pos > 10)
		{
			continue;
		}

		$score -= 10;

		break;
	}

	#
	# Author name has 'http://' in it
	#
	# strpos() returns 0 when the name starts with 'http://', hence the strict
	# comparison with false.
	#

	if (strpos($author, 'http://') !== false)
	{
		$score -= 2;
	}

	#
	# How many words are used
	#

	if (str_word_count($body) < 10)
	{
		$score -= 5;
	}

	# TODO:
	#
	# Number of previous comments from email
	#
	# -> Approved comments : +1 per comment
	# -> Marked as spam : -1 per comment
	#
	# Body used in previous comment
	#

	return $score;
}
191