<?php
/**
* This file implements the basic Antispam plugin.
*
* This file is part of the b2evolution project - {@link http://b2evolution.net/}
*
* @license GNU GPL v2 - {@link http://b2evolution.net/about/gnu-gpl-license}
*
* @copyright (c)2003-2020 by Francois Planque - {@link http://fplanque.com/}
* Parts of this file are copyright (c)2004-2006 by Daniel HAHLER - {@link http://thequod.de/contact}.
*
* @package plugins
*/
// Direct-access guard: stop immediately unless the EVO_MAIN_INIT constant is defined
// (presumably set by the application's main init file - confirm against the bootstrap code).
if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
/**
* Basic Antispam Plugin
*
* This plugin doublechecks referers/referrers for Hit logging and trackbacks.
*
* @todo Ideas:
* - forbid cloned comments (same content) (on the same entry or all entries)
* - detect same/similar URLs in a short period (also look at author name: if it differs, it's more likely to be spam)
*/
class basic_antispam_plugin extends Plugin
{
/**
 * Plugin identity properties.
 *
 * Variables below MUST be overridden by plugin implementations,
 * either in the subclass declaration or in the subclass constructor.
 */
// Display name of the plugin:
var $name = 'Basic Antispam';
// Short code identifying the plugin:
var $code = 'b2evBAspm';
// NOTE(review): presumably the ordering of this plugin relative to other plugins - confirm against the Plugin base class.
var $priority = 60;
// Version string of this plugin:
var $version = '7.2.3';
var $author = 'The b2evo Group';
// Plugin group this plugin is filed under:
var $group = 'antispam';
// NOTE(review): presumably the maximum number of installed instances - confirm against the Plugin base class.
var $number_of_installs = 1;
/**
 * Set the translatable short and long descriptions of the plugin.
 *
 * @param array Associative array of parameters (not read by this method)
 */
function PluginInit( & $params )
{
	// Both texts are passed through T_() before being stored on the plugin:
	$this->long_desc = T_('This plugin provides basic methods to detect & block spam on referers, comments & trackbacks.');
	$this->short_desc = T_('Basic antispam methods');
}
/**
 * Define the GLOBAL settings of the plugin here. These can then be edited in the backoffice in System > Plugins.
 *
 * Settings defined here:
 *  - min_comment_interval: seconds between comments from the same IP/email (0 disables)
 *  - check_dupes: detect duplicate comments/trackbacks
 *  - max_number_of_links_feedback: link count above which a comment gets 100% spam karma (-1 disables)
 *  - trim_whitespace: strip leading/trailing whitespace from comment content
 *  - remove_repetitions: collapse long runs of a repeated character
 *  - block_common_spam: reject comments using both "[link=" and "[url=" tags
 *  - check_url_referers: fetch referring pages and look for a link to us
 *
 * @param array Associative array of parameters (since v1.9).
 *    'for_editing': true, if the settings get queried for editing;
 *                   false, if they get queried for instantiating {@link Plugin::$Settings}.
 * @return array see {@link Plugin::GetDefaultSettings()}.
 *    The array keys are the names of the settings (max length is 30 chars); each value is
 *    an array describing the setting. The keys used below are 'type', 'label', 'note',
 *    'help', 'defaultvalue', 'size' and 'valid_range' (only 'label' is required).
 */
function GetDefaultSettings( & $params )
{
	return array(
			'min_comment_interval' => array(
				'type' => 'integer',
				'label' => T_('Minimum comment interval'),
				'note' => T_('Minimum interval (in seconds) between consecutive comments from same IP or email address. 0 to disable it.'),
				'defaultvalue' => '30',
				'size' => 3,
				'valid_range' => array( 'min' => 0 ),
			),
			'check_dupes' => array(
				'type' => 'checkbox',
				'label' => T_('Detect feedback duplicates'),
				'note' => T_('Check this to check comments and trackback for duplicate content.'),
				'defaultvalue' => '1',
			),
			'max_number_of_links_feedback' => array(
				'type' => 'integer',
				'label' => T_('Feedback sensitivity to links'),
				'note' => T_('If a comment has more than this number of links in it, it will get 100 percent spam karma. -1 to disable it.'),
				'help' => '#set_max_number_of_links',
				'defaultvalue' => '4',
				'size' => 3,
				// -1 is the documented "disabled" value; anything lower has no meaning
				// and would make every comment exceed the limit:
				'valid_range' => array( 'min' => -1 ),
			),
			'trim_whitespace' => array(
				'type' => 'checkbox',
				'label' => T_('Strip whitespace'),
				'note' => T_('Strip whitespace from the beginning and end of comment content.'),
				'defaultvalue' => 1,
			),
			'remove_repetitions' => array(
				'type' => 'checkbox',
				'label' => T_('Remove repetitive characters'),
				'note' => T_('Remove repetitive characters in name and content. The string like "Thaaaaaaaaaanks!" becomes "Thaaanks!".'),
				'defaultvalue' => 0,
			),
			'block_common_spam' => array(
				'type' => 'checkbox',
				'label' => T_('Block common spam comments'),
				'note' => T_('Block comments with both "[link=" and "[url=" tags.'),
				'defaultvalue' => 1,
			),
			'check_url_referers' => array(
				'type' => 'checkbox',
				'label' => T_('Check referers for URL'),
				'note' => T_('Check refering pages, if they contain our URL. This may generate a lot of additional traffic!'),
				'defaultvalue' => '0',
			),
		);
}
/**
 * Define the PER-GROUP settings of the plugin here.
 *
 * The only group setting is 'comment_throttling': the minimum number of seconds
 * between two comments posted by a user of the group (0 disables it).
 *
 * @see Plugin::GetDefaultSettings()
 * @param array Associative array of parameters.
 *    'for_editing': true, if the settings get queried for editing;
 *                   false, if they get queried for instantiating the settings object.
 * @return array See {@link Plugin::GetDefaultSettings()}.
 */
function GetDefaultGroupSettings( & $params )
{
	// The note is assembled from two separately translated sentences:
	$throttling_note = T_('Users from this group will only be able to post a comment every x seconds.').' '.T_('0 to disable it.');

	$group_settings = array();
	$group_settings['comment_throttling'] = array(
			'label' => T_('Minimum comment interval'),
			'note' => $throttling_note,
			'type' => 'integer',
			'defaultvalue' => 2,
			'size' => 3,
			'valid_range' => array( 'min' => 0 ),
		);

	return $group_settings;
}
/**
 * Handle max_number_of_links_feedback setting.
 *
 * Counts the URL schemes ("http://", "https://", "ftp://") found in the comment
 * content and turns that count into a spam karma value, relative to the
 * configured maximum number of links.
 *
 * @param array Associative array of parameters; 'Comment' is the comment whose
 *              content gets inspected.
 * @return integer|float|NULL
 *    - NULL when the check is disabled (negative or non-numeric setting),
 *    - 100 when the comment has more links than allowed,
 *    - 0 when it has no links at all,
 *    - otherwise a value proportional to the number of links (100 at the limit).
 */
function GetSpamKarmaForComment( & $params )
{
	$max_links = $this->Settings->get( 'max_number_of_links_feedback' );

	if( ! is_numeric( $max_links ) || $max_links < 0 )
	{ // Deactivated (-1), or no usable limit configured. Any other negative value is
		// treated as "disabled" too, instead of flagging every single comment:
		return NULL;
	}
	$max_links = (int) $max_links;

	// Cast the subject to string, so that a comment without content is simply seen as "no links";
	// a failing preg_match_all() (false) counts as "no links" as well:
	$link_count = (int) preg_match_all( '~(https?|ftp)://~i', (string) $params['Comment']->content, $matches );

	if( $link_count > $max_links )
	{ // Too many links:
		return 100;
	}

	if( $link_count == 0 )
	{ // No links at all:
		return 0;
	}

	// Here 1 <= $link_count <= $max_links, so the division is safe:
	return ( 100 / $max_links ) * $link_count;
}
/**
 * Switch plugin events on or off, depending on the current settings.
 *
 * - "AppendHitLog" is enabled only when the 'check_url_referers' setting is on.
 * - "BeforeTrackbackInsert" is enabled only when the 'check_dupes' setting is on.
 *
 * @return boolean always true
 */
function BeforeEnable()
{
	// Referer double-checking:
	$check_referers = $this->Settings->get('check_url_referers');
	if( ! $check_referers )
	{
		$this->disable_event( 'AppendHitLog' );
	}
	else
	{
		$this->enable_event( 'AppendHitLog' );
	}

	// Duplicate detection for trackbacks:
	$check_duplicates = $this->Settings->get('check_dupes');
	if( $check_duplicates )
	{
		$this->enable_event( 'BeforeTrackbackInsert' );
	}
	else
	{
		$this->disable_event( 'BeforeTrackbackInsert' );
	}

	return true;
}
/**
 * Report an error if the incoming trackback duplicates an existing one.
 *
 * When the trackback's Item can be resolved, the rejection is also written to the system log.
 *
 * @param array Associative array of parameters; 'Comment' is the trackback being inserted.
 */
function BeforeTrackbackInsert( & $params )
{
	if( ! $this->is_duplicate_comment( $params['Comment'] ) )
	{ // Not a duplicate, nothing to report:
		return;
	}

	$this->msg( T_('The trackback seems to be a duplicate.'), 'error' );

	$comment_Item = & $params['Comment']->get_Item();
	if( $comment_Item )
	{ // Log the rejection against the Item:
		syslog_insert( 'The trackback seems to be a duplicate', 'info', 'item', $comment_Item->ID, 'plugin', $this->ID );
	}
}
/**
 * Normalize the submitted comment form values, according to the plugin settings.
 *
 * - 'trim_whitespace': trims $params['comment'].
 * - 'remove_repetitions': collapses repeated characters in $params['anon_name']
 *   and $params['comment'] (see {@link remove_repetition()}).
 *
 * @param array Associative array of parameters; 'comment' and 'anon_name' get modified in place.
 */
function CommentFormSent( & $params )
{
	$PluginSettings = $this->Settings;

	if( $PluginSettings->get('trim_whitespace') )
	{ // Strip whitespace
		$params['comment'] = trim( $params['comment'] );
	}

	if( ! $PluginSettings->get('remove_repetitions') )
	{ // Repetitions are left alone:
		return;
	}

	// Remove repetitions from the author name first, then from the content:
	foreach( array( 'anon_name', 'comment' ) as $field )
	{
		$params[ $field ] = $this->remove_repetition( $params[ $field ] );
	}
}
/**
 * Run the antispam checks on a comment that is about to be inserted.
 *
 * Checks, in this order:
 *  1. Throttling: a minimum interval between two comments from the same IP or
 *     email address (per-group setting for logged in users, global setting for
 *     anonymous users). Not applied to previews or meta comments. When this
 *     check fails, the remaining checks are skipped.
 *  2. Duplicate content (see {@link is_duplicate_comment()}).
 *  3. Common BBcode spam: comments using both "[link=" and "[url=" tags.
 *
 * A failed check is reported through $this->msg( ..., 'error' ) and, when the
 * comment's Item is known, written to the system log.
 *
 * @param array Associative array of parameters; reads 'Comment' and 'action'.
 */
function BeforeCommentFormInsert( & $params )
{
	$comment_Item = & $params['Comment']->get_Item();

	if( is_logged_in() )
	{ // If user is logged in then we can get minimum comment interval per group:
		global $current_User;
		$min_comment_interval = $this->GroupSettings->get( 'comment_throttling', $current_User->grp_ID );
	}
	else
	{ // Use plugin setting of minimum comment interval for all anonymous users:
		$min_comment_interval = $this->Settings->get( 'min_comment_interval' );
	}

	if( $params['action'] != 'preview' &&
	    ! empty( $min_comment_interval ) &&
	    ! $params['Comment']->is_meta() )
	{ // If a comment posting should be blocked by minimum interval:
		global $Hit, $DB, $localtimenow;

		$SQL = new SQL( 'Get time of lat comment from current IP or email address' );
		$SQL->SELECT( 'MAX( comment_date )' );
		$SQL->FROM( 'T_comments' );
		$SQL->WHERE( 'comment_author_IP = '.$DB->quote( $Hit->IP ) );

		$author_email = $params['Comment']->get_author_email();
		if( ! empty( $author_email ) )
		{ // Only match by email when there is one; an empty email would match
			// the email-less comments of any other visitor:
			$SQL->WHERE_or( 'comment_author_email = '.$DB->quote( $author_email ) );
		}

		if( $last_comment_time = $DB->get_var( $SQL ) )
		{ // If last comment is found from current IP or email address:
			$last_comment_time = mysql2date( 'U', $last_comment_time );
			$new_comment_time = mysql2date( 'U', date( 'Y-m-d H:i:s', $localtimenow ) );
			if( ( $new_comment_time - $last_comment_time ) < $min_comment_interval )
			{ // Block new comment because of minimum interval:
				$this->msg( sprintf( T_('You can only post a new comment every %d seconds.'), $min_comment_interval ), 'error' );
				if( $comment_Item )
				{
					syslog_insert( sprintf( 'The comment can be posted only every %d seconds', $min_comment_interval ), 'info', 'item', $comment_Item->ID, 'plugin', $this->ID );
				}
				// Exit here, no need to spend time on the next checks:
				return;
			}
		}
	}

	if( $this->is_duplicate_comment( $params['Comment'] ) )
	{ // Block a duplicate comment:
		$this->msg( T_('The comment seems to be a duplicate.'), 'error' );
		if( $comment_Item )
		{
			syslog_insert( 'The comment seems to be a duplicate', 'info', 'item', $comment_Item->ID, 'plugin', $this->ID );
		}
	}

	if( $this->Settings->get('block_common_spam') && preg_match_all( '~\[(link|url)=~', $params['Comment']->content, $m ) )
	{ // Block common bbcode spam comments with both [url= and [link= tags
		if( ! empty( $m[1] ) && count( array_unique( $m[1] ) ) > 1 )
		{ // Both kinds of tags are used. Several tags of the same kind are not enough:
			$this->msg( T_('Your comment was rejected because it appeared to be spam.'), 'error' );
			if( $comment_Item )
			{
				syslog_insert( 'The comment was rejected because it appeared to be spam', 'warning', 'item', $comment_Item->ID, 'plugin', $this->ID );
			}
		}
	}
}
/**
 * Shorten every run of four or more identical characters to three characters.
 *
 * Example: "Thaaaaaaaaaanks!" becomes "Thaaanks!".
 *
 * @param string Text to clean up
 * @return string The cleaned text, or the original text if the replacement failed
 */
function remove_repetition( $str = '' )
{
	// preg_replace() returns NULL on error; the original text is kept in that case:
	$collapsed = @preg_replace( '~(.)\\1{3,}~u', '$1$1$1', $str );

	return ( $collapsed === NULL ) ? $str : $collapsed;
}
/**
 * Migrate the deprecated general setting 'hit_doublecheck_referer'.
 *
 * If the old general setting is on, the plugin setting 'check_url_referers'
 * gets switched on and saved. The old general setting is deleted in any case.
 */
function AfterInstall()
{
	global $Settings;

	$legacy_setting = 'hit_doublecheck_referer';

	if( $Settings->get( $legacy_setting ) )
	{ // old general settings, "transform it"
		$this->Settings->set( 'check_url_referers', '1' );
		$this->Settings->dbupdate();
	}

	// Remove the old general setting:
	$Settings->delete( $legacy_setting );
	$Settings->dbupdate();
}
/**
 * Check if our Host+URI is in the referring page, preferably through
 * {@link register_shutdown_function()}.
 *
 * Only hits with referer type 'referer' are handled here.
 *
 * @param array Associative array of parameters; 'Hit' is the Hit being logged.
 * @return boolean true, if we handle {@link Hit::record_the_hit() recording of the Hit} ourself;
 *                 false, if the hit is not of referer type 'referer'
 */
function AppendHitLog( & $params )
{
	$Hit = & $params['Hit'];

	if( $Hit->referer_type != 'referer' )
	{ // Not for us:
		return false;
	}

	if( ! function_exists( 'register_shutdown_function' ) )
	{ // No shutdown functions available, check right now:
		// flush now, so that the meat of the page will get shown before it tries to check back against the refering URL.
		evo_flush();

		$this->debug_log( 'AppendHitLog: loading referering page..' );

		$this->double_check_referer($Hit->referer); // this will also call Hit::record_the_hit()

		return true; // we handle recording
	}

	// register it as a shutdown function, because it will be slow!
	$this->debug_log( 'AppendHitLog: loading referering page.. (through register_shutdown_function())' );

	register_shutdown_function( array( &$this, 'double_check_referer' ), $Hit->referer ); // this will also call Hit::record_the_hit()

	return true; // we handle recording
}
/**
 * Record the current hit, but only if the referring page links to the requested URI.
 *
 * This method is called by {@link AppendHitLog()}, either directly or as a
 * {@link register_shutdown_function() shutdown function}. If the referring page
 * does not contain the current URL, the hit is not recorded (it is probably spam).
 *
 * @uses Hit::record_the_hit()
 * @param string URL of the referring page
 */
function double_check_referer( $referer )
{
	global $Hit, $ReqURI;

	$is_linking_us = $this->is_referer_linking_us( $referer, $ReqURI );
	if( ! $is_linking_us )
	{ // Our URL was not found in the referring page:
		return;
	}

	$Hit->record_the_hit();
}
/**
 * Check the content of a given URL (referer), if the requested URI (with different hostname variations)
 * is present.
 *
 * The referring page is fetched with a plain HTTP/1.0 GET request over a socket
 * (max. ~500kb of body gets read). "https" referers are fetched through the
 * "ssl" stream transport on port 443, when that transport is available.
 * The page is then searched for an href attribute pointing to one of our host
 * names (HTTP_HOST, SERVER_NAME, their variants without "www." and their decoded
 * IDN forms), followed by $uri.
 *
 * @todo Use DB cache to avoid checking the same page again and again! (Plugin DB table)
 *
 * @param string URL of the referring page
 * @param string URI to append to matching pattern for hostnames
 * @return boolean true if a link to us was found in the referring page;
 *                 false otherwise (also if the page could not be loaded)
 */
function is_referer_linking_us( $referer, $uri )
{
	global $ReqHost;

	if( empty($referer) )
	{
		return false;
	}

	// Load page content (max. 500kb), using fsockopen:
	$url_parsed = @parse_url($referer);
	if( ! $url_parsed )
	{
		return false;
	}
	if( empty($url_parsed['scheme']) )
	{ // No scheme given, assume plain HTTP:
		$url_parsed = @parse_url('http://'.$referer);
	}
	if( empty($url_parsed['host']) )
	{ // Nothing we could connect to (malformed or relative referer):
		return false;
	}

	$host = $url_parsed['host'];

	// Fetch https pages through SSL, if PHP provides the transport
	// (otherwise fall back to a plain connection, as for http):
	$use_ssl = ( strtolower( $url_parsed['scheme'] ) == 'https' && in_array( 'ssl', stream_get_transports() ) );
	if( empty($url_parsed['port']) )
	{
		$port = $use_ssl ? 443 : 80;
	}
	else
	{
		$port = $url_parsed['port'];
	}

	$path = empty($url_parsed['path']) ? '/' : $url_parsed['path'];
	if( ! empty($url_parsed['query']) )
	{
		$path .= '?'.$url_parsed['query'];
	}

	$fp = @fsockopen( ( $use_ssl ? 'ssl://' : '' ).$host, $port, $errno, $errstr, 30 );
	if( ! $fp )
	{ // could not access referring page
		$this->debug_log( 'is_referer_linking_us(): could not access «'.$referer.'» (host: '.$host.'): '.$errstr.' (#'.$errno.')' );
		return false;
	}

	// Set timeout for data:
	stream_set_timeout( $fp, 20 );

	// Send request:
	$out = "GET $path HTTP/1.0\r\n";
	$out .= "Host: $host:$port\r\n";
	$out .= "Connection: Close\r\n\r\n";
	if( fwrite($fp, $out) === false )
	{ // The request could not be sent, there will be no response to read:
		fclose($fp);
		return false;
	}

	// Skip headers (but pick up the charset on the way):
	$header_lines = 0;
	$source_charset = 'iso-8859-1'; // default
	while( ($s = fgets($fp, 4096)) !== false )
	{
		$header_lines++;
		if( rtrim( $s, "\r\n" ) === '' || $header_lines > 100 /* max 100 head lines */ )
		{ // Empty line = end of headers
			break;
		}
		if( preg_match('~^Content-Type:.*?charset=([\w-]+)~i', $s, $match ) )
		{
			$source_charset = $match[1];
		}
	}

	// Get the refering page's content
	$content_ref_page = '';
	$bytes_read = 0;
	while( ($s = fgets($fp, 4096)) !== false )
	{
		$content_ref_page .= $s;
		$bytes_read += strlen($s);
		if( $bytes_read > 512000 )
		{ // do not pull more than 500kb of data!
			break;
		}
	}
	fclose($fp);

	if( ! strlen($content_ref_page) )
	{
		$this->debug_log( 'is_referer_linking_us(): empty $content_ref_page ('.bytesreadable($bytes_read).' read)' );
		return false;
	}

	// Collect the host names we are known as:
	$possible_hosts = array();
	foreach( array( 'HTTP_HOST', 'SERVER_NAME' ) as $server_key )
	{
		if( ! empty( $_SERVER[ $server_key ] ) )
		{
			$possible_hosts[] = $_SERVER[ $server_key ];
		}
	}

	$search_pattern_hosts = array();
	foreach( $possible_hosts as $l_host )
	{
		if( preg_match( '~^([^.]+\.)(.*?)([^.]+\.[^.]+)$~', $l_host, $match ) )
		{ // we have subdomains in this hostname
			if( stristr( $match[1], 'www' ) )
			{ // search also for hostname without 'www.'
				$search_pattern_hosts[] = $match[2].$match[3];
			}
		}
		$search_pattern_hosts[] = $l_host;
	}
	// array_unique() preserves keys, so re-index to get a gapless list again:
	$search_pattern_hosts = array_values( array_unique( $search_pattern_hosts ) );

	$have_idn_name = false;
	foreach( $search_pattern_hosts as $l_host )
	{ // add IDN, because this could be linked:
		$l_idn_host = idna_decode( $l_host ); // the decoded puny-code ("xn--..") name (utf8)
		if( $l_idn_host != $l_host )
		{
			$have_idn_name = true;
			$search_pattern_hosts[] = $l_idn_host;
		}
	}

	// Build the search pattern:
	// We match for basically for 'href="[SERVER][URI]', where [SERVER] is a list of possible hosts (especially IDNA)
	$quoted_hosts = array();
	foreach( $search_pattern_hosts as $l_host )
	{ // add hosts to pattern, preg_quoted
		$quoted_hosts[] = preg_quote( $l_host, '~' );
	}
	$search_pattern = '~\shref=["\']?https?://('.implode( '|', $quoted_hosts ).')';
	if( empty($uri) )
	{ // host(s) should end with "/", "'", '"', "?" or whitespace
		$search_pattern .= '[/"\'\s?]';
	}
	else
	{
		$search_pattern .= preg_quote($uri, '~');
		// URI should end with "'", '"' or whitespace
		$search_pattern .= '["\'\s]';
	}
	$search_pattern .= '~i';

	if( $have_idn_name )
	{ // Convert charset to UTF-8, because the decoded domain name is UTF-8, too:
		if( can_convert_charsets( 'utf-8', $source_charset ) )
		{
			$content_ref_page = convert_charset( $content_ref_page, 'utf-8', $source_charset );
		}
		else
		{
			$this->debug_log( 'is_referer_linking_us(): warning: cannot convert charset of referring page' );
		}
	}

	// Without any known host name the pattern would match links to any host, so it is skipped then:
	if( ! empty( $quoted_hosts ) && preg_match( $search_pattern, $content_ref_page ) )
	{
		$this->debug_log( 'is_referer_linking_us(): found current URL in page ('.bytesreadable($bytes_read).' read)' );
		return true;
	}

	if( ! empty($ReqHost) && ! empty($uri) && strpos( $referer, $ReqHost ) === 0 )
	{ // Referer is the same host.. just search for $uri
		if( strpos( $content_ref_page, $uri ) !== false )
		{
			$this->debug_log( 'is_referer_linking_us(): found current URI in page ('.bytesreadable($bytes_read).' read)' );
			return true;
		}
	}

	$this->debug_log( 'is_referer_linking_us(): '.sprintf('did not find «%s» in «%s» (%s bytes read).', $search_pattern, $referer, bytesreadable($bytes_read) ) );
	return false;
}
/**
 * Simple check for duplicate comment/content from same author
 *
 * Looks for a comment of the same type with exactly the same content on the same
 * Item. For registered users the author's user ID must match; for visitors (and
 * trackbacks) the author name, email or URL must match, if any of them is given.
 *
 * The check is skipped when the 'check_dupes' setting is off or when the
 * content is empty.
 *
 * @param Comment
 * @return mixed false if the check was skipped; otherwise the result of the DB
 *               lookup: the ID of a matching comment, if one was found
 *               (presumably NULL if not - confirm against DB::get_var())
 */
function is_duplicate_comment( $Comment )
{
	global $DB;

	if( ! $this->Settings->get( 'check_dupes' ) )
	{
		return false;
	}

	if( $Comment->content == '' )
	{ // User may has many comments with empty content but with attachment pictures
		return false;
	}

	// IDs are cast to integers, so that an unset ID cannot produce invalid SQL:
	$sql = '
		SELECT comment_ID
		  FROM T_comments
		 WHERE comment_item_ID = '.intval( $Comment->item_ID );

	if( isset( $Comment->author_user_ID ) )
	{ // registered user:
		$sql .= ' AND comment_author_user_ID = '.intval( $Comment->author_user_ID );
	}
	else
	{ // visitor (also trackback):
		$sql_ors = array();
		if( ! empty( $Comment->author ) )
		{
			$sql_ors[] = 'comment_author = '.$DB->quote( $Comment->author );
		}
		if( ! empty( $Comment->author_email ) )
		{
			$sql_ors[] = 'comment_author_email = '.$DB->quote( $Comment->author_email );
		}
		if( ! empty( $Comment->author_url ) )
		{
			$sql_ors[] = 'comment_author_url = '.$DB->quote( $Comment->author_url );
		}

		if( ! empty($sql_ors) )
		{ // At least one author detail is known, one matching detail is enough:
			$sql .= ' AND ( '.implode( ' OR ', $sql_ors ).' )';
		}
	}

	$sql .= ' AND comment_type = '.$DB->quote( $Comment->get( 'type' ) );
	$sql .= ' AND comment_content = '.$DB->quote( $Comment->content ).' LIMIT 1';

	return $DB->get_var( $sql, 0, 0, 'Checking for duplicate feedback content.' );
}
/**
 * A little housekeeping: delete the 'check_url_trackbacks' plugin setting
 * and save the settings.
 *
 * @param array Associative array of parameters (not read by this method)
 * @return true
 */
function PluginVersionChanged( & $params )
{
	$PluginSettings = $this->Settings;

	$PluginSettings->delete('check_url_trackbacks');
	$PluginSettings->dbupdate();

	return true;
}
}
?>