<?php
/**
* @author: Jörg Reinholz, fastix WebDesign & Consult, Kassel - http://www.fastix.org/
* @version: 1.2 (2024-03-13)
* @license: https://code.fastix.org/lizenz.php
**/
/**
 * Heuristic spam detector for free-text input.
 *
 * A text is reported as spam when any of the following holds:
 *  - it begins (after optional spaces) with a URL, an `<a href=` tag or a
 *    `[url=` BBCode tag,
 *  - it contains three or more http(s) URLs on one line,
 *  - it contains five consecutive numeric character entities (`&#...;`),
 *  - it matches one of the terms read from the word-list file, where common
 *    look-alike characters (i/l/1, o/0, a/@, c/z/x/s) are treated as
 *    interchangeable.
 *
 * Word-list entries are used as regular-expression fragments (they are NOT
 * passed through preg_quote), one entry per line.
 */
class SpamDetector {
    /** @var string Word-list file name; made absolute (relative to this file) in the constructor. */
    private $sWortliste = 'known_spam';

    /** @var string[] Spam terms (regex fragments) as read from the word list. */
    private $arSpamRegex = [];

    /** @var array Ordered search => replacement map used to widen spam terms. */
    private $arReplacements = [];

    /** @var string[] Regex fragments detecting link-based spam. */
    private $arURLRegex = [];

    /** @var string[] Regex fragments detecting runs of numeric character entities. */
    private $arHexRegex = [];

    /**
     * Builds the pattern tables and loads the spam terms from the word list.
     *
     * An unreadable or missing word list is not an error: the detector then
     * works with the built-in URL and entity rules only.
     */
    public function __construct() {
        $this->sWortliste = __DIR__ . '/' . $this->sWortliste;

        $this->arURLRegex = [
            // Entry begins with a link:
            '^ *https{0,1}:\/\/',
            '^ *<a href *= *',
            '^ *\[url *= *',
            // Three links:
            'https{0,1}:\/\/.*https{0,1}:\/\/.*https{0,1}:\/\/',
        ];

        // Five numeric character entities in a row:
        $this->arHexRegex = [
            '&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};',
        ];

        // Spam terms: one per line, surrounding whitespace and blank lines dropped.
        $arSpamRegex = [];
        $arLines = is_readable( $this->sWortliste ) ? file( $this->sWortliste ) : false;
        if ( false !== $arLines ) {
            foreach ( $arLines as $wort ) {
                $wort = trim( $wort );
                if ( '' !== $wort ) {
                    $arSpamRegex[] = $wort;
                }
            }
        }
        $this->arSpamRegex = $arSpamRegex;

        // Replacements are applied strictly in this order. Characters are
        // first mapped to upper-case placeholders (which none of the
        // lower-case searches can hit again), then the placeholders are
        // turned into character classes. Going straight to the character
        // class would let a later search corrupt an earlier class.
        $this->arReplacements = [
            'i' => '########I#######',
            '1' => '########I#######',
            'l' => '########I#######',
            'o' => '########O#######',
            'a' => '########A#######',
            'c' => '########C#######',
            'z' => '########C#######',
            '########I#######' => '[il1]',
            '########O#######' => '[o0]',
            '########A#######' => '[a@]',
            '########C#######' => '[czxs]',
        ];
    }

    /**
     * Checks a text against all spam rules.
     *
     * @param string $str Text to examine.
     * @return bool true if any rule matches, false otherwise.
     */
    public function detect( $str ) {
        $str = (string) $str;

        if ( $this->matchesAny( $this->arURLRegex, $str ) ) {
            return true;
        }
        if ( $this->matchesAny( $this->arHexRegex, $str ) ) {
            return true;
        }

        $arExpanded = [];
        foreach ( $this->arSpamRegex as $strSpamRegex ) {
            $arExpanded[] = $this->expandLookalikes( $strSpamRegex );
        }

        return $this->matchesAny( $arExpanded, $str );
    }

    /**
     * Tests the text against a list of regex fragments, case-insensitively.
     *
     * @param string[] $arRegex Regex fragments without delimiters.
     * @param string   $str     Text to examine.
     * @return bool true on the first fragment that matches.
     */
    private function matchesAny( array $arRegex, $str ) {
        foreach ( $arRegex as $strRegex ) {
            if ( '' === $strRegex ) {
                // An empty pattern would match every text.
                continue;
            }
            // preg_match yields false for a broken pattern; only 1 is a hit.
            if ( 1 === preg_match( '/' . $strRegex . '/i', $str ) ) {
                return true;
            }
        }
        return false;
    }

    /**
     * Widens a spam term so that look-alike characters match each other.
     *
     * @param string $strSpamRegex Term from the word list.
     * @return string Term with look-alike characters replaced by character classes.
     */
    private function expandLookalikes( $strSpamRegex ) {
        // PHP stores the numeric-string key '1' as int 1; cast back for str_replace.
        $arKeys = array_map( 'strval', array_keys( $this->arReplacements ) );
        // With array arguments str_replace works through the pairs in order,
        // each step operating on the result of the previous one.
        return trim( str_replace( $arKeys, array_values( $this->arReplacements ), $strSpamRegex ) );
    }
}