<?php
 
/**
* @author: Jörg Reinholz, fastix WebDesign & Consult, Kassel - http://www.fastix.org/
* @version: 1.2 (2024-03-13)
* @license: https://code.fastix.org/lizenz.php
**/
 
/**
 * Heuristic spam detector for short user-submitted texts (comments, guest book entries ...).
 *
 * A text is classified as spam when at least one of these rules applies:
 *  - it starts with a link (bare URL, HTML anchor or BBCode [url=...]),
 *  - it contains three or more http(s) links,
 *  - it contains five consecutive numeric character references (&#...;),
 *  - it matches one of the entries of the word list, where some characters also
 *    match their usual look-alikes (e.g. "i" also matches "l" and "1").
 *
 * The word list is a plain text file with one entry per line. Every entry is used
 * as a regular expression fragment, not as a literal string.
 */
class SpamDetector {
    private $sWortliste     = 'known_spam';
    private $arSpamRegex    = [];
    private $arReplacements = [];
    private $arURLRegex     = [];
    private $arHexRegex     = [];

    /**
     * Builds all rule sets and loads the word list.
     *
     * @param string|null $sWortliste Path to the word list file. When omitted, the file
     *                                "known_spam" next to this source file is used.
     */
    public function __construct( $sWortliste = null ) {

        $this -> sWortliste = ( null === $sWortliste )
            ? __DIR__ . '/' . $this -> sWortliste
            : (string) $sWortliste;

        $this -> arURLRegex = [
            # Entry starts with a link:
            '^ *https{0,1}:\/\/',
            '^ *<a href *= *',
            '^ *\[url *= *',
            # Three links anywhere in the entry:
            'https{0,1}:\/\/.*https{0,1}:\/\/.*https{0,1}:\/\/',
        ];

        # Five character references in a row:
        $this -> arHexRegex = [
            '&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};&#[0-9a-f]{2,4};',
        ];

        # Characters of a word list entry that are widened to a class of look-alikes.
        # They are applied with strtr(), which never re-scans text it has already
        # replaced, so the inserted character classes are not widened a second time.
        $this -> arReplacements = [
            'i' => '[il1]',
            '1' => '[il1]',
            'l' => '[il1]',
            'o' => '[o0]',
            'a' => '[a@]',
            'c' => '[czxs]',
            'z' => '[czxs]',
        ];

        # Needs arReplacements, so this has to come last.
        $this -> arSpamRegex = $this -> loadSpamRegex( $this -> sWortliste );
    }

    /**
     * Checks a text against all spam rules.
     *
     * @param string $str Text to check; it is compared case-insensitively.
     * @return bool true when at least one rule matches, false otherwise.
     */
    public function detect( $str ) {

        # The cast keeps null and other scalars from reaching strtolower() unconverted.
        $str = trim( strtolower( (string) $str ) );
        $str = str_replace( "\t", '    ', $str );

        # The "s" modifier lets "." span line breaks, so that three links spread
        # over several lines are counted as well.
        return $this -> matchesAny( $this -> arURLRegex, $str, 's' )
            || $this -> matchesAny( $this -> arHexRegex, $str )
            || $this -> matchesAny( $this -> arSpamRegex, $str );
    }

    /**
     * Reads the word list and turns every non-empty line into a regex fragment.
     *
     * @param string $sDatei Path to the word list file.
     * @return array List of regex fragments; empty when the file is missing, unreadable or empty.
     */
    private function loadSpamRegex( $sDatei ) {

        $arZeilen = ( is_file( $sDatei ) && is_readable( $sDatei ) )
            ? file( $sDatei, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES )
            : false;

        if ( false === $arZeilen ) {
            # The URL and character reference rules still work without the word list,
            # so report the problem but keep the detector usable.
            trigger_error( 'SpamDetector: word list "' . $sDatei . '" could not be read.', E_USER_WARNING );
            return [];
        }

        $arRegex = [];
        foreach ( $arZeilen as $sZeile ) {
            $sWort = strtolower( trim( $sZeile ) );
            if ( '' !== $sWort ) {
                $arRegex[] = strtr( $sWort, $this -> arReplacements );
            }
        }
        return $arRegex;
    }

    /**
     * Tells whether at least one of the given regex fragments matches the text.
     *
     * @param array  $arRegex   Regex fragments without delimiters.
     * @param string $str       Text to search in.
     * @param string $sModifier Optional PCRE modifiers appended to every pattern.
     * @return bool
     */
    private function matchesAny( array $arRegex, $str, $sModifier = '' ) {

        foreach ( $arRegex as $sRegex ) {
            # "#" is the delimiter, so it has to be escaped inside the fragment.
            $sPattern = '#' . str_replace( '#', '\#', $sRegex ) . '#' . $sModifier;
            # preg_match() returns false for a broken pattern; only a real hit counts.
            if ( 1 === preg_match( $sPattern, $str ) ) {
                return true;
            }
        }
        return false;
    }
}
