source code: ./othercms/dotclear-2.22/inc/libs/clearbricks/common/lib.text.php

Seditio Source
./othercms/dotclear-2.22/inc/libs/clearbricks/common/lib.text.php

<?php


/**


 * @class text


 * @brief Text utilities


 *


 * @package Clearbricks


 * @subpackage Common


 *


 * @copyright Olivier Meunier & Association Dotclear


 * @copyright GPL-2.0-only


 */


class text


{


    /**


     * Check email address


     *


     * Returns true if $email is a valid email address.


     *


     * @param string    $email    Email string


     * @return boolean


     */


    public static function isEmail(string $email): bool


    {


        return (filter_var($email, FILTER_VALIDATE_EMAIL) !== false);


    }





    /**


     * Accents replacement


     *


     * Replaces some occidental accentuated characters by their ASCII


     * representation.


     *


     * @param    string    $str        String to deaccent


     * @return    string


     */


    public static function deaccent(string $str): string


    {


        $pattern['A']  = '\x{00C0}-\x{00C5}';


        $pattern['AE'] = '\x{00C6}';


        $pattern['C']  = '\x{00C7}';


        $pattern['D']  = '\x{00D0}';


        $pattern['E']  = '\x{00C8}-\x{00CB}';


        $pattern['I']  = '\x{00CC}-\x{00CF}';


        $pattern['N']  = '\x{00D1}';


        $pattern['O']  = '\x{00D2}-\x{00D6}\x{00D8}';


        $pattern['OE'] = '\x{0152}';


        $pattern['S']  = '\x{0160}';


        $pattern['U']  = '\x{00D9}-\x{00DC}';


        $pattern['Y']  = '\x{00DD}';


        $pattern['Z']  = '\x{017D}';





        $pattern['a']  = '\x{00E0}-\x{00E5}';


        $pattern['ae'] = '\x{00E6}';


        $pattern['c']  = '\x{00E7}';


        $pattern['d']  = '\x{00F0}';


        $pattern['e']  = '\x{00E8}-\x{00EB}';


        $pattern['i']  = '\x{00EC}-\x{00EF}';


        $pattern['n']  = '\x{00F1}';


        $pattern['o']  = '\x{00F2}-\x{00F6}\x{00F8}';


        $pattern['oe'] = '\x{0153}';


        $pattern['s']  = '\x{0161}';


        $pattern['u']  = '\x{00F9}-\x{00FC}';


        $pattern['y']  = '\x{00FD}\x{00FF}';


        $pattern['z']  = '\x{017E}';





        $pattern['ss'] = '\x{00DF}';





        foreach ($pattern as $r => $p) {


            $str = preg_replace('/[' . $p . ']/u', $r, $str);


        }





        return $str;


    }





    /**


     * String to URL


     *


     * Transforms a string to a proper URL.


     *


     * @param string    $str            String to transform


     * @param boolean    $with_slashes    Keep slashes in URL


     * @return string


     */


    public static function str2URL(string $str, bool $with_slashes = true): string


    {


        $str = self::deaccent($str);


        $str = preg_replace('/[^A-Za-z0-9_\s\'\:\/[\]-]/', '', $str);





        return self::tidyURL($str, $with_slashes);


    }





    /**


     * URL cleanup


     *


     * @param string    $str            URL to tidy


     * @param boolean    $keep_slashes    Keep slashes in URL


     * @param boolean    $keep_spaces    Keep spaces in URL


     * @return string


     */


    public static function tidyURL(string $str, bool $keep_slashes = true, bool $keep_spaces = false): string


    {


        $str = strip_tags($str);


        $str = str_replace(['?', '&', '#', '=', '+', '<', '>', '"', '%'], '', $str);


        $str = str_replace("'", ' ', $str);


        $str = preg_replace('/[\s]+/u', ' ', trim($str));





        if (!$keep_slashes) {


            $str = str_replace('/', '-', $str);


        }





        if (!$keep_spaces) {


            $str = str_replace(' ', '-', $str);


        }





        $str = preg_replace('/[-]+/', '-', $str);





        # Remove path changes in URL


        $str = preg_replace('%^/%', '', $str);


        $str = preg_replace('%\.+/%', '', $str);





        return $str;


    }





    /**


     * Cut string


     *


     * Returns a cuted string on spaced at given length $l.


     *


     * @param    string    $str        String to cut


     * @param    integer    $l        Length to keep


     * @return    string


     */


    public static function cutString(string $str, int $l): string


    {


        $s = preg_split('/([\s]+)/u', $str, -1, PREG_SPLIT_DELIM_CAPTURE);





        $res = '';


        $L   = 0;





        if (mb_strlen($s[0]) >= $l) {


            return mb_substr($s[0], 0, $l);


        }





        foreach ($s as $v) {


            $L = $L + mb_strlen($v);





            if ($L > $l) {


                break;


            }


            $res .= $v;


        }





        return trim($res);


    }





    /**


     * Split words


     *


     * Returns an array of words from a given string.


     *


     * @param string    $str        Words to split


     * @return array


     */


    public static function splitWords(string $str): array


    {


        $non_word = '\x{0000}-\x{002F}\x{003A}-\x{0040}\x{005b}-\x{0060}\x{007B}-\x{007E}\x{00A0}-\x{00BF}\s';


        if (preg_match_all('/([^' . $non_word . ']{3,})/msu', html::clean($str), $match)) {


            foreach ($match[1] as $i => $v) {


                $match[1][$i] = mb_strtolower($v);


            }





            return $match[1];


        }





        return [];


    }





    /**


     * Encoding detection


     *


     * Returns the encoding (in lowercase) of given $str.


     *


     * @param string    $str        String


     * @return string


     */


    public static function detectEncoding(string $str): string


    {


        return strtolower(mb_detect_encoding(


            $str . ' ',


            'UTF-8,ISO-8859-1,ISO-8859-2,ISO-8859-3,' .


            'ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,' .


            'ISO-8859-9,ISO-8859-10,ISO-8859-13,ISO-8859-14,ISO-8859-15'


        ));


    }





    /**


     * UTF8 conversions


     *


     * Returns an UTF-8 converted string. If $encoding is not specified, the


     * function will try to detect encoding.


     *


     * @param string    $str        String to convert


     * @param string    $encoding    Optionnal "from" encoding


     * @return string


     */


    public static function toUTF8(string $str, ?string $encoding = null): string


    {


        if (!$encoding) {


            $encoding = self::detectEncoding($str);


        }





        if ($encoding != 'utf-8') {


            $str = iconv($encoding, 'UTF-8', $str);


        }





        return $str;


    }





    /**


     * Find bad UTF8 tokens


     *


     * Locates the first bad byte in a UTF-8 string returning it's


     * byte index in the string


     * PCRE Pattern to locate bad bytes in a UTF-8 string


     * Comes from W3 FAQ: Multilingual Forms


     * Note: modified to include full ASCII range including control chars


     *


     * @copyright Harry Fuecks (http://phputf8.sourceforge.net <a href="http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html">GNU LGPL 2.1</a>)


     *


     * @param string    $str        String to search


     * @return integer|false


     */


    public static function utf8badFind(string $str)


    {


        $UTF8_BAD = '([\x00-\x7F]' . # ASCII (including control chars)


        '|[\xC2-\xDF][\x80-\xBF]' . # non-overlong 2-byte


        '|\xE0[\xA0-\xBF][\x80-\xBF]' . # excluding overlongs


        '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . # straight 3-byte


        '|\xED[\x80-\x9F][\x80-\xBF]' . # excluding surrogates


        '|\xF0[\x90-\xBF][\x80-\xBF]{2}' . # planes 1-3


        '|[\xF1-\xF3][\x80-\xBF]{3}' . # planes 4-15


        '|\xF4[\x80-\x8F][\x80-\xBF]{2}' . # plane 16


        '|(.{1}))'; # invalid byte


        $pos     = 0;


        $badList = [];





        while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) {


            $bytes = strlen($matches[0]);


            if (isset($matches[2])) {


                return $pos;


            }


            $pos += $bytes;


            $str = substr($str, $bytes);


        }





        return false;


    }





    /**


     * UTF8 cleanup


     *


     * Replaces non utf8 bytes in $str by $repl.


     *


     * @copyright Harry Fuecks (http://phputf8.sourceforge.net <a href="http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html">GNU LGPL 2.1</a>)


     *


     * @param string    $str        String to clean


     * @param string    $repl    Replacement string


     * @return string


     */


    public static function cleanUTF8(string $str, string $repl = '?'): string


    {


        while (($bad_index = self::utf8badFind($str)) !== false) {


            $str = substr_replace($str, $repl, $bad_index, 1);


        }





        return $str;


    }





    /**


     * BOM removal


     *


     * Removes BOM from the begining of a string if present.


     *


     * @param string    $str        String to clean


     * @return string


     */


    public static function removeBOM(string $str): string


    {


        if (substr_count($str, '')) {


            return str_replace('', '', $str);


        }





        return $str;


    }





    /**


     * Quoted printable conversion


     *


     * Encodes given str to quoted printable


     *


     * @param string    $str        String to encode


     * @return string


     */


    public static function QPEncode(string $str): string


    {


        $res = '';





        foreach (preg_split("/\r?\n/msu", $str) as $line) {


            $l = '';


            preg_match_all('/./', $line, $m);





            foreach ($m[0] as $c) {


                $a = ord($c);





                if ($a < 32 || $a == 61 || $a > 126) {


                    $c = sprintf('=%02X', $a);


                }





                $l .= $c;


            }





            $res .= $l . "\r\n";


        }





        return $res;


    }


}