 * This file implements functions for handling charsets.
 * This file is part of the evoCore framework - {@link}
 * See also {@link}.
 * @license GNU GPL v2 - {@link}
 * @copyright (c)2003-2020 by Francois Planque - {@link}
 * Parts of this file are copyright (c)2004-2006 by Daniel HAHLER - {@link}.
 * @package evocore
 * @todo dh> Move this to some other directory?
if( ! defined( 'EVO_MAIN_INIT' ) )
{	// The EVO_MAIN_INIT constant is not defined: refuse to run this file any further.
	die( 'Please, do not access this page directly.' );
}

/**
 * Use iconv() to transliterate non-ASCII chars in a string encoded with $evo_charset.
 *
 * This function will figure out a usable LC_CTYPE setting and revert it to the original value
 * after calling iconv().
 *
 * @author Tilman BLUMENBACH - tblue246
 * @todo Tblue> Try more locales.
 *
 * @param string The string to transliterate.
 * @param NULL|string The post locale. NULL to not try switching to it.
 * @return string|boolean The transliterated ASCII string on success or false on failure.
 */
function evo_iconv_transliterate( $str, $post_locale = NULL )
{
	// Transliterates $str (encoded in the global $evo_charset) to ASCII with iconv().
	// LC_CTYPE is switched temporarily because iconv() needs a proper LC_CTYPE to work;
	// the original value is restored after the conversion.
	// Returns the result of iconv(), or false when iconv() is unavailable or no locale could be set.
	global $evo_charset, $current_locale, $default_locale;

	if( ! function_exists( 'iconv' ) )
	{	// iconv() is not available, so there is nothing we can do here:
		return false;
	}

	// Remember the current LC_CTYPE ("0" only queries the setting) so that we can restore it later:
	$orig_lc_ctype = setlocale( LC_CTYPE, 0 );

	// Charset suffix in the form used for the locale names below, e.g. "UTF-8" becomes "utf8":
	$lc_evo_charset = strtolower( str_replace( '-', '', $evo_charset ) );

	$locales_to_try = array(
			str_replace( '-', '_', $current_locale ).'.'.$lc_evo_charset, // Try to use current b2evo locale
			str_replace( '-', '_', $default_locale ).'.'.$lc_evo_charset, // Fallback to default b2evo locale
		);

	if( $post_locale !== NULL )
	{	// Try to switch to the post locale first:
		array_unshift( $locales_to_try, str_replace( '-', '_', $post_locale ).'.'.$lc_evo_charset );
	}

	if( setlocale( LC_CTYPE, $locales_to_try ) === false )
	{	// None of the candidates worked.
		// The last thing we try is to use the system locale with our charset.
		$pos = strrpos( $orig_lc_ctype, '.' );
		if( $pos !== false )
		{	// Remove existing charset string:
			$syslocale = substr( $orig_lc_ctype, 0, $pos );
		}
		else
		{	// The system locale carries no charset suffix:
			$syslocale = $orig_lc_ctype;
		}

		if( setlocale( LC_CTYPE, $syslocale.'.'.$lc_evo_charset ) === false )
		{	// We could not set a usable locale, giving up...
			// (Nothing to restore: every setlocale() attempt failed.)
			return false;
		}
	}

	// Transliterate the string:
	$newstr = iconv( $evo_charset, 'ASCII//TRANSLIT', $str );

	// Restore the original locale:
	setlocale( LC_CTYPE, $orig_lc_ctype );

	return $newstr;
}


/**
 * Convert special chars (like German umlauts) to ASCII characters.
 *
 * @param string Input string to operate on
 * @param NULL|string The post locale or NULL if there is no specific locale.
 *                    Gets passed to evo_iconv_transliterate().
 * @param boolean Accept period '.' as valid character
 * @param string Separator used to replace special entities and runs of non-acceptable chars
 * @return string The input string with replaced chars.
 */
function replace_special_chars( $str, $post_locale = NULL, $accept_period = false, $separator = '-' )
{
	// Reduces $str to the characters A-Z, a-z, 0-9 and "_" (plus "." when $accept_period is true).
	// Steps: decode HTML entities, apply the locale's 'transliteration_map' (if any), transliterate
	// to ASCII (iconv or a small built-in table), then replace everything else with $separator.
	global $evo_charset, $default_locale, $current_locale, $locales;

	// Decode ALL entities to be able to transliterate the associated chars:
	// (We should convert even double quote &quot; to " in order to avoid a word "quot" in autogenerated slug):
	$str = html_entity_decode( $str, ENT_QUOTES, $evo_charset );

	$our_locale = $post_locale;
	if( $our_locale === NULL )
	{	// Post locale is not set, try to guess the current locale:
		if( ! empty( $default_locale ) )
		{
			$our_locale = $default_locale;
		}
		if( ! empty( $current_locale ) )
		{	// Override with current locale if available:
			$our_locale = $current_locale;
		}
	}

	if( $our_locale !== NULL && isset( $locales[$our_locale] ) && ! empty( $locales[$our_locale]['transliteration_map'] ) )
	{	// Use locale 'transliteration_map' if present:
		$transliteration_map = $locales[$our_locale]['transliteration_map'];
		if( ! array_key_exists( '', $transliteration_map ) )
		{	// Make sure there's no empty string key, otherwise strtr() may return false.
			$tmp_str = strtr( $str, $transliteration_map );
			if( is_string( $tmp_str ) )
			{	// Use newly transliterated string.
				// (Checked with is_string() rather than truthiness, so valid results like '0' or '' are kept.)
				$str = $tmp_str;
			}
		}
	}

	$newstr = evo_iconv_transliterate( $str, $post_locale );
	if( $newstr !== false )
	{	// iconv allows us to get nice URL titles by transliterating non-ASCII chars.
		// Tblue> htmlentities() does not know anything about ASCII?! ISO-8859-1 will work too, though.
		$newstr_charset = 'ISO-8859-1';
	}
	// TODO: sam2kb> convert this to 'transliteration_map'
	elseif( can_convert_charsets( 'UTF-8', $evo_charset ) && can_convert_charsets( 'UTF-8', 'ISO-8859-1' ) )
	{	// Fallback to the limited old method: Transliterate only a few known chars.
		$newstr = convert_charset( $str, 'UTF-8', $evo_charset );
		$newstr_charset = 'UTF-8';

		// NOTE(review): the conversion loop below assumes these literals are stored as ISO-8859-1 bytes
		// in this source file - confirm the file encoding.
		$search = array( 'Ä', 'ä', 'Ö', 'ö', 'Ü', 'ü', 'ß', 'à', 'ç', 'è', 'é', 'ì', 'ò', 'ô', 'ù' ); // iso-8859-1
		$replace = array( 'Ae', 'ae', 'Oe', 'oe', 'Ue', 'ue', 'ss', 'a', 'c', 'e', 'e', 'i', 'o', 'o', 'u' );

		foreach( $search as $k => $v )
		{	// Convert $search to UTF-8:
			$search[$k] = convert_charset( $v, 'UTF-8', 'ISO-8859-1' );
		}

		$newstr = str_replace( $search, $replace, $newstr );
	}
	else
	{	// Replace HTML entities only.
		$newstr = $str;
		$newstr_charset = $evo_charset;
	}

	// Replace HTML entities:
	$newstr = htmlentities( $newstr, ENT_NOQUOTES, $newstr_charset );

	// Handle special entities (e.g., use $separator instead of "a" for "&"):
	$newstr = str_replace(
			array( '&amp;', '&laquo;', '&raquo;' ),
			$separator,
			$newstr );

	// Keep only one char in entities!
	$newstr = preg_replace( '/&(.).+?;/', '$1', $newstr );

	// Replace non acceptable chars:
	if( $accept_period )
	{
		$newstr = preg_replace( '/[^A-Za-z0-9_\.]+/', $separator, $newstr );
	}
	else
	{
		$newstr = preg_replace( '/[^A-Za-z0-9_]+/', $separator, $newstr );
	}

	// Remove '-' at start and end:
	// NOTE(review): only a literal '-' is trimmed here, even when a different $separator is passed -
	// confirm whether callers with a custom separator expect leading/trailing separators to remain.
	$newstr = preg_replace( '/^-+/', '', $newstr );
	$newstr = preg_replace( '/-+$/', '', $newstr );

	return $newstr;
}
