<?php
/**
* This file implements functions for handling charsets.
*
* This file is part of the evoCore framework - {@link http://evocore.net/}
* See also {@link https://github.com/b2evolution/b2evolution}.
*
* @license GNU GPL v2 - {@link http://b2evolution.net/about/gnu-gpl-license}
*
* @copyright (c)2003-2020 by Francois Planque - {@link http://fplanque.com/}
* Parts of this file are copyright (c)2004-2006 by Daniel HAHLER - {@link http://daniel.hahler.de/}.
*
* @package evocore
*
* @todo dh> Move this to some other directory?
*/
// Direct-access guard: stop execution with a short message when the EVO_MAIN_INIT
// constant has not been defined before this file is loaded.
if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
/**
 * Transliterate the non-ASCII chars of a string (encoded in $evo_charset) to ASCII by means of iconv().
 *
 * iconv() only transliterates properly when LC_CTYPE is set to a suitable locale, so this function
 * temporarily switches LC_CTYPE to the first usable candidate and puts the original value back
 * once iconv() has been called.
 *
 * Candidates, in order of preference (each suffixed with the lowercased, dash-less $evo_charset):
 *  1. the post locale (only when one is given),
 *  2. the current b2evo locale,
 *  3. the default b2evo locale,
 *  4. the locale that was active on entry, with its charset part swapped for ours.
 *
 * @author Tilman BLUMENBACH - tblue246
 * @todo Tblue> Try more locales.
 *
 * @param string The string to transliterate.
 * @param NULL|string The post locale. NULL to not try switching to it.
 * @return string|boolean The transliterated ASCII string on success or false on failure
 *                        (iconv() not available, no usable locale, or iconv() itself failed).
 */
function evo_iconv_transliterate( $str, $post_locale = NULL )
{
	global $evo_charset, $current_locale, $default_locale;

	if( ! function_exists( 'iconv' ) )
	{	// Nothing we can do without the iconv extension:
		return false;
	}

	// Remember the active LC_CTYPE so that it can be restored afterwards.
	// (iconv() needs a proper LC_CTYPE, see http://www.php.net/manual/en/function.iconv.php#94481)
	$previous_ctype = setlocale( LC_CTYPE, 0 );

	// Charset suffix shared by all the locale names we try, e.g. ".utf8":
	$charset_suffix = '.'.strtolower( str_replace( '-', '', $evo_charset ) );

	// Build the list of candidate locales, most preferred first:
	$candidates = array();
	if( $post_locale !== NULL )
	{	// A post locale was given, it takes precedence:
		$candidates[] = str_replace( '-', '_', $post_locale ).$charset_suffix;
	}
	$candidates[] = str_replace( '-', '_', $current_locale ).$charset_suffix; // current b2evo locale
	$candidates[] = str_replace( '-', '_', $default_locale ).$charset_suffix; // default b2evo locale

	$locale_ready = ( setlocale( LC_CTYPE, $candidates ) !== false );

	if( ! $locale_ready )
	{	// Last resort: the locale that was active on entry, combined with our charset.
		$dot_pos = strrpos( $previous_ctype, '.' );
		$system_locale = ( $dot_pos === false )
			? $previous_ctype
			: substr( $previous_ctype, 0, $dot_pos ); // strip the existing charset part

		$locale_ready = ( setlocale( LC_CTYPE, $system_locale.$charset_suffix ) !== false );
	}

	if( ! $locale_ready )
	{	// No usable locale could be set (LC_CTYPE is still untouched), give up:
		return false;
	}

	// Do the actual transliteration:
	$ascii_str = iconv( $evo_charset, 'ASCII//TRANSLIT', $str );

	// Put the original locale back in place:
	setlocale( LC_CTYPE, $previous_ctype );

	return $ascii_str;
}
/**
 * Convert special chars (like German umlauts) to ASCII characters.
 *
 * Processing steps:
 *  1. Decode all HTML entities of the input.
 *  2. Apply the 'transliteration_map' of the post/current/default locale, if that locale defines one.
 *  3. Transliterate to ASCII with evo_iconv_transliterate(); if that fails, fall back to a small
 *     hard-coded replacement table, or to no transliteration at all.
 *  4. Re-encode as HTML entities, turn "&amp;", "&laquo;" and "&raquo;" into $separator and reduce
 *     every other entity to its first letter (e.g. "&eacute;" => "e").
 *  5. Replace every run of chars outside [A-Za-z0-9_] (plus '.' if accepted) with $separator.
 *  6. Strip leading and trailing '-'.
 *
 * @param string Input string to operate on
 * @param NULL|string The post locale or NULL if there is no specific locale.
 *                    Gets passed to evo_iconv_transliterate().
 * @param boolean Accept period '.' as valid character
 * @param string String used in place of each run of non acceptable chars (and of the special entities)
 * @return string The input string with replaced chars.
 */
function replace_special_chars( $str, $post_locale = NULL, $accept_period = false, $separator = '-' )
{
	global $evo_charset, $default_locale, $current_locale, $locales;

	// Decode ALL entities to be able to transliterate the associated chars:
	// (Even the double quote entity gets decoded to a plain '"', so that the word "quot" does not end up in an autogenerated slug)
	$str = html_entity_decode( $str, ENT_QUOTES, $evo_charset );

	$our_locale = $post_locale;
	if( $our_locale === NULL )
	{	// Post locale is not set, try to guess a locale:
		if( !empty($default_locale) )
		{
			$our_locale = $default_locale;
		}
		if( !empty($current_locale) )
		{	// Override with current locale if available
			$our_locale = $current_locale;
		}
	}

	if( $our_locale !== NULL && isset($locales[$our_locale]) && !empty($locales[$our_locale]['transliteration_map']) )
	{	// Use locale 'transliteration_map' if present
		$translit_map = $locales[$our_locale]['transliteration_map'];
		if( ! array_key_exists( '', $translit_map ) )
		{	// Only call strtr() when the map has no empty string key (strtr() cannot handle one)
			$tmp_str = strtr( $str, $translit_map );
			// Explicit check instead of a truthiness test, so that a valid result like "0" is not thrown away:
			if( is_string( $tmp_str ) && $tmp_str !== '' )
			{	// Use newly transliterated string
				$str = $tmp_str;
			}
		}
	}

	if( ( $newstr = evo_iconv_transliterate( $str, $post_locale ) ) !== false )
	{	// iconv allows us to get nice URL titles by transliterating non-ASCII chars.
		// Tblue> htmlentities() does not know anything about ASCII?! ISO-8859-1 will work too, though.
		$newstr_charset = 'ISO-8859-1';
	}
	// TODO: sam2kb> convert this to 'transliteration_map'
	elseif( can_convert_charsets('UTF-8', $evo_charset) && can_convert_charsets('UTF-8', 'ISO-8859-1') /* source */ )
	{	// Fallback to the limited old method: Transliterate only a few known chars.
		// NOTE(review): presumably convert_charset() takes ( string, destination, source ) - verify against its definition.
		$newstr = convert_charset( $str, 'UTF-8', $evo_charset );
		$newstr_charset = 'UTF-8';

		// NOTE(review): the literals below are declared as iso-8859-1 and are converted to UTF-8 in the loop;
		// this assumes the file itself is stored in that encoding - confirm.
		$search = array( 'Ä', 'ä', 'Ö', 'ö', 'Ü', 'ü', 'ß', 'à', 'ç', 'è', 'é', 'ì', 'ò', 'ô', 'ù' ); // iso-8859-1
		$replace = array( 'Ae', 'ae', 'Oe', 'oe', 'Ue', 'ue', 'ss', 'a', 'c', 'e', 'e', 'i', 'o', 'o', 'u' );

		foreach( $search as $k => $v )
		{	// convert $search to UTF-8
			$search[$k] = convert_charset( $v, 'UTF-8', 'ISO-8859-1' );
		}

		$newstr = str_replace( $search, $replace, $newstr );
	}
	else
	{	// No transliteration available: only the HTML entities handling below gets applied.
		$newstr = $str;
		$newstr_charset = $evo_charset;
	}

	// Replace special chars with HTML entities:
	$newstr = htmlentities( $newstr, ENT_NOQUOTES, $newstr_charset );

	// Handle special entities (e.g., use $separator instead of "a" for "&amp;").
	// This MUST match the encoded entities and not the raw chars: at this point every '&' is the start
	// of an entity, so replacing a bare '&' would break all entities before they get reduced below.
	$newstr = str_replace(
		array( '&amp;', '&laquo;', '&raquo;' ),
		$separator,
		$newstr );

	// Keep only one char in entities! (e.g. "&eacute;" => "e")
	$newstr = preg_replace( '/&(.).+?;/', '$1', $newstr );

	// Replace non acceptable chars (each run of them becomes a single $separator):
	$acceptable_chars = $accept_period ? 'A-Za-z0-9_\.' : 'A-Za-z0-9_';
	$newstr = preg_replace( '/[^'.$acceptable_chars.']+/', $separator, $newstr );

	// Remove '-' at start and end:
	// NOTE(review): only '-' is stripped here, whatever $separator is - confirm this is intended for custom separators.
	$newstr = preg_replace( '/^-+/', '', $newstr );
	$newstr = preg_replace( '/-+$/', '', $newstr );

	return $newstr;
}
?>