--- convertWiki2SQL.php.orig	2002-09-19 07:48:35.000000000 +0200
+++ convertWiki2SQL.php	2002-11-03 18:09:45.000000000 +0100
@@ -66,6 +66,33 @@
 if ( $wikiLanguage == "pl" ) {
 	$wikiTalk = "Dyskusja" ;
 	$fieldSeparator = "\xff";
+
+# preg_replace_callback() handler: turn one numeric character reference
+# ("&#321;" or "&#x141;") into the UTF-8 encoding of that code point.
+# $num is the match array; only $num[0], the complete reference text, is
+# read, so any pattern whose whole match is "&#...;" can drive it.
+# Returns the encoded character, or $num[0] unchanged when the code point
+# is 0, a UTF-16 surrogate (U+D800-U+DFFF) or above U+FFFF.
+function num_to_utf8($num)
+{
+	$ref = substr($num[0], 2, -1);	# drop the leading "&#" and the trailing ";"
+	if (strlen($ref) > 0 && ($ref[0] == 'x' || $ref[0] == 'X'))
+		$i = hexdec(substr($ref, 1));
+	else
+		$i = (int)$ref;
+
+	# The upper bounds are exclusive so that the last code point of each
+	# encoded length (U+007F, U+07FF, U+FFFF) is converted as well.
+	if ($i > 0 && $i < 0x80)
+		return chr($i);
+	else if ($i >= 0x80 && $i < 0x800)
+		return chr(0xC0 | ($i >> 6)) . chr(0x80 | ($i & 0x3f));
+	else if ($i >= 0x800 && $i < 0x10000 && ($i < 0xD800 || $i > 0xDFFF))
+		return chr(0xE0 | ($i >> 12)) . chr(0x80 | (($i >> 6) & 0x3f)) . chr(0x80 | ($i & 0x3f));
+	else
+		return $num[0];
+}
+
 	function RecodeCharsetPl ( $text ) {
 		# Convert iso8859-2 to UTF-8
 		# In a happy world, we could use iconv for this
@@ -85,7 +112,11 @@
 		"\xEE" => "\xc3\xae", "\xEF" => "\xc4\x8f", "\xF0" => "\xc4\x91", "\xF1" => "\xc5\x84", "\xF2" => "\xc5\x88", "\xF3" => "\xc3\xb3",
 		"\xF4" => "\xc3\xb4", "\xF5" => "\xc5\x91", "\xF6" => "\xc3\xb6", "\xF7" => "\xc3\xb7", "\xF8" => "\xc5\x99", "\xF9" => "\xc5\xaf",
 		"\xFA" => "\xc3\xba", "\xFB" => "\xc5\xb1", "\xFC" => "\xc3\xbc", "\xFD" => "\xc3\xbd", "\xFE" => "\xc5\xa3", "\xFF" => "\xcb\x99" ) ;
-		return strtr ( $text , $l2u8 ) ;
+		$text = strtr ( $text , $l2u8 ) ;
+		# Decimal and hexadecimal references are expanded in one pass, so text
+		# produced by one replacement (e.g. "&#38;#x41;" -> "&#x41;") is never
+		# decoded a second time.
+		return preg_replace_callback ( '/&#(?:[xX][0-9a-fA-F]+|[0-9]+);/' , 'num_to_utf8' , $text ) ;
 	}
 	$recodeCharset = recodeCharsetPl ;
 	$firstIsLowercase = firstIsLowercasePl;