Skip wiki files whose names start with a lowercase letter.

Adds firstIsLowercaseEn() (first byte in a-z) and firstIsLowercasePl()
(first byte in a-z, or one of the listed codes above 128), stores the name of
the check to use in $firstIsLowercase, and makes getTopics() print a warning
for, and ignore, every entry that check accepts before the existing '.db'
suffix test runs.

NOTE(review): line breaks, indentation depth and blank context lines were
reconstructed so that each hunk matches its @@ line counts; verify against
the target file or apply with whitespace ignored (patch -l).
NOTE(review): the "<br>" inside the three warning strings was restored from a
stray line break in the string literal; confirm against the target file.
NOTE(review): the third hunk's new-file start (392) is 2 higher than the
earlier hunks account for (390); a hunk may be missing, confirm.
NOTE(review): the byte list in firstIsLowercasePl() has 0xFE but not 0xF3; if
file names are ISO-8859-2 (as the $l2u8 table suggests), 0xF3 is o-acute and
0xFE is not a Polish letter, so this looks like a typo; confirm.

--- convertWiki2SQL.php.orig	Mon Jun 17 09:06:37 2002
+++ convertWiki2SQL.php	Mon Jun 17 09:38:18 2002
@@ -28,6 +28,25 @@
 #$recodeCharset = recodeCharsetStub ;
 $recodeCharset = recodeCharsetLatin1 ; # Future conversions should all go to UTF-8
 
+function firstIsLowercaseEn ( $text ) {
+    $first = ord(substr($text, 0, 1));
+    if ($first >= ord("a") && $first <= ord("z"))
+        return true;
+    else
+        return false;
+}
+function firstIsLowercasePl ( $text ) {
+    $first = ord(substr($text, 0, 1));
+    if ($first > 128) {
+        return in_array ($first, array (0xB1, 0xE6, 0xEA, 0xB3, 0xF1, 0xB6, 0xFE, 0xBF, 0xBC));
+    } else if ($first >= ord("a") && $first <= ord("z")) {
+        return true;
+    } else {
+        return false;
+    }
+}
+$firstIsLowercase = firstIsLowercaseEn;
+
 ## Esperanto:
 if ( $wikiLanguage =="eo" ) {
     $wikiTalk = "Priparolu" ;
@@ -69,6 +88,7 @@
         return strtr ( $text , $l2u8 ) ;
     }
     $recodeCharset = recodeCharsetPl ;
+    $firstIsLowercase = firstIsLowercasePl;
 }
 
 ## Spanish
@@ -370,6 +392,7 @@
 }
 
 function getTopics ( $dir ) {
+    global $firstIsLowercase;
     $ret = array () ;
 
     $mydir = opendir($dir);
@@ -379,11 +402,13 @@
                 $a = getTopics ( "$dir/$entry" ) ;
                 foreach ( $a as $x ) array_push ( $ret , "$entry/$x" ) ;
             } else {
-                if (substr ($entry, strlen ( $entry ) - 3, 3) == '.db') {
+                if ( $firstIsLowercase ( $entry ) ) {
+                    print "Warning: File \"$entry\" starts with lower case letter, ignored<br>\n" ;
+                } else if (substr ($entry, strlen ( $entry ) - 3, 3) == '.db') {
                     $x = substr ( $entry , 0 , strlen ( $entry ) - 3 ) ;
                     array_push ( $ret , $x ) ;
                 } else {
-                    print "Warning: File \"$entry\" doesn't seem to contain an article<br>\n" ;
+                    print "Warning: File \"$entry\" doesn't seem to contain an article, ignored<br>\n" ;
                 }
             }
         }