http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10152
Revision: 10152
Author: amir
Date: 2012-04-25 18:16:32 +0000 (Wed, 25 Apr 2012)
Log Message:
-----------
Adding categories sort for Persian Wikipedia
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2012-04-25 16:55:53 UTC (rev 10151)
+++ trunk/pywikipedia/cosmetic_changes.py 2012-04-25 18:16:32 UTC (rev 10152)
@@ -295,6 +295,14 @@
# Adding categories
if categories:
+ #Sorting categories in alphabetic order. beta test only on Persian Wikipedia
+ if self.site.language() == 'fa':
+ categories.sort()
+ #Taking main cats to top
+ for name in categories:
+ if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
+ categories.remove(name)
+ categories.insert(0, name)
text = pywikibot.replaceCategoryLinks(text, categories,
site=self.site)
# Put the iw message back
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10149
Revision: 10149
Author: xqt
Date: 2012-04-21 13:16:19 +0000 (Sat, 21 Apr 2012)
Log Message:
-----------
make DiscussionPage a pywikibot.Page subclass
additional messages
TM regex sometimes does not match timezone name, should be omitted by finding the timestamp
TODO: Unfortunately some month strings aren't recognized; must be changed.
Modified Paths:
--------------
trunk/pywikipedia/archivebot.py
Modified: trunk/pywikipedia/archivebot.py
===================================================================
--- trunk/pywikipedia/archivebot.py 2012-04-21 10:26:58 UTC (rev 10148)
+++ trunk/pywikipedia/archivebot.py 2012-04-21 13:16:19 UTC (rev 10149)
@@ -157,7 +157,7 @@
def txt2timestamp(txt, format):
"""Attempts to convert the timestamp 'txt' according to given 'format'.
On success, returns the time tuple; on failure, returns None."""
- #print txt, format
+## print txt, format
try:
return time.strptime(txt,format)
except ValueError:
@@ -234,17 +234,16 @@
TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)', line)
if not TM:
# 14:23, 12. Jan. 2009 (UTC)
- pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d) \(UTC\)')
+ pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d) \((?:UTC|CES?T)\)')
TM = pat.search(line)
if TM:
-# pywikibot.output(TM)
TIME = txt2timestamp(TM.group(0),"%d. %b %Y kl. %H:%M (%Z)")
if not TIME:
TIME = txt2timestamp(TM.group(0), "%Y. %B %d., %H:%M (%Z)")
if not TIME:
TIME = txt2timestamp(TM.group(0),"%d. %b %Y kl.%H:%M (%Z)")
if not TIME:
- TIME = txt2timestamp(TM.group(0),"%H:%M, %d %B %Y (%Z)")
+ TIME = txt2timestamp(re.sub(' *\([^ ]+\) *', '', TM.group(0)),"%H:%M, %d %B %Y")
if not TIME:
TIME = txt2timestamp(TM.group(0),"%H:%M, %d %b %Y (%Z)")
if not TIME:
@@ -260,12 +259,12 @@
if not TIME:
TIME = txt2timestamp(TM.group(0),"%d. %Bta %Y kello %H.%M (%Z)")
if not TIME:
- TIME = txt2timestamp(TM.group(0),"%H:%M, %d. %b. %Y (%Z)")
+ TIME = txt2timestamp(re.sub(' *\([^ ]+\) *', '', TM.group(0)), "%H:%M, %d. %b. %Y")
if TIME:
- self.timestamp = max(self.timestamp,time.mktime(TIME))
-# pywikibot.output(u'Time to be parsed: %s' % TM.group(0))
-# pywikibot.output(u'Parsed time: %s' % TIME)
-# pywikibot.output(u'Newest timestamp in thread: %s' % TIME)
+ self.timestamp = max(self.timestamp, time.mktime(TIME))
+## pywikibot.output(u'Time to be parsed: %s' % TM.group(0))
+## pywikibot.output(u'Parsed time: %s' % TIME)
+## pywikibot.output(u'Newest timestamp in thread: %s' % TIME)
def size(self):
return len(self.title) + len(self.content) + 12
@@ -286,15 +285,15 @@
return message('archivebot-older-than') + ' ' + reT.group(1)
return ''
-class DiscussionPage(object):
+class DiscussionPage(pywikibot.Page):
"""A class that represents a single discussion page as well as an archive
page. Feed threads to it and run an update() afterwards."""
- #TODO: Make it a subclass of pywikibot.Page
def __init__(self, title, archiver, vars=None):
- self.title = title
+ pywikibot.Page.__init__(self, Site, title, defaultNamespace=3)
+## self.title = title
self.threads = []
- self.Page = pywikibot.Page(Site,self.title)
+## self.Page = self
self.full = False
self.archiver = archiver
self.vars = vars
@@ -312,7 +311,7 @@
self.threads = []
self.archives = {}
self.archivedThreads = 0
- lines = self.Page.get().split('\n')
+ lines = self.get().split('\n')
state = 0 #Reading header
curThread = None
for line in lines:
@@ -329,6 +328,7 @@
self.header += line + '\n'
if curThread:
self.threads.append(curThread)
+ pywikibot.output(u'%d Threads found.' % len(self.threads))
def feedThread(self, thread, maxArchiveSize=(250*1024,'B')):
self.threads.append(thread)
@@ -353,7 +353,7 @@
newtext += t.toText()
if self.full:
summary += ' ' + message('archivebot-archive-full')
- self.Page.put(newtext, minorEdit=True, comment=summary)
+ self.put(newtext, minorEdit=True, comment=summary)
class PageArchiver(object):
"""A class that encapsulates all archiving methods.
@@ -376,7 +376,7 @@
self.Page = DiscussionPage(Page.title(),self)
self.loadConfig()
self.commentParams = {
- 'from' : self.Page.title,
+ 'from' : self.Page.title(),
}
self.archives = {}
self.archivedThreads = 0
@@ -400,12 +400,12 @@
def key_ok(self):
s = new_hash()
s.update(self.salt+'\n')
- s.update(self.Page.title.encode('utf8')+'\n')
+ s.update(self.Page.title().encode('utf8')+'\n')
return self.get('key') == s.hexdigest()
def loadConfig(self):
hdrlines = self.Page.header.split('\n')
-# pywikibot.output(u'Looking for: %s' % self.tpl)
+ pywikibot.output(u'Looking for: %s in %s' % (self.tpl, self.Page.title()))
mode = 0
for line in hdrlines:
if mode == 0 and re.search('{{'+self.tpl,line):
@@ -419,7 +419,7 @@
continue
if mode == 0 or not self.get('algo',''):
- raise MissingConfigError
+ raise MissingConfigError(u'Missing od malformed template or missing algo')
#Last minute fix:
self.set('archive', self.get('archive').replace('_',' '), True)
@@ -433,7 +433,7 @@
if not archive:
return
if not self.force \
- and not self.Page.title+'/' == archive[:len(self.Page.title)+1] \
+ and not self.Page.title()+'/' == archive[:len(self.Page.title())+1] \
and not self.key_ok():
raise ArchiveSecurityError
if not archive in self.archives:
@@ -447,6 +447,7 @@
self.Page.threads = []
T = time.mktime(time.gmtime())
whys = []
+ pywikibot.output(u'Processing %d threads' % len(oldthreads))
for t in oldthreads:
if len(oldthreads) - self.archivedThreads \
<= int(self.get('minthreadsleft',5)):
@@ -477,12 +478,14 @@
return set(whys)
def run(self):
- if not self.Page.Page.botMayEdit(Site.username):
+ if not self.Page.botMayEdit(Site.username):
return
whys = self.analyzePage()
if self.archivedThreads < int(self.get('minthreadstoarchive',2)):
# We might not want to archive a measly few threads
# (lowers edit frequency)
+ pywikibot.output(u'There are only %d Threads. Skipping'
+ % self.archivedThreads)
return
if whys:
#Save the archives first (so that bugs don't cause a loss of data)
@@ -498,7 +501,7 @@
self.Page.header = rx.sub(self.attr2text(),self.Page.header)
self.commentParams['count'] = self.archivedThreads
self.commentParams['archives'] \
- = ', '.join(['[['+a.title+']]' for a in self.archives.values()])
+ = ', '.join(['[['+a.title()+']]' for a in self.archives.values()])
if not self.commentParams['archives']:
self.commentParams['archives'] = '/dev/null'
self.commentParams['why'] = ', '.join(whys)
@@ -588,8 +591,8 @@
pagelist = sorted(pagelist)
#if not options.namespace == None:
# pagelist = [pg for pg in pagelist if pg.namespace()==options.namespace]
-
- for pg in pagelist:
+ for pg in iter(pagelist):
+ pywikibot.output(u'Processing %s' % pg)
# Catching exceptions, so that errors in one page do not bail out
# the entire process
try:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10147
Revision: 10147
Author: xqt
Date: 2012-04-21 04:30:25 +0000 (Sat, 21 Apr 2012)
Log Message:
-----------
re-arrange doc; left margin on pagegenerators' indent; update from trunk r10146
Modified Paths:
--------------
branches/rewrite/scripts/category.py
Modified: branches/rewrite/scripts/category.py
===================================================================
--- branches/rewrite/scripts/category.py 2012-04-21 04:29:45 UTC (rev 10146)
+++ branches/rewrite/scripts/category.py 2012-04-21 04:30:25 UTC (rev 10147)
@@ -6,54 +6,55 @@
Syntax: python category.py action [-option]
where action can be one of these:
- * add - mass-add a category to a list of pages
- * remove - remove category tag from all pages in a category
- * move - move all pages in a category to another category
- * tidy - tidy up a category by moving its articles into subcategories
- * tree - show a tree of subcategories of a given category
- * listify - make a list of all of the articles that are in a category
+ * add - mass-add a category to a list of pages
+ * remove - remove category tag from all pages in a category
+ * move - move all pages in a category to another category
+ * tidy - tidy up a category by moving its articles into subcategories
+ * tree - show a tree of subcategories of a given category
+ * listify - make a list of all of the articles that are in a category
and option can be one of these:
Options for "add" action:
- * -person - sort persons by their last name
- * -create - If a page doesn't exist, do not skip it, create it instead
+ * -person - sort persons by their last name
+ * -create - If a page doesn't exist, do not skip it, create it instead
If action is "add", the following options are supported:
&params;
Options for "listify" action:
- * -overwrite - This overwrites the current page with the list even if
- something is already there.
- * -showimages - This displays images rather than linking them in the list.
- * -talkpages - This outputs the links to talk pages of the pages to be
- listified in addition to the pages themselves.
+ * -overwrite - This overwrites the current page with the list even if
+ something is already there.
+ * -showimages - This displays images rather than linking them in the list.
+ * -talkpages - This outputs the links to talk pages of the pages to be
+ listified in addition to the pages themselves.
Options for "remove" action:
- * -nodelsum - This specifies not to use the custom edit summary as the
- deletion reason. Instead, it uses the default deletion reason
- for the language, which is "Category was disbanded" in English.
+ * -nodelsum - This specifies not to use the custom edit summary as the
+ deletion reason. Instead, it uses the default deletion reason
+ for the language, which is "Category was disbanded" in
+ English.
Options for several actions:
- * -rebuild - reset the database
- * -from: - The category to move from (for the move option)
- Also, the category to remove from in the remove option
- Also, the category to make a list of in the listify option
- * -to: - The category to move to (for the move option)
- - Also, the name of the list to make in the listify option
+ * -rebuild - reset the database
+ * -from: - The category to move from (for the move option)
+ Also, the category to remove from in the remove option
+ Also, the category to make a list of in the listify option
+ * -to: - The category to move to (for the move option)
+ - Also, the name of the list to make in the listify option
NOTE: If the category names have spaces in them you may need to use
a special syntax in your shell so that the names aren't treated as
separate parameters. For instance, in BASH, use single quotes,
e.g. -from:'Polar bears'
- * -batch - Don't prompt to delete emptied categories (do it
- automatically).
- * -summary: - Pick a custom edit summary for the bot.
- * -inplace - Use this flag to change categories in place rather than
- rearranging them.
- * -recurse - Recurse through all subcategories of categories.
- * -match - Only work on pages whose titles match the given regex (for
- move and remove actions).
+ * -batch - Don't prompt to delete emptied categories (do it
+ automatically).
+ * -summary: - Pick a custom edit summary for the bot.
+ * -inplace - Use this flag to change categories in place rather than
+ rearranging them.
+ * -recurse - Recurse through all subcategories of categories.
+ * -match - Only work on pages whose titles match the given regex (for
+ move and remove actions).
For the actions tidy and tree, the bot will store the category structure
locally in category.dump. This saves time and server load, but if it uses
@@ -81,6 +82,7 @@
# (C) leogregianin, 2004-2008
# (C) Cyde, 2006-2010
# (C) Anreas J Schwab, 2007
+# (C) xqt, 2009-2012
# (C) Pywikipedia team, 2008-2012
#
__version__ = '$Id$'
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10146
Revision: 10146
Author: xqt
Date: 2012-04-21 04:29:45 +0000 (Sat, 21 Apr 2012)
Log Message:
-----------
re-arrange doc; left margin on pagegenerators' indent
Modified Paths:
--------------
trunk/pywikipedia/category.py
Modified: trunk/pywikipedia/category.py
===================================================================
--- trunk/pywikipedia/category.py 2012-04-20 16:40:43 UTC (rev 10145)
+++ trunk/pywikipedia/category.py 2012-04-21 04:29:45 UTC (rev 10146)
@@ -6,58 +6,59 @@
Syntax: python category.py action [-option]
where action can be one of these:
- * add - mass-add a category to a list of pages
- * remove - remove category tag from all pages in a category
- * move - move all pages in a category to another category
- * tidy - tidy up a category by moving its articles into subcategories
- * tree - show a tree of subcategories of a given category
- * listify - make a list of all of the articles that are in a category
+ * add - mass-add a category to a list of pages
+ * remove - remove category tag from all pages in a category
+ * move - move all pages in a category to another category
+ * tidy - tidy up a category by moving its articles into subcategories
+ * tree - show a tree of subcategories of a given category
+ * listify - make a list of all of the articles that are in a category
and option can be one of these:
Options for "add" action:
- * -person - sort persons by their last name
- * -create - If a page doesn't exist, do not skip it, create it instead
+ * -person - sort persons by their last name
+ * -create - If a page doesn't exist, do not skip it, create it instead
If action is "add", the following options are supported:
&params;
Options for "listify" action:
- * -overwrite - This overwrites the current page with the list even if
- something is already there.
- * -showimages - This displays images rather than linking them in the list.
- * -talkpages - This outputs the links to talk pages of the pages to be
- listified in addition to the pages themselves.
+ * -overwrite - This overwrites the current page with the list even if
+ something is already there.
+ * -showimages - This displays images rather than linking them in the list.
+ * -talkpages - This outputs the links to talk pages of the pages to be
+ listified in addition to the pages themselves.
Options for "remove" action:
- * -nodelsum - This specifies not to use the custom edit summary as the
- deletion reason. Instead, it uses the default deletion reason
- for the language, which is "Category was disbanded" in English.
+ * -nodelsum - This specifies not to use the custom edit summary as the
+ deletion reason. Instead, it uses the default deletion reason
+ for the language, which is "Category was disbanded" in
+ English.
Options for "move" action:
- * -hist - Creates a nice wikitable on the talk page of target category
- that contains detailed page history of the source category.
+ * -hist - Creates a nice wikitable on the talk page of target category
+ that contains detailed page history of the source category.
Options for several actions:
- * -rebuild - reset the database
- * -from: - The category to move from (for the move option)
- Also, the category to remove from in the remove option
- Also, the category to make a list of in the listify option
- * -to: - The category to move to (for the move option)
- - Also, the name of the list to make in the listify option
+ * -rebuild - reset the database
+ * -from: - The category to move from (for the move option)
+ Also, the category to remove from in the remove option
+ Also, the category to make a list of in the listify option
+ * -to: - The category to move to (for the move option)
+ - Also, the name of the list to make in the listify option
NOTE: If the category names have spaces in them you may need to use
a special syntax in your shell so that the names aren't treated as
separate parameters. For instance, in BASH, use single quotes,
e.g. -from:'Polar bears'
- * -batch - Don't prompt to delete emptied categories (do it
- automatically).
- * -summary: - Pick a custom edit summary for the bot.
- * -inplace - Use this flag to change categories in place rather than
- rearranging them.
- * -recurse - Recurse through all subcategories of categories.
- * -match - Only work on pages whose titles match the given regex (for
- move and remove actions).
+ * -batch - Don't prompt to delete emptied categories (do it
+ automatically).
+ * -summary: - Pick a custom edit summary for the bot.
+ * -inplace - Use this flag to change categories in place rather than
+ rearranging them.
+ * -recurse - Recurse through all subcategories of categories.
+ * -match - Only work on pages whose titles match the given regex (for
+ move and remove actions).
For the actions tidy and tree, the bot will store the category structure
locally in category.dump. This saves time and server load, but if it uses
@@ -85,6 +86,7 @@
# (C) leogregianin, 2004-2008
# (C) Cyde, 2006-2010
# (C) Anreas J Schwab, 2007
+# (C) xqt, 2009-2012
# (C) Pywikipedia team, 2008-2012
#
__version__ = '$Id$'