Revision: 6350
Author: valhallasw
Date: 2009-02-13 21:45:07 +0000 (Fri, 13 Feb 2009)
Log Message:
-----------
Verbose debug information. There should be no private information leaks, but a review would be appreciated.
Modified Paths:
--------------
trunk/pywikipedia/login.py
Modified: trunk/pywikipedia/login.py
===================================================================
--- trunk/pywikipedia/login.py 2009-02-13 21:19:31 UTC (rev 6349)
+++ trunk/pywikipedia/login.py 2009-02-13 21:45:07 UTC (rev 6350)
@@ -27,6 +27,11 @@
-force Ignores if the user is already logged in, and tries to log in.
+   -v -v        Shows http requests made when logging in. This might leak
+    (doubly     private data (password, session id), so make sure to check the
+     verbose)   output. Using -log is recommended: this will output a lot of
+                data
+
If not given as parameter, the script will ask for your username and password
(password entry will be hidden), log in to your home wiki using this
combination, and store the resulting cookies (containing your password hash,
@@ -65,7 +70,7 @@
class LoginManager:
- def __init__(self, password = None, sysop = False, site = None, username=None):
+ def __init__(self, password = None, sysop = False, site = None, username=None, verbose=False):
self.site = site or wikipedia.getSite()
if username:
self.username=username
@@ -85,6 +90,7 @@
except:
raise wikipedia.NoUsername(u'ERROR: Username for %s:%s is undefined.\nIf you have an account for that site, please add such a line to user-config.py:\n\nusernames[\'%s\'][\'%s\'] = \'myUsername\'' % (self.site.family.name, self.site.lang, self.site.family.name, self.site.lang))
self.password = password
+ self.verbose = verbose
if getattr(config, 'password_file', ''):
self.readPassword()
@@ -135,19 +141,36 @@
predata["wpCaptchaWord"] = captcha['answer']
login_address = self.site.login_address()
address = login_address + '&action=submit'
-
- if self.site.hostname() in config.authenticate.keys():
+
+ if self.site.hostname() in config.authenticate.keys():
headers = {
"Content-type": "application/x-www-form-urlencoded",
"User-agent": wikipedia.useragent
}
data = self.site.urlEncode(predata)
+ if self.verbose:
+ fakepredata = predata
+ fakepredata['wpPassword'] = u'XXXX'
+ wikipedia.output(u"urllib2.urlopen(urllib2.Request('%s', %s, %s)):" % (self.site.protocol() + '://' + self.site.hostname() + address, self.site.urlEncode(fakepredata), headers))
response = urllib2.urlopen(urllib2.Request(self.site.protocol() + '://' + self.site.hostname() + address, data, headers))
data = response.read()
+ if self.verbose:
+ fakedata = re.sub(r"(session|Token)=..........", r"session=XXXXXXXXXX", data)
+ trans = config.transliterate
+ config.transliterate = False #transliteration breaks for some reason
+ wikipedia.output(data.decode(self.site.encoding()))
+ config.transliterate = trans
wikipedia.cj.save(wikipedia.COOKIEFILE)
return "Ok"
else:
response, data = self.site.postData(address, self.site.urlEncode(predata))
+ if self.verbose:
+ fakepredata = predata
+ fakepredata['wpPassword'] = u'XXXXX'
+ wikipedia.output(u"self.site.postData(%s, %s)" % (address, self.site.urlEncode(fakepredata)))
+ fakeresponsemsg = re.sub(r"(session|Token)=..........", r"session=XXXXXXXXXX", response.msg.__str__())
+ wikipedia.output(u"%s/%s\n%s" % (response.status, response.reason, fakeresponsemsg))
+ wikipedia.output(u"%s" % data)
Reat=re.compile(': (.*?);')
L = []
@@ -261,6 +284,10 @@
else:
wikipedia.showHelp('login')
return
+
+ if wikipedia.verbose > 1:
+ wikipedia.output(u"WARNING: Using -v -v on login.py might leak private data. When sharing, please double check your password is not readable and log out your bots session.")
+ verbose = True # only use this verbose when running from login.py
if logall:
if sysop:
namedict = config.sysopnames
@@ -273,13 +300,13 @@
if not forceLogin and site.loggedInAs(sysop = sysop) != None:
wikipedia.output(u'Already logged in on %s' % site)
else:
- loginMan = LoginManager(password, sysop = sysop, site = site)
+ loginMan = LoginManager(password, sysop = sysop, site = site, verbose=verbose)
loginMan.login()
except wikipedia.NoSuchSite:
wikipedia.output(lang+ u'.' + familyName + u' is not a valid site, please remove it from your config')
else:
- loginMan = LoginManager(password, sysop = sysop)
+ loginMan = LoginManager(password, sysop = sysop, verbose=verbose)
loginMan.login()
if __name__ == "__main__":
Bugs item #2597015, was opened at 2009-02-13 20:43
Message generated for change (Comment added) made by valhallasw
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2597015&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
>Status: Closed
Resolution: None
Priority: 7
Private: No
Submitted By: Razorflame (razorflame)
Assigned to: Nobody/Anonymous (nobody)
Summary: Interwiki.py gives error
Initial Comment:
Whenever I try to run interwiki.py, it gives me the error message:
TypeError: object of type 'None Type' has no len().
I use Python 2.5.4, and I have downloaded the most recent Pywikipediabot nightly update, and it still does not work. I use Windows Vista Home Premium and I am wondering why it does not want to run for me.
Thanks,
Razorflame
----------------------------------------------------------------------
>Comment By: Merlijn S. van Deen (valhallasw)
Date: 2009-02-13 20:53
Message:
Has been fixed in SVN already. Next time, please note you are using the
nightly version and copy the /entire/ stacktrace: this error does not give
any information.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2597015&group_…
Bugs item #2597015, was opened at 2009-02-13 13:43
Message generated for change (Settings changed) made by razorflame
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2597015&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
>Priority: 7
Private: No
Submitted By: Razorflame (razorflame)
Assigned to: Nobody/Anonymous (nobody)
Summary: Interwiki.py gives error
Initial Comment:
Whenever I try to run interwiki.py, it gives me the error message:
TypeError: object of type 'None Type' has no len().
I use Python 2.5.4, and I have downloaded the most recent Pywikipediabot nightly update, and it still does not work. I use Windows Vista Home Premium and I am wondering why it does not want to run for me.
Thanks,
Razorflame
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2597015&group_…
Bugs item #2597015, was opened at 2009-02-13 13:43
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2597015&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Razorflame (razorflame)
Assigned to: Nobody/Anonymous (nobody)
Summary: Interwiki.py gives error
Initial Comment:
Whenever I try to run interwiki.py, it gives me the error message:
TypeError: object of type 'None Type' has no len().
I use Python 2.5.4, and I have downloaded the most recent Pywikipediabot nightly update, and it still does not work. I use Windows Vista Home Premium and I am wondering why it does not want to run for me.
Thanks,
Razorflame
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2597015&group_…
Revision: 6347
Author: purodha
Date: 2009-02-13 15:29:12 +0000 (Fri, 13 Feb 2009)
Log Message:
-----------
Add -randomredirect page generator.
Modified Paths:
--------------
trunk/pywikipedia/family.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-02-13 12:58:56 UTC (rev 6346)
+++ trunk/pywikipedia/family.py 2009-02-13 15:29:12 UTC (rev 6347)
@@ -3376,6 +3376,9 @@
def random_address(self, code):
return "%s?useskin=monobook&title=%s:Random" % (self.path(code), self.special_namespace_url(code))
+ def randomredirect_address(self, code):
+ return "%s?useskin=monobook&title=%s:RandomRedirect" % (self.path(code), self.special_namespace_url(code))
+
def allmessages_address(self, code):
return "%s?useskin=monobook&title=%s:Allmessages&ot=html" % (self.path(code), self.special_namespace_url(code))
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-02-13 12:58:56 UTC (rev 6346)
+++ trunk/pywikipedia/pagegenerators.py 2009-02-13 15:29:12 UTC (rev 6347)
@@ -126,6 +126,11 @@
-random Work on random pages returned by [[Special:Random]].
Can also be given as "-random:n" where n is the number
of pages to be returned, else 100 pages are returned.
+
+-randomredirect Work on random redirect target pages returned by
+ [[Special:Randomredirect]]. Can also be given as
+ "-randomredirect:n" where n is the number of pages to be
+ returned, else 100 pages are returned.
"""
@@ -391,6 +396,12 @@
for page in site.randompages(number=number, repeat=repeat):
yield page
+def RandomRedirectPageGenerator(number = 100, repeat = False, site = None):
+ if site is None:
+ site = wikipedia.getSite()
+ for page in site.randomredirectpages(number=number, repeat=repeat):
+ yield page
+
def TextfilePageGenerator(filename=None, site=None):
'''
Read a file of page links between double-square-brackets, and return
@@ -919,6 +930,11 @@
title = wikipedia.input(u'Which page should be processed?')
page = wikipedia.Page(site, title)
gen = InterwikiPageGenerator(page)
+ elif arg.startswith('-randomredirect'):
+ if len(arg) == 7:
+ gen = RandomRedirectPageGenerator()
+ else:
+ gen = RandomRedirectPageGenerator(number = int(arg[8:]))
elif arg.startswith('-random'):
if len(arg) == 7:
gen = RandomPageGenerator()
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-02-13 12:58:56 UTC (rev 6346)
+++ trunk/pywikipedia/wikipedia.py 2009-02-13 15:29:12 UTC (rev 6347)
@@ -4225,6 +4225,7 @@
unusedcategories(): Special:Unusuedcategories (yields Category)
unusedfiles(): Special:Unusedimages (yields ImagePage)
randompages: Special:Random
+ randomredirectpages: Special:Random
withoutinterwiki: Special:Withoutinterwiki
linksearch: Special:Linksearch
@@ -4292,6 +4293,7 @@
double_redirects_address: Special:Doubleredirects.
broken_redirects_address: Special:Brokenredirects.
random_address: Special:Random.
+ randomredirect_address: Special:Random.
login_address: Special:Userlogin.
captcha_image_address(id): Special:Captcha for image 'id'.
watchlist_address: Special:Watchlist editor.
@@ -5376,10 +5378,13 @@
if not repeat:
break
- def randompages(self, number=1, repeat=False):
- """Yield irandom pages via Special:Random."""
+ def randompages(self, number=1, repeat=False, randmoredirect=False):
+ """Yield irandom pages via Special:Random, or Special:RandmRedirect."""
seen = set()
- path = self.random_address()
+ if randomredirect:
+ path = self.randomredirect_address()
+ else:
+ path = self.random_address()
entryR = re.compile('var wgPageName = "(?P<title>.+?)";')
while True:
for ignored in range(number):
@@ -5394,6 +5399,28 @@
if title not in seen:
seen.add(title)
page = Page(self, title)
+
+ def randomredirectpages(self, number=1, repeat=False, randmoredirect=True):
+ """Yield irandom pages via Special:Random, or Special:RandmRedirect."""
+ seen = set()
+ if randomredirect:
+ path = self.randomredirect_address()
+ else:
+ path = self.random_address()
+ entryR = re.compile('var wgPageName = "(?P<title>.+?)";')
+ while True:
+ for ignored in range(number):
+ # MediaWiki advances its random pages only every second.
+ time.sleep(1)
+ html = self.getUrl(path)
+ # output(u' html=%s' % (html))
+ m = entryR.search(html)
+ if m != None:
+ title = m.group('title')
+ # output(u' title=%s' % ( title ))
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
yield page
if not repeat:
break
@@ -5965,6 +5992,10 @@
"""Return path to Special:Random."""
return self.family.random_address(self.lang)
+ def randomredirect_address(self):
+ """Return path to Special:RandomRedirect."""
+ return self.family.randomredirect_address(self.lang)
+
def login_address(self):
"""Return path to Special:Userlogin."""
return self.family.login_address(self.lang)
Feature Requests item #2595920, was opened at 2009-02-13 14:26
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603141&aid=2595920&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Priority: 4
Private: No
Submitted By: Multichill (multichill)
Assigned to: Multichill (multichill)
Summary: Change imagerecat.py to use lists in family file
Initial Comment:
Imagerecat.py has to be changed to use the redirect template list and the disambiguation template list in the Commons family file:
self.category_redirect_templates = {
'commons': (u'Category redirect',
u'Categoryredirect',
u'See cat',
u'Seecat',
u'Catredirect',
u'Cat redirect',
u'CatRed',
u'Cat-red',
u'Catredir',
u'Redirect category'),
}
self.disambiguationTemplates = {
'commons': [u'Disambig', u'Disambiguation', u'Razločitev',
u'Begriffsklärung']
}
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603141&aid=2595920&group_…
Revision: 6344
Author: purodha
Date: 2009-02-12 16:23:44 +0000 (Thu, 12 Feb 2009)
Log Message:
-----------
Add -until:pagetitle command line parameter so as to complement -start:pagetitle
in interwiki.py, thereby solving tracker item 1911836, see:
https://sourceforge.net/tracker2/index.php?func=detail&aid=1911836&group_id…
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-02-12 16:20:23 UTC (rev 6343)
+++ trunk/pywikipedia/interwiki.py 2009-02-12 16:23:44 UTC (rev 6344)
@@ -64,6 +64,12 @@
-number: used as -number:#, specifies that the robot should process
that amount of pages and then stop. This is only useful in
combination with -start. The default is not to stop.
+
+ -until: used as -until:title, specifies that the robot should process
+ pages in wiki default sort order up to, and including, "title"
+ and then stop. This is only useful in combination with -start.
+ The default is not to stop.
+ Note: do not specify a namespace, even if -start has one.
-bracket only work on pages that have (in the home language) parenthesis
in their title. All other pages are skipped.
@@ -1378,11 +1384,13 @@
# Keep correct counters
self.plus(site)
- def setPageGenerator(self, pageGenerator, number = None):
+# def setPageGenerator(self, pageGenerator, number = None):
+ def setPageGenerator(self, pageGenerator, number = None, until = None):
"""Add a generator of subjects. Once the list of subjects gets
too small, this generator is called to produce more Pages"""
self.pageGenerator = pageGenerator
self.generateNumber = number
+ self.generateUntil = until
def dump(self):
site = wikipedia.getSite()
@@ -1420,12 +1428,14 @@
continue
break
+ if len(self.generateUntil) > 0:
+ if page.titleWithoutNamespace() > self.generateUntil:
+ raise StopIteration
self.add(page, hints = hints)
self.generated += 1
if self.generateNumber:
- if self.generated == self.generateNumber:
- self.pageGenerator = None
- break
+ if self.generated >= self.generateNumber:
+ raise StopIteration
except StopIteration:
self.pageGenerator = None
break
@@ -1620,6 +1630,7 @@
# default to [] which means all namespaces will be processed
namespaces = []
number = None
+ until = None
warnfile = None
# a normal PageGenerator (which doesn't give hints, only Pages)
hintlessPageGen = None
@@ -1740,6 +1751,8 @@
# deprecated for consistency with other scripts
elif arg.startswith('-number:'):
number = int(arg[8:])
+ elif arg.startswith('-until:'):
+ until = arg[7:]
elif arg.startswith('-neverlink:'):
globalvar.neverlink += arg[11:].split(",")
elif arg.startswith('-ignore:'):
@@ -1826,7 +1839,7 @@
if len(namespaces) > 0:
hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator(hintlessPageGen, namespaces)
# we'll use iter() to create make a next() function available.
- bot.setPageGenerator(iter(hintlessPageGen), number = number)
+ bot.setPageGenerator(iter(hintlessPageGen), number = number, until=until)
elif warnfile:
# TODO: filter namespaces if -namespace parameter was used
readWarnfile(warnfile, bot)