User:BenSbot/Code3
From JoCopedia
Here is the code for the third program that I run. If you have any questions, ask on the discussion page. Note that this is a combination of both Code1 and Code2.
import wikipedia
import catlib
import pagegenerators
import re
import datetime
# Handle to the wiki this bot operates on.
site = wikipedia.getSite()

# One [link, date, city, venue, encore flag, show title] entry per song
# occurrence found on a show page.
songlist = []
# Lowercased link -> (link text as first seen, number of occurrences).
linkslist = {}

# The encore-heading patterns match from a ":"-prefixed heading line to the
# end of the page: DOTALL lets the trailing ".*" run across newlines.
_SECTION_FLAGS = re.IGNORECASE | re.DOTALL | re.MULTILINE
# "Encore" or "First Encore" heading onwards.
rea = re.compile('^:\\s*\\W{3,3}(First\\s*)?Encores?\\W{3,3}.*', _SECTION_FLAGS)
# Plain "Encore" heading onwards.
reb = re.compile('^:\\s*\\W{3,3}Encores?\\W{3,3}.*', _SECTION_FLAGS)
# "First Encore" heading onwards.
rec = re.compile('^:\\s*\\W{3,3}First\\s*Encores?\\W{3,3}.*', _SECTION_FLAGS)
# "Second Encore" heading onwards.
red = re.compile('^:\\s*\\W{3,3}Second\\s*Encores?\\W{3,3}.*', _SECTION_FLAGS)

# Any non-empty line that does not start with "#"; substituting "" for it
# leaves only the "#" lines.
ree = re.compile('^[^#\\n].*$', re.MULTILINE)
# A [[wiki link]].
ref = re.compile('\\[\\[[^\\]]*\\]\\]')
# Up to two blanks, a "|" and the rest of the line (the "|label]]" tail of a
# piped link).
ref2 = re.compile('[ \t]{0,2}\\|.*')

# "* Location: ..." line and its label prefix.
reg = re.compile('^\\*\\s*Location:.*$', re.MULTILINE)
reg2 = re.compile('\\*\\s*Location:\\s*')
# "* Venue: ..." line and its label prefix.
reh = re.compile('^\\*\\s*Venue:.*$', re.MULTILINE)
reh2 = re.compile('\\*\\s*Venue:\\s*')

# A YYYY-MM-DD date in the years 1900-2099.
rei = re.compile('(19|20)\\d\\d[-](0[1-9]|1[012])[-](0[1-9]|[12][0-9]|3[01])')

# Every page in Category:Shows, fetched up front as a list.
showscat = catlib.Category(site, 'Category:Shows')
showslist = list(pagegenerators.CategorizedPageGenerator(showscat))
def _first_match(pattern, text):
    """Return the text matched by the first hit of pattern in text, or None."""
    found = pattern.search(text)
    if found is None:
        return None
    return found.group()


# Walk every show page: record each song link per section in songlist and
# tally every "#"-line link in linkslist.
for show in showslist:
    page = show.get()
    title = show.title()

    # Main set: drop everything from the "Encore"/"First Encore" heading on,
    # then blank every line that is not a "#" line.
    main_set = ree.sub("", rea.sub("", page))

    # "Encore" and "First Encore" sections both end where a "Second Encore"
    # heading begins, so that tail is cut off before filtering the lines.
    encore = ""
    tail = _first_match(reb, page)
    if tail is not None:
        encore = ree.sub("", red.sub("", tail))

    first_encore = ""
    tail = _first_match(rec, page)
    if tail is not None:
        first_encore = ree.sub("", red.sub("", tail))

    second_encore = ""
    tail = _first_match(red, page)
    if tail is not None:
        second_encore = ree.sub("", tail)

    # Location and venue come from their "* Label: value" lines, with the
    # label stripped; "Unknown" when the page has no such line.
    city = "Unknown"
    line = _first_match(reg, page)
    if line is not None:
        city = reg2.sub("", line)

    venue = "Unknown"
    line = _first_match(reh, page)
    if line is not None:
        venue = reh2.sub("", line)

    # The show date is read from the page's own link text; empty if the
    # link contains no YYYY-MM-DD date.
    date = _first_match(rei, show.aslink())
    if date is None:
        date = ""

    # One songlist entry per link, in section order, tagged with the value
    # later shown in the "Encore?" column.
    sections = [
        (main_set, " "),
        (encore, "Yes"),
        (first_encore, "First"),
        (second_encore, "Second"),
    ]
    for section_text, encore_flag in sections:
        for link in ref.findall(section_text):
            # Strip any "|label" part so piped and plain links compare equal.
            link = ref2.sub("]]", link)
            songlist.append([link, date, city, venue, encore_flag, title])

    # Tally every link on a "#" line anywhere on the page, case-insensitively,
    # keeping the spelling of the first occurrence for display.
    for link in ref.findall(ree.sub("", page)):
        link = ref2.sub("]]", link)
        key = link.lower()
        if key in linkslist:
            first_seen, seen = linkslist[key]
            linkslist[key] = (first_seen, seen + 1)
        else:
            linkslist[key] = (link, 1)
# Remove every link listed on the bot's blacklist page from the tally.
bls = wikipedia.Page(site, u"User:BenSbot/Code1/Blacklist").get()
blacklist = ref.findall(bls)
for link in blacklist:
    # Tally keys are lowercased links with any "|label" part replaced by "]]"
    # (via ref2), so a blacklist entry must be normalised the same way or a
    # piped entry such as [[Song|label]] would never match.
    key = ref2.sub("]]", link).lower()
    # pop with a default: entries that were never tallied are simply ignored.
    linkslist.pop(key, None)
# Build and save the "SongStats" page: an intro paragraph followed by one
# bullet per tallied link, most frequent first.
#
# The intro is written as adjacent string literals with explicit spaces at
# each join; a backslash continuation inside a single literal glues the last
# word of one line to the first word of the next.
output1 = (
    "The following is a list of the songs [[Jonathan Coulton]] has played "
    "in concert. This list has been compiled from the setlists currently available "
    "here on JoCopedia in the [[:Category:Shows|Shows]] section, by an awesome bot "
    "designed by user [[User:BenS|BenS]]. Keep in mind that not all setlists are "
    "currently available to JoCopedia, and not all setlists are 100%. But this is a "
    "pretty good indicator. This list is current as of "
    + str(datetime.date.today()) + "\n\n"
)

# Highest count first; ties are ordered by link text ascending.
items = sorted(linkslist.values(), key=lambda entry: (-entry[1], entry[0]))

bullets = ["*" + link + ": " + str(count) + "\n" for link, count in items]
output1 = output1 + "".join(bullets) + "\n" + "[[Category:Show Statistics]]"

SongStats = wikipedia.Page(site, u"SongStats")
SongStats.put(output1, u"Song statistics")
# For every page in Category:Songs that was played at least once, write a
# "<song title>/Concerts" page listing the shows it appeared in.
songscat = catlib.Category(site, 'Category:Songs')
songslist = list(pagegenerators.CategorizedPageGenerator(songscat))

# Index the performances by lowercased link once, instead of rescanning the
# whole songlist for every song page.
performances = {}
for entry in songlist:
    performances.setdefault(entry[0].lower(), []).append(entry)

table_header = (
    "{|class=\"wikitable sortable\" background = \"white\" "
    "border = \"1px solid rgb(153, 153, 153)\" cellpadding = \"2%\" "
    "rules = \"all\"\n"
    "!'''Date'''!!'''Location'''!!'''Venue'''!!'''Encore?'''\n"
)
page_footer = (
    "|}\n''NB: This page was created by a bot and was last updated on: "
    + str(datetime.date.today())
    + "\n\n[[Category:Songs by Concert]]"
)

for song in songslist:
    played = performances.get(song.aslink().lower())
    if not played:
        # Never seen in any setlist: no page is written for this song.
        continue

    # Oldest show first (entry[1] is the YYYY-MM-DD date string).
    tablelist = sorted(played, key=lambda entry: entry[1])

    # Titles, locations and venues are concatenated as-is rather than via
    # str(): presumably they are unicode objects from pywikipedia, and str()
    # on non-ASCII unicode raises UnicodeEncodeError under Python 2.
    song_title = song.title()
    parts = [
        "{{SongNav\n|cat=no\n}}\n\n\"'''",
        song_title,
        "'''\" was played at the following concerts: \n\n",
        table_header,
    ]
    for _link, date, city, venue, encore_flag, show_title in tablelist:
        parts.append(
            "|-\n"
            "| <span style=\"display:none\">&</span>[["
            + show_title + "|" + date + "]]"
            + "\n| " + city
            + "\n| " + venue
            + "\n| " + encore_flag
            + "\n"
        )
    parts.append(page_footer)
    output2 = "".join(parts)

    page = wikipedia.Page(site, song_title + "/Concerts")
    # Save when the page is new or its text differs from what was generated.
    # NOTE(review): the footer embeds today's date, so a page last written on
    # an earlier day always differs and is re-saved.
    if not page.exists() or output2 != page.get():
        page.put(output2, u"Songs by Concert")

# Parenthesised form prints the same text under the Python 2 print statement.
print("fin")
[edit] Explanation
For an explanation, see the explanations of Code1 and Code2. Please note that some variables have been renamed because of name clashes between the two programs.