First, I know there are other Python wiki API classes out there. I'm writing this one because I don't need all the bells and whistles — no edits, no talk pages, and so on. I just need to be able to search for titles and retrieve the wiki markup. Second, I'm new to Python, so any advice, suggestions, comments, or a full review would be greatly appreciated.
# -*- coding: utf-8 -*-
import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode
try:
import json
except:
import simplejson as json
def enum(*sequential, **named):
    """Build a simple enum-like class.

    Positional names are numbered from 0 in order; keyword arguments
    supply explicit values. Returns a new type whose class attributes
    are the enum members.
    """
    members = {}
    for value, name in enumerate(sequential):
        members[name] = value
    members.update(named)
    return type('Enum', (), members)
class Wiki:
    """Minimal read-only MediaWiki API client.

    Supports searching for page titles and fetching the raw wiki markup
    of a page. No editing, no talk pages.
    """

    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        """
        api - Base URL of the MediaWiki api.php endpoint
        """
        self.api = api

    def downloadFile(self, URL=None):
        """Perform an HTTP GET request.

        URL - The URL to fetch
        returns - The response body decoded as UTF-8 unicode
        """
        opener = urllib2.build_opener()
        # Some wikis reject urllib2's default user agent, so present a
        # browser-like one instead.
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(URL)
        try:
            data = response.read()
        finally:
            # Close even if read() raises, so the connection is not leaked.
            response.close()
        return data.decode(encoding='UTF-8', errors='strict')

    def search(self, searchString):
        """Search the wiki for titles.

        searchString - Text to search for (str or unicode)
        returns - List of up to 10 matching page titles as unicode;
                  empty list when searchString is empty or nothing matches
        """
        results = []
        if searchString != u"":
            encoded_searchString = searchString
            if isinstance(encoded_searchString, unicode):
                encoded_searchString = searchString.encode('utf-8')
            url = (self.api
                   + "?action=query&list=search&format=json&srlimit=10&srsearch="
                   + urllib2.quote(encoded_searchString))
            rawData = self.downloadFile(url)
            # Renamed from 'object' to avoid shadowing the builtin.
            reply = json.loads(rawData)
            if reply and 'query' in reply:
                for item in reply['query']['search']:
                    wikiTitle = item['title']
                    if isinstance(wikiTitle, str):
                        wikiTitle = wikiTitle.decode(encoding='UTF-8',
                                                     errors='strict')
                    results.append(wikiTitle)
        return results

    def searchTop(self, searchString):
        """Return the top-ranked title for a search, or u"" when none."""
        results = self.search(searchString)
        if results:
            return results[0]
        return u""

    def getPage(self, title):
        """Return the raw wiki markup for the best-matching page.

        title - Title (or approximate title) to look up; the top search
                result for it is fetched
        returns - The page's wiki markup, or u"" when no page is found
        """
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)
        if wikiTitle != u"":
            encoded_title = wikiTitle
            if isinstance(encoded_title, unicode):
                # BUG FIX: the original encoded 'title' (the caller's raw
                # input) here instead of the resolved wikiTitle, so the
                # corrected title from the search was silently discarded.
                encoded_title = wikiTitle.encode('utf-8')
            url = (self.api
                   + "?action=query&prop=revisions&format=json"
                   + "&rvprop=content&rvlimit=1&titles="
                   + urllib2.quote(encoded_title))
            rawData = self.downloadFile(url)
            reply = json.loads(rawData)
            for k, v in reply['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']
        return u""
Updated with Lattyware's suggestion.

Updated code

Updated with Winston Ewert's suggestions. I've kept a way to fail silently, defaulted to off. I'm leaving it in because, if this were passed a list of titles to search for and process, I'd like to be able to skip over the errors and keep moving through the rest of the list.
# -*- coding: utf-8 -*-
import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode
try:
import json
except:
import simplejson as json
def enum(*sequential, **named):
    """Create an enum-like type.

    Each positional name is assigned its index (0, 1, 2, ...); keyword
    arguments map names to explicit values. The returned object exposes
    every member as a class attribute.
    """
    attrs = dict(named)
    index = 0
    for name in sequential:
        attrs[name] = index
        index += 1
    return type('Enum', (), attrs)
class Wiki:
    """Minimal read-only MediaWiki API client.

    Supports searching for page titles and fetching the raw wiki markup
    of a page. No editing, no talk pages.
    """

    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        """
        api - Base URL of the MediaWiki api.php endpoint
        """
        self.api = api

    def __download_file(self, url, utf_8=True):
        """Perform an HTTP GET request.

        url - The URL to fetch
        utf_8 - Should the results be converted to unicode UTF-8
        returns - The downloaded data
        """
        opener = urllib2.build_opener()
        # Some wikis reject urllib2's default user agent, so present a
        # browser-like one instead.
        opener.addheaders = [('User-agent',
                              'Mozilla/5.0 (compatible; wiki parser thing')]
        response = opener.open(url)
        try:
            data = response.read()
        finally:
            # Close even if read() raises, so the connection is not leaked.
            response.close()
        if utf_8:
            return data.decode(encoding='UTF-8', errors='strict')
        else:
            return data

    def search(self, search_string):
        """Search the wiki for titles.

        search_string - The search string to search the wiki for
        returns - Array of page titles are returned, empty array if none
        """
        results = []
        if search_string:
            if isinstance(search_string, unicode):
                search_string = search_string.encode('utf-8')
            url = (self.api
                   + "?action=query&list=search&format=json&srlimit=10&srsearch="
                   + urllib2.quote(search_string))
            raw_data = self.__download_file(url)
            # Renamed from 'object' to avoid shadowing the builtin.
            reply = json.loads(raw_data)
            if 'query' in reply:
                for item in reply['query']['search']:
                    wiki_title = item['title']
                    if isinstance(wiki_title, str):
                        wiki_title = wiki_title.decode(encoding='UTF-8',
                                                       errors='strict')
                    results.append(wiki_title)
        return results

    def search_top(self, search_string, silent_fail=False):
        """Search for the top wiki title.

        search_string - The search string to search the wiki for
        silent_fail - If no title is found return empty string
        returns - The top ranked title
        raises - Exception when nothing is found and silent_fail is False
        """
        results = self.search(search_string)
        if len(results) > 0:
            return results[0]
        if silent_fail:
            return u""
        raise Exception("No Wiki Title Found")

    def get_page(self, title, silent_fail=False):
        """Get the raw markup for a title.

        title - Wiki title to get the page for or the top
                result from a search for the title
        silent_fail - If no page is found return empty string
        returns - The wiki markup for a page
        raises - Exception when nothing is found and silent_fail is False
        """
        # Do the best we can to get a valid wiki title
        wiki_title = self.search_top(title, silent_fail)
        if wiki_title != u"":
            if isinstance(wiki_title, unicode):
                # BUG FIX: the original encoded 'title' (the caller's raw
                # input) here instead of the resolved wiki_title, so the
                # corrected title from the search was silently discarded.
                wiki_title = wiki_title.encode('utf-8')
            url = (self.api
                   + "?action=query&prop=revisions&format=json"
                   + "&rvprop=content&rvlimit=1&titles="
                   + urllib2.quote(wiki_title))
            raw_data = self.__download_file(url)
            reply = json.loads(raw_data)
            for k, v in reply['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']
        if silent_fail:
            return u""
        raise Exception("No Wiki Page Found")