First, I know there are other Python wiki API classes out there. I'm writing this one because I don't need all the bells and whistles: no edits, no talk pages, etc. I just need to be able to search for titles and get the wiki markup. Second, I'm new to Python.

Any advice, suggestions, comments, or a general review would be welcome.

# -*- coding: utf-8 -*-

import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode

try:
    import json
except:
    import simplejson as json


def enum(*sequential, **named):
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


class Wiki:
    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        self.api = api
        return


    """A HTTP Request"""
    def downloadFile(self, URL=None):
        """     
        URL - The URL to fetch
        """
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        responce = opener.open(URL)
        data = responce.read()
        responce.close()
        return data.decode(encoding='UTF-8',errors='strict')


    """Search the wiki for titles"""
    def search(self, searchString):
        results = []
        if (searchString != u""):
            encoded_searchString = searchString
            if isinstance(encoded_searchString, unicode):
                encoded_searchString = searchString.encode('utf-8')
            url = self.api + "?action=query&list=search&format=json&srlimit=10&srsearch=" + urllib2.quote(encoded_searchString)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

            if object:
                if 'query' in object:
                    for item in object['query']['search']:
                        wikiTitle = item['title']
                        if isinstance(wikiTitle, str):
                            wikiTitle = wikiTitle.decode(encoding='UTF-8',errors='strict')
                        results.append(wikiTitle)
        return results


    """Search for the top wiki title"""
    def searchTop(self, searchString):
        results = self.search(searchString)
        if len(results) > 0:
            return results[0]
        else:
            return u""


    """Get the raw markup for a title"""
    def getPage(self, title):
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)

        if (wikiTitle != u""):
            encoded_title = wikiTitle
            if isinstance(encoded_title, unicode):
                encoded_title = title.encode('utf-8')
            url = self.api + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles=" + urllib2.quote(encoded_title)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

            for k, v in object['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']
        return u""

Updated with Lattyware's suggestion.


Updated Code

Updated with Winston Ewert's suggestions. I've kept a way to fail silently, defaulted to off: if I pass in a list of titles to search for and process, I'd like to skip over the errors and keep moving through the rest of the list (see the short usage sketch after the code).

# -*- coding: utf-8 -*-

import urllib2
import re
import time
import sys
from urllib import quote_plus, _is_unicode

try:
    import json
except:
    import simplejson as json


def enum(*sequential, **named):
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


class Wiki:
    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        self.api = api


    """A HTTP Request"""
    def __download_file(self, url, utf_8=True):
        """     
        url     - The URL to fetch
        utf_8   - If True, decode the response from UTF-8 to unicode
        returns - The downloaded data
        """
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (compatible; wiki parser thing)')]
        response = opener.open(url)
        data = response.read()
        response.close()
        if utf_8:
            return data.decode('utf-8', 'strict')
        else:
            return data


    """Search the wiki for titles"""
    def search(self, search_string):
        """     
        search_string - The search string to search the wiki for
        returns       - A list of page titles; an empty list if there are no results
        """
        results = []
        if search_string:
            if isinstance(search_string, unicode):
                search_string = search_string.encode('utf-8')
            url = self.api + "?action=query&list=search&format=json&srlimit=10&srsearch=" + urllib2.quote(search_string)
            raw_data = self.__download_file(url)
            object = json.loads(raw_data)

            if 'query' in object:
                for item in object['query']['search']:
                    wiki_title = item['title']
                    if isinstance(wiki_title, str):
                        wiki_title = wiki_title.decode('utf-8', 'strict')
                    results.append(wiki_title)
        return results


    """Search for the top wiki title"""
    def search_top(self, search_string, silent_fail=False):
        """     
        search_string - The search string to search the wiki for
        silent_fail   - If True, return an empty string instead of raising when no title is found
        returns       - The top ranked title
        """
        results = self.search(search_string)
        if len(results) > 0:
            return results[0]
        else:
            if silent_fail:
                 return u""
            else:
                 raise Exception("No Wiki Title Found")


    """Get the raw markup for a title"""
    def get_page(self, title, silent_fail=False):
        """     
        title         - The title to look up; the top search result
                        for this string is used as the page title
        silent_fail   - If True, return an empty string instead of raising when no page is found
        returns       - The wiki markup for a page
        """
        # Do the best we can to get a valid wiki title
        wiki_title = self.search_top(title, silent_fail)

        if wiki_title != u"":
            if isinstance(wiki_title, unicode):
                wiki_title = wiki_title.encode('utf-8')
            url = self.api + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles=" + urllib2.quote(wiki_title)
            raw_data = self.__download_file(url)
            object = json.loads(raw_data)

            for k, v in object['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']

        if silent_fail:
             return u""
        else:
             raise Exception("No Wiki Page Found")

2 Answers

Accepted answer:
class Wiki:
    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        self.api = api
        return

This return does nothing

    """A HTTP Request"""
    def downloadFile(self, URL=None):

Python convention is to use lowercase_with_underscores for method names

        """     
        URL - The URL to fetch
        """
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

Why are you pretending to be Mozilla?

        responce = opener.open(URL)

Response is spelled wrong

        data = responce.read()
        responce.close()
        return data.decode(encoding='UTF-8',errors='strict')

This whole function should probably be a free function, not a method.
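
For example, as a module-level helper (just a sketch, keeping your urllib2 usage and UTF-8 decoding; the function name and User-agent string are made up):

def download_file(url):
    """Fetch url and return the response body decoded from UTF-8."""
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'wiki-markup-fetcher/0.1')]
    response = opener.open(url)
    try:
        return response.read().decode('utf-8')
    finally:
        response.close()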

    """Search the wiki for titles"""
    def search(self, searchString):

Parameters by convention should be named lowercase_with_underscore

        results = []
        if (searchString != u""):

No need for the ( and ). Also you can just do: if searchString:

            encoded_searchString = searchString

Why?

            if isinstance(encoded_searchString, unicode):
                encoded_searchString = searchString.encode('utf-8')
            url = self.api + "?action=query&list=search&format=json&srlimit=10&srsearch=" + urllib2.quote(encoded_searchString)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

I'd combine these two lines
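
i.e. something like:

            object = json.loads(self.downloadFile(url))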

            if object:

In what circumstance will this be false? If that happens you should probably do something besides pretend that nothing happened.

                if 'query' in object:
                    for item in object['query']['search']:
                        wikiTitle = item['title']
                        if isinstance(wikiTitle, str):
                            wikiTitle = wikiTitle.decode(encoding='UTF-8',errors='strict')
                        results.append(wikiTitle)
        return results


    """Search for the top wiki title"""
    def searchTop(self, searchString):
        results = self.search(searchString)
        if len(results) > 0:
            return results[0]
        else:
            return u""

Do you really want an empty string if your result wasn't found? You should probably throw an exception here. Returning an empty string will just make failures hard to trace.
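
For example, with a small exception type of your own (the names here are only a sketch):

class NoWikiTitleFound(Exception):
    """Raised when a search produces no titles."""

# ...and then in searchTop:
    def searchTop(self, searchString):
        results = self.search(searchString)
        if not results:
            raise NoWikiTitleFound("no title found for search %r" % searchString)
        return results[0]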

    """Get the raw markup for a title"""
    def getPage(self, title):
        # Do the best we can to get a valid wiki title
        wikiTitle = self.searchTop(title)

        if (wikiTitle != u""):
            encoded_title = wikiTitle
            if isinstance(encoded_title, unicode):
                encoded_title = title.encode('utf-8')
            url = self.api + "?action=query&prop=revisions&format=json&rvprop=content&rvlimit=1&titles=" + urllib2.quote(encoded_title)
            rawData = self.downloadFile(url)
            object = json.loads(rawData)

            for k, v in object['query']['pages'].items():
                if 'revisions' in v:
                    return v['revisions'][0]['*']
        return u""

Don't default to stupid defaults. If you can't get the requested page throw an error with as much detail as possible, don't just throw me an empty string.
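
For example, the end of getPage could raise an exception that names what was asked for (again just a sketch, with a NoWikiPageFound exception defined like the one above):

        raise NoWikiPageFound("no page found for %r (top search hit: %r)" % (title, wikiTitle))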

Thanks for the review. I've updated the question with the changes I've made. Any other comments or suggestions welcome. – Justin808 Oct 26 '12 at 17:31

An obvious one that jumps out at me is this:

class Wiki:
    def __init__(self, api=None):
        if api == None:
            self.api = "http://en.wikipedia.org/w/api.php"
        else:
            self.api = api
        return

Can be simplified to this:

class Wiki:
    def __init__(self, api="http://en.wikipedia.org/w/api.php"):
        self.api = api