Converting latitude and longitude coordinates from CSV using web service

Question

I am working on converting a mailing list that has longitude and latitude coordinates within the CSV file. This script I came up with does what I need, but this is my first real-world use of python. I want to know where I am making any mistakes, not using best-practices, and what can be optimized to make it faster.

The input.csv file has the following header:

"Email Address",MEMBER_RATING,OPTIN_TIME,OPTIN_IP,CONFIRM_TIME,CONFIRM_IP,LATITUDE,LONGITUDE,GMTOFF,DSTOFF,TIMEZONE,CC,REGION,LAST_CHANGED,LEID,EUID

And the script:

import sys
import os
import csv
import signal
import json
import urllib
import urllib2
import sqlite3
import codecs
import cStringIO

csv.field_size_limit(sys.maxsize)


class EmailList:

    WEB_SERVICE_URL = 'http://open.mapquestapi.com/nominatim/v1/reverse.php?format=json'

    def __init__(self, inputFile):
        signal.signal(signal.SIGINT, self.signal_handler)
        self.conn = None
        self.initialize_database(inputFile)
        self.convert_rows()
        self.db_to_csv()

    def signal_handler(self, signal, frame):
        try:
            self.conn.commit()
            self.conn.close()
            print '[DB changes committed and connection closed.]'
        except sqlite3.ProgrammingError as e:
            print '[script stopped]'
            print e.message

        sys.exit(0)

    def initialize_database(self, file):

        print 'checking for data.db...'

        if not os.path.isfile('data.db'):

            print 'data.db does not exist, converting csv to sqlite...'

            with open(file) as inputFile:
                reader = UnicodeReader(inputFile)

                header = reader.next()

                if self.conn is None:
                    self.conn = sqlite3.connect('data.db')

                c = self.conn.cursor()

                c.execute("DROP TABLE IF EXISTS email_list")

                sql = """CREATE TABLE email_list (\n""" + \
                      ",\n".join([("%s varchar" % name) for name in header]) \
                      + ")"

                c.execute(sql)

                for line in reader:
                    if line:
                        try:
                            c.execute('INSERT INTO email_list VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', line)
                        except sqlite3.ProgrammingError as e:
                            print e.message
                            print line

                try:
                    c.execute('ALTER TABLE email_list ADD COLUMN CITY varchar')
                    c.execute('ALTER TABLE email_list ADD COLUMN STATE varchar')
                    c.execute('ALTER TABLE email_list ADD COLUMN COUNTRY varchar')
                    c.execute('ALTER TABLE email_list ADD COLUMN POSTAL_CODE varchar')
                except sqlite3.OperationalError as e:
                    print 'error creating new columns: '
                    print e.message

                self.conn.commit()
                self.conn.close()

            print 'converted csv to sqlite, stored in data.db'

    def convert_rows(self):

        print 'converting coordinates...'

        self.conn = sqlite3.connect('data.db')
        c = self.conn.cursor()

        results = c.execute('SELECT LATITUDE AS lat, LONGITUDE AS lon, Email as email FROM email_list WHERE POSTAL_CODE IS NULL AND CITY IS NULL AND COUNTRY IS NULL AND STATE IS NULL')

        rows = []

        for row in results:
            lat, lon, email = row
            data = {'lat': lat, 'lon': lon, 'email': email}
            rows.append(data)

        self.conn.commit()
        self.conn.close()

        for item in rows:
            try:
                converted = self.convert_coordinates(item['lat'], item['lon'])
                print str(rows.index(item) + 1) + '/' + str(len(rows))
            except TypeError:
                converted['city': '']
                converted['state': '']
                converted['country': '']
                converted['postal_code': '']

            self.conn = sqlite3.connect('data.db')
            c = self.conn.cursor()

            try:
                c.execute('UPDATE email_list SET CITY=?, STATE=?, COUNTRY=?, POSTAL_CODE=? WHERE Email=?', [converted['city'], converted['state'], converted['country'], converted['postal_code'], item['email']])
            except KeyboardInterrupt:
                print 'user quit'

            self.conn.commit()
            self.conn.close()

        print 'converted coordinates.'

    def convert_coordinates(self, lat, lon):
        if lat and long:
            try:
                values = {'lat': lat, 'lon': lon}
                data = urllib.urlencode(values)
                request = urllib2.Request(self.WEB_SERVICE_URL, data)
                response = urllib2.urlopen(request)
            except urllib2.HTTPError as e:
                print 'error loading web service'
                print e.message

            json_result = json.load(response)

            try:
                city = json_result['address']['city']
            except KeyError:
                city = ''

            try:
                state = json_result['address']['state']
            except KeyError:
                state = ''

            try:
                cc = json_result['address']['country_code']
            except KeyError:
                cc = ''

            try:
                postal_code = json_result['address']['postcode']
            except KeyError:
                postal_code = ''
        else:
            city = ''
            state = ''
            cc = ''
            postal_code = ''

        return {'city': city, 'state': state, 'country': cc, 'postal_code': postal_code}

    def db_to_csv(self):

        print 'beginning write to csv...'

        self.conn = sqlite3.connect('data.db')
        c = self.conn.cursor()
        c.execute('SELECT * FROM email_list')

        with open('output.csv', 'wb') as outputFile:
            writer = UnicodeWriter(outputFile)
            writer.writerow([i[0] for i in c.description])
            writer.writerows(c)

        print 'write finished.'

        self.conn.commit()
        self.conn.close()

        print 'done.'


# The CSV module has issues with reading/writing unicode,
# the following classes were taken from docs.python.org to
# help with that: http://docs.python.org/2/library/csv.html


class UTF8Recoder:
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8
    """
    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        return self.reader.next().encode("utf-8")


class UnicodeReader:
    """
    A CSV reader which will iterate over lines in the CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        f = UTF8Recoder(f, encoding)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def next(self):
        row = self.reader.next()
        return [unicode(s, "utf-8") for s in row]

    def __iter__(self):
        return self


class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)


EmailList('input.csv')

I like how you did not try to implement CSV parsing yourself. — ChrisWue, Oct 8 '13 at 2:52

Zoran Pavlovic · Accepted Answer · 2014-04-10 05:06:01Z

up vote 2 down vote accepted

Two things that pop out for me immediately:

You named a parameter "file", which is a built-in function in python.
In one of your except blocks, you incorrectly use a dictionary.

See below:

converted['city': '']
converted['state': '']
converted['country': '']
converted['postal_code': '']

Note, what that will do is attempt to use the slice operator on the dictionary. I just tried it now and it fails with a TypeError exception. What you want the code to look like is this:

converted['city'] = ''
converted['state'] = ''
converted['country'] = ''
converted['postal_code'] = ''

Another thing I've noticed. Dealing with dictionaries, you're not quite sure how to handle getting "optional" parameters from them. Have a look at the refactored code I wrote for your json dict handling:

city = json_result['address'].get('city', '')
state = json_result['address'].get('state', '')
cc = json_result['address'].get('country_code', '')
postal_code = json_result['address'].get('postcode', '')

The get method on dictionary objects takes an optional second parameter that specifies a default value to return if the key is not found.

edited Apr 10 at 5:06

answered Nov 19 '13 at 20:50

Zoran Pavlovic
25818

I don’t think file is a keyword in Python. It’s still a bad variable name, but not as bad as, say, list. – alexwlchan Apr 9 at 10:37

@alexwlchan Have a look here for the built-ins: docs.python.org/2/library/functions.html#file – Zoran Pavlovic Apr 9 at 14:11

Ah, sorry, I was taking “reserved keyword” a bit too literally, and thought you meant on of the keyword keywords. – alexwlchan Apr 9 at 14:12

@alexwlchan Np, and thanks... I've edited the answer to clarify a little better. – Zoran Pavlovic Apr 9 at 14:30

add a comment |

asked	10 months ago
viewed	319 times
active	4 months ago

current community

your communities

more stack exchange communities

Converting latitude and longitude coordinates from CSV using web service

1 Answer 1

Your Answer

Not the answer you're looking for? Browse other questions tagged python csv sqlite geospatial or ask your own question.

Hot Network Questions

current community

your communities

more stack exchange communities

Converting latitude and longitude coordinates from CSV using web service

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Not the answer you're looking for? Browse other questions tagged python csv sqlite geospatial or ask your own question.

Related

Hot Network Questions