My MapReduce tester is clearly ported from shell. Short of adding an `args=None` parameter and iterating with `for line in args or read_input()`, what's a better way of importing and testing the function outside of `subprocess`?
Or does it not matter, i.e.: my "hack" is fine?
test_mapreduce.py
from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
    """End-to-end test chaining the mapper, `sort` and the reducer as subprocesses."""

    # Directory containing `setup.py`, the scripts and the `data` folder.
    # Stays '' when the tests run from that directory; see `setUpClass`.
    top_path = ''

    @staticmethod
    def _pipe(command, stdin_data=None):
        """Run `command`, feeding `stdin_data` (if given) to its stdin.

        Returns the captured stdout. Raises `CalledProcessError` on a
        non-zero exit status, mirroring what `check_output` does.
        """
        # Local import: the module header only imports `check_output`.
        from subprocess import PIPE, CalledProcessError, Popen

        process = Popen(command,
                        stdin=PIPE if stdin_data is not None else None,
                        stdout=PIPE)
        output, _ = process.communicate(stdin_data)
        if process.returncode:
            raise CalledProcessError(process.returncode, command, output)
        return output

    def map_reduce(self, mapper_name, reducer_name, datafile_name):
        """Run map -> shuffle (sort) -> reduce and return the reducer's stdout.

        `mapper_name` and `reducer_name` are script names relative to
        `top_path`; `datafile_name` is looked up under `top_path/data`.

        Each stage's output is piped to the next stage's stdin. Passing it
        as a command-line argument (as before) makes `sort` treat the data
        as a file name and fail.
        """
        # Map: the mapper receives the data file path as an argument.
        mapped = run(['python', path.join(self.top_path, mapper_name),
                      path.join(self.top_path, 'data', datafile_name)])
        # Shuffle: could be replaced with python `sorted`.
        shuffled = self._pipe(['sort'], mapped)
        # Reduce: assumes the reducer reads stdin when given no file
        # arguments, as the mapper's `fileinput` usage does -- TODO confirm.
        return self._pipe(
            ['python', path.join(self.top_path, reducer_name)], shuffled)

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing `setup.py`.

        Tries the current directory first, then two levels up. Raises
        `AssertionError` when neither contains `setup.py`.
        """
        if not path.isfile('setup.py'):
            cls.top_path = path.join('..', '..')
            if not path.isfile(path.join(cls.top_path, 'setup.py')):
                raise AssertionError(
                    "Haven't found right directory to `cd` into")

    def test_with_student_test_posts(self):
        """Run the whole pipeline on the sample posts and print the result."""
        # NOTE(review): this only prints; there is no assertion on the output.
        print(self.map_reduce('mapper.py', 'reducer.py',
                              'student_test_posts.csv'))
# Run the test suite through unittest's runner when executed as a script.
if __name__ == '__main__':
    unittest_main()
mapper.py
#!/usr/bin/env python
from fileinput import input as read_input
def mapper(lines=None):
    """Print one "store<TAB>cost" line per well-formed sales record.

    Args:
        lines: Optional iterable of tab-separated record lines. When
            omitted, lines come from `fileinput.input()` (files named on
            the command line, or stdin), so running the script is
            unchanged. Passing an iterable lets the function be imported
            and tested without a subprocess.

    A record must have exactly six tab-separated fields (date, time,
    store, item, cost, payment); any other line is skipped.
    """
    if lines is None:
        lines = read_input()
    for line in lines:
        # Strip only the line terminator. A bare `strip()` also removes a
        # leading or trailing tab, which drops or shifts records whose
        # first or last field is empty.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 6:
            continue
        _date, _time, store, _item, cost, _payment = fields
        # Call form of print works on both Python 2 and Python 3.
        print("{0}\t{1}".format(store, cost))
# Run the mapper when executed as a script; it reads its input through
# `fileinput.input` (imported above as `read_input`).
if __name__ == '__main__':
    mapper()
PS: Should I refactor to use the built-in `map` and `reduce` functions?
`reducer.py`? Some sample `student_test_posts.csv` would be nice too – janos♦ Aug 17 '14 at 8:05