As a follow-up to my previous question about prefix free code, I learned about the module unittest and wrote the following set of functions, to be used in order to semi-automatically check the optimality of the output of any new algorithm to compute prefix free code.
The code works but I would like it to be as elegant (and compact) as possible in order to potentially include it in a research article, in a more formal and reproducible maneer than the traditional "algorithm". I am proud of how it looks, but I do expect you to still criticize it!!!
import unittest, doctest, math
def codeIsPrefixFreeCodeMinimal(L,W):
"""Checks if the prefix free code described by an array $L$ of
pairs $(codeLenght_i,nbWeights_i)$ is minimal for weights $W$, by
1) checking if the code respects Kraft's inequality and
2) comparing the lenght of a code encoded with $L$ with the
entropy of $W$.
"""
assert respectsKraftInequality(L)
return compressedTextLenght(L,W) <= NTimesEntropy(W)+len(W)
def respectsKraftInequality(L):
"""Checks if the given array $L$ of pairs $(codeLenght_i,nbWeights_i)$
corresponds to a prefix free code by checking Kraft's inequality, i.e.
$\sum_i nbWeights_i 2^{-codelenght_i} \leq 1$.
"""
return KraftSum(L) <= 1 ;
def KraftSum(L):
"""Computes the Kraft sum of the prefix free code described by an
array $L$ of pairs $(codeLenght_i,nbWeights_i)$ i.e.
$\sum_i nbWeights_i 2^{-codelenght_i}$.
"""
if len(L)==0:
return 0
terms = map( lambda x: x[1] * math.pow(2,-x[0]), L)
return sum(terms)
class TestKraftSum(unittest.TestCase):
def test_empty(self):
"""Empty input."""
self.assertEqual(KraftSum([]),0)
def test_singleton(self):
"""Singleton with one single symbol."""
self.assertEqual(KraftSum([(0,1)]),1)
def test_simpleCode(self):
"""Simple Code with code lenghts [1,2,2]."""
self.assertEqual(KraftSum([(1,1),(2,2)]),1)
def test_fourEqual(self):
"""Four equal weights"""
self.assertEqual(KraftSum([(2,4)]),1)
def test_HuffmanExample(self):
"""Example from Huffman's article"""
self.assertEqual(KraftSum([(5,6),(4,3),(3,3),(2,1)]),1)
def test_MoffatTurpinExample(self):
"""Example from Moffat and Turpin's article"""
self.assertEqual(KraftSum([(5,4),(4,4),(3,3),(2,1)]),1)
def NTimesEntropy(W):
"""Returns N times the entropy, rounded to the next integer, as computed by
$\lceil \sum_{i=1}^N W[i]/\sum(W) \log (sum(W) / W[i]) \rceil$.
"""
if len(W)==0:
return 0
assert min(W)>0
sumWeights = sum(W)
terms = map( lambda x: x * math.log(x,2), W )
return math.ceil(sumWeights * math.log(sumWeights,2) - sum(terms))
class TestNTimesEntropy(unittest.TestCase):
def test_empty(self):
"""Empty input"""
self.assertEqual(NTimesEntropy([]),0)
def test_singleton(self):
"""Singleton"""
self.assertEqual(NTimesEntropy([1]),0)
def test_pair(self):
"""Pair"""
self.assertEqual(NTimesEntropy([1,1]),2)
def test_fourEqual(self):
"""Four equal weights"""
self.assertEqual(NTimesEntropy([1,1,1,1]),8)
def test_HuffmanExample(self):
"""Example from Huffman's article"""
self.assertEqual(NTimesEntropy([1,3,4,4,4,4,6,6,10,10,10,18,20]),336)
def test_MoffatTurpinExample(self):
"""Example from Moffat and Turpin's article"""
self.assertEqual(NTimesEntropy([1,1,1,1,1,2,2,2,2,3,3,6]),84)
def compressedTextLength(L,W):
"""Computes the lengths of a text which frequencies are given by
an array $W$, when it is compressed by a prefix free code
described by an array $L$ of pairs $(codeLenght_i,nbWeights_i)$.
"""
compressedTextLength = 0
Ls = sorted(L, reverse=True)
Ws = sorted(W)
for (l,n) in Ls:
compressedTextLength += l*sum(Ws[0:n])
Ws = Ws[n:]
return compressedTextLength
class TestcompressedTextLength(unittest.TestCase):
def test_empty(self):
"""Empty input"""
self.assertEqual(compressedTextLength([],[]),0)
def test_pair(self):
"""Pair of symbols, arbitrary text"""
self.assertEqual(compressedTextLength([(1,2)],[1,1]),2)
def test_fourEqual(self):
"""Four equal weights"""
self.assertEqual(compressedTextLength([(2,4)],[1,1,1,1]),8)
def test_HuffmanExample(self):
"""Example from Huffman's article (compares with value compared by hand)"""
self.assertEqual(compressedTextLength([(5,6),(4,3),(3,3),(2,1)],[1,3,4,4,4,4,6,6,10,10,10,18,20]),342)
def test_MoffatTurpinExample(self):
"""Example from Moffat and Turpin's article (compares with entropy value)"""
self.assertEqual(compressedTextLength([(5,4),(4,4),(3,3),(2,1)],[1,1,1,1,1,2,2,2,2,3,3,6]),84)
def main():
unittest.main()
if __name__ == '__main__':
doctest.testmod()
main()