After programming a lot in high-level languages such as Python and R, I recently started working with C++. To improve my C++ skills I am, of course, reading a lot of books. In addition, I try to replicate some functionality from high-level languages in C++. My first attempt at this (after Hello World :)) was to create a C++ class that reads comma-separated files. In essence, the class parses the file and reads it into a boost::multi_array. I would appreciate any feedback on the code.
The links to the code and example data are listed below; they point to my Bitbucket account.
The actual code consists of a cpp file (csv-reader.cpp):
#include <cassert>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/multi_array.hpp>
#include "csv-test.h"
// Reads a delimiter-separated text file of numbers into a two-dimensional
// boost::multi_array<T, 2> and offers printing and element access.
//
// T is the stored element type. Every field is parsed by convertToDouble()
// and then converted to T, so T must be convertible from double.
template <class T> class csv_reader {
    boost::multi_array<T, 2> content2d ; // parsed values, indexed [row][column]
    int ncol ; // number of columns per row; 0 if the file had no lines
    int nrow ; // number of rows read PLUS ONE (cout_content() loops to nrow - 1)
public :
    // Parses the file at `path`; each line is split on any character of `sep`.
    // The default separator is declared here because default arguments of a
    // class template's member function must be given on the declaration
    // inside the class, not on the out-of-class definition.
    csv_reader(std::string path, std::string sep = ",") ; // constructor
    ~csv_reader(); // destructor
    void cout_content() ; // print the contents
    T operator() (unsigned row, unsigned column) ;
} ;

// Constructor
//
// path: file to read.
// sep:  set of separator characters; a field ends at any one of them.
//
// Throws std::runtime_error if the file cannot be opened or if a line has a
// different number of fields than the first line, and lets the BadConversion
// thrown by convertToDouble() propagate for fields that are not numbers.
template <class T> csv_reader<T>::csv_reader(std::string path, std::string sep)
{
    ncol = 0 ; // unknown until the first line has been split
    nrow = 1 ; // kept at "rows read + 1", see the member comment

    std::ifstream data(path.c_str()) ;
    if (!data)
    {
        // An unreadable file must not look like an empty table
        throw std::runtime_error("csv_reader: cannot open file \"" + path + "\"") ;
    }

    // Gather all values (row after row) before touching the multi_array, so
    // it is sized exactly once instead of being resized and copied per line.
    std::vector<T> values ;
    std::vector<std::string> split_line ;
    std::string line ;
    while (std::getline(data, line))
    {
        boost::split(split_line, line, boost::is_any_of(sep)) ;
        if (ncol == 0)
        {
            ncol = static_cast<int>(split_line.size()) ;
        }
        else if (split_line.size() != static_cast<std::size_t>(ncol))
        {
            // Checked at run time: an assert would vanish with NDEBUG and a
            // ragged row would then be written outside the array.
            std::ostringstream msg ;
            msg << "csv_reader: line " << nrow << " of \"" << path << "\" has "
                << split_line.size() << " fields, expected " << ncol ;
            throw std::runtime_error(msg.str()) ;
        }
        for (std::size_t i = 0; i < split_line.size(); ++i)
        {
            values.push_back(static_cast<T>(convertToDouble(split_line[i]))) ;
        }
        nrow++ ;
    }

    // Size the array once and copy the collected values into place
    const int rows = nrow - 1 ;
    content2d.resize(boost::extents[rows][ncol]) ;
    std::size_t next = 0 ;
    for (int row = 0; row < rows; ++row)
    {
        for (int col = 0; col < ncol; ++col)
        {
            content2d[row][col] = values[next++] ;
        }
    }
}
// Destructor: intentionally empty, the data members release their own storage.
template <class T>
csv_reader<T>::~csv_reader()
{
}
// Prints the table to std::cout: one output line per row, every value
// followed by a single space.
template <class T>
void csv_reader<T>::cout_content()
{
    // nrow is one past the number of rows that were read
    const int row_count = nrow - 1 ;
    for (int r = 0; r != row_count && r < row_count; ++r)
    {
        int c = 0 ;
        while (c < ncol)
        {
            std::cout << content2d[r][c] << " ";
            ++c ;
        }
        std::cout << "\n" ;
    }
}
// Allow access to the contents
//
// Returns a copy of the element at (row, column); both indices are zero-based.
// Throws BadIndex when either index lies outside the stored table.
template <class T> T csv_reader<T>::operator() (unsigned row, unsigned column)
{
    // Compare against the array's real extents. nrow is one larger than the
    // number of stored rows, so testing `row >= nrow` let an index equal to
    // the row count through and read past the end of the array.
    if (row >= content2d.shape()[0] || column >= content2d.shape()[1])
        throw BadIndex("boost::MultiArray subscript out of bounds");
    return content2d[row][column] ;
}
int main()
{
// An integer csv reader
csv_reader<int> csv_obj_int("test.csv") ;
csv_obj_int.cout_content() ;
// A double csv reader
csv_reader<double> csv_obj_double("test.csv") ;
csv_obj_double.cout_content() ;
// It also supports direct access to the content using operator()
std::cout << csv_obj_double(1,1) << "\n" ;
std::cout << csv_obj_double(1,1) * 5 << "\n" ;
// This statement fails with a subscript out of bounds error
// std::cout << csv_obj_double(10,10) * 5 << "\n" ;
// Testing a different seperator
csv_reader<double> csv_obj_double_sep2("test_semicol.csv", ";") ;
csv_obj_double_sep2.cout_content() ;
}
and a header file (csv-test.h):
// File: csv-test.h
#include <iostream>
#include <sstream>
#include <string>
#include <stdexcept>
// Exception thrown by convertToDouble() when a string cannot be converted
// to a number. The text given to the constructor is what what() reports.
class BadConversion : public std::runtime_error {
public:
    // explicit: a plain std::string must never turn into an exception
    // object through an implicit conversion.
    explicit BadConversion(std::string const& s)
        : std::runtime_error(s)
    { }
};
// Exception thrown when an element is requested with a subscript that is
// out of bounds. The text given to the constructor is what what() reports.
class BadIndex : public std::runtime_error {
public:
    // explicit: a plain std::string must never turn into an exception
    // object through an implicit conversion.
    explicit BadIndex(std::string const& s)
        : std::runtime_error(s)
    { }
};
// Converts the text in `s` to a double.
//
// Leading and trailing whitespace is tolerated. Throws BadConversion, with
// the offending text in the message, when `s` does not start with a number
// or when anything other than whitespace follows the number.
inline double convertToDouble(std::string const& s)
{
    std::istringstream i(s);
    double x;
    char leftover;
    // The second extraction skips whitespace and succeeds only if a further
    // character exists, i.e. the field holds trailing garbage such as "1.5abc"
    // that would otherwise be silently truncated to 1.5.
    if (!(i >> x) || (i >> leftover))
        throw BadConversion("convertToDouble(\"" + s + "\")");
    return x;
}
The main() function tests the class on two example CSV files: test.csv and test_semicol.csv.