Contents
C/C++ MapReduce Code & build
This is the WordCount example using C/C++.
1 #include "hadoop/Pipes.hh"
2 #include "hadoop/TemplateFactory.hh"
3 #include "hadoop/StringUtils.hh"
4
5 class WordCountMap: public HadoopPipes::Mapper {
6 public:
7 WordCountMap(HadoopPipes::TaskContext& context){}
8 void map(HadoopPipes::MapContext& context) {
9 std::vector<std::string> words =
10 HadoopUtils::splitString(context.getInputValue(), " ");
11 for(unsigned int i=0; i < words.size(); ++i) {
12 context.emit(words[i], "1");
13 }
14 }
15 };
16
17 class WordCountReduce: public HadoopPipes::Reducer {
18 public:
19 WordCountReduce(HadoopPipes::TaskContext& context){}
20 void reduce(HadoopPipes::ReduceContext& context) {
21 int sum = 0;
22 while (context.nextValue()) {
23 sum += HadoopUtils::toInt(context.getInputValue());
24 }
25 context.emit(context.getInputKey(), HadoopUtils::toString(sum));
26 }
27 };
28
29 int main(int argc, char *argv[]) {
30 return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordCountMap,
31 WordCountReduce>());
32 }
To compile the example, build the Hadoop code and the C/C++ word count example:
# ant -Dcompile.c++=yes examples
Upload C++ binary files to HDFS
To upload the binary files to HDFS, the command syntax is:
# bin/hadoop fs -put build/c++-examples/Linux-i386-32/bin /examples/bin
Set the MapReduce Config
# vi src/examples/pipes/conf/word.xml

<?xml version="1.0"?>
<configuration>
  <property>
    <!-- Set the binary path on DFS -->
    <name>hadoop.pipes.executable</name>
    <value>/examples/bin/wordcount</value>
  </property>
  <property>
    <name>hadoop.pipes.java.recordreader</name>
    <value>true</value>
  </property>
  <property>
    <name>hadoop.pipes.java.recordwriter</name>
    <value>true</value>
  </property>
</configuration>
Execute
To run the example, the command syntax is:
# bin/hadoop pipes -conf src/examples/pipes/conf/word.xml -input in-dir -output out-dir