Hadoop: Output data to multiple directories
1575 단어 hadoop
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
 * Reducer that writes every value of a key into an output location derived
 * from that key, using {@link MultipleOutputs}.
 *
 * <p>Values are written with a {@link NullWritable} key, so the reducer's
 * declared output key type is {@code NullWritable}. The base output path
 * passed to {@code MultipleOutputs} is {@code <key>/part}; per the
 * MultipleOutputs documentation a {@code /} in the base path places the
 * file in a sub-directory of the job output directory.
 */
public class ImportDataFromMongoReducer extends Reducer<Text, Text, NullWritable, Text> {

    /** Logger for this reducer. */
    private static final Log LOG = LogFactory
            .getLog(ImportDataFromMongoReducer.class);

    /** Writer used to route records to per-key output paths; created in {@link #setup}. */
    private MultipleOutputs<NullWritable, Text> out;

    /**
     * Creates the {@link MultipleOutputs} writer bound to this task's context.
     *
     * @param context the reduce task context
     */
    @Override
    public void setup(Context context) {
        out = new MultipleOutputs<NullWritable, Text>(context);
    }

    /**
     * Builds the base output path for a key: the key's string form followed by
     * {@code /part}.
     *
     * @param k the reduce key
     * @return the base output path handed to {@code MultipleOutputs.write}
     */
    private static String generateFileName(Text k) {
        return k.toString() + "/part";
    }

    /**
     * Writes each value of {@code pKey} to the key-specific output path,
     * discarding the key itself from the written record.
     *
     * @param pKey     the reduce key; determines the output path
     * @param pValues  the values grouped under {@code pKey}
     * @param pContext the reduce task context (unused; output goes through
     *                 {@code MultipleOutputs})
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(final Text pKey, final Iterable<Text> pValues,
            final Context pContext) throws IOException, InterruptedException {
        // The path depends only on the key, so compute it once per reduce call.
        final String baseOutputPath = generateFileName(pKey);
        for (final Text value : pValues) {
            out.write(NullWritable.get(), value, baseOutputPath);
        }
    }

    /**
     * Closes the {@link MultipleOutputs} writer so all opened outputs are
     * flushed and released.
     *
     * @param context the reduce task context
     * @throws IOException          if closing an output fails
     * @throws InterruptedException if closing is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException,
            InterruptedException {
        out.close();
    }
}
References
http://hadoop.apache.org/docs/current/api/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.html
http://www.infoq.com/articles/HadoopOutputFormat