Hadoop 0.20.2 분산 캐시 예제 — 코드 1: AdlogETL.java
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class AdlogETL extends Configured implements Tool {
public static class MyMap extends Mapper<LongWritable, Text, Text, Text> {
private IpCity ipcity;
//hashmap treemap ip ip
private TreeMap<Long, String> tm;
private HashMap<Long, Long> hm;
//df yyyy-mm-dd HH:mm:ss
SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
//setup map (0.20 api configure)
public void setup(Context context) {
try {
ipcity = new IpCity();
ipcity.initialize(new File("ip"));//ip , -files
tm = ipcity.getTypeName();
hm = ipcity.getIpregion();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
StringBuffer line = new StringBuffer(value.toString());
Pattern p = Pattern
.compile("([^,]*),([^;]*);ad=([^;]*);ap=([^;]*);mac=([^;]*);([^\"]*)\",([^,]*),(\"[^\"]*)\",([^,]*),([^,]*),\"([^\"]*)\",\"([^\"]*)\",\"([^\"]*)\"");// ,
Matcher m = p.matcher(line);
boolean isvalid = m.matches();
// m.group():1-->time,3-->ad,4-->ap,5-->mac,9->ip,13-->cookie
if(isvalid)
{
String city = "999999";
long long_ip = ip2long(m.group(9));
if (tm.floorEntry(long_ip) != null) {
long tmp = tm.floorKey(long_ip);
if (hm.containsKey(tmp) && long_ip <= hm.get(tmp))
city = tm.get(tmp);
}
String mac = m.group(5);
String time = m.group(1);
Date date = new Date();
try {
date = df.parse(time);
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
long sec = date.getTime()/1000;
if(mac.equals(""))
{
mac = "NULL";
}
//mac, time,ap, ad, area_id, cookie_id
context.write(new Text(mac), new Text(String.valueOf(sec) + "\t"
+ m.group(4) + "\t" + m.group(3) + "\t" + city + "\t"
+ m.group(13)));
}
}
}
public static class MyReduce extends Reducer<Text, Text, Text, Text> {
public void reduce(Text keys, Iterable<Text> values, Context context)
throws IOException {
try {
for (Text val : values) {
context.write(keys, new Text(val));
}
} catch (Exception e) {
}
}
}
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
Configuration conf = getConf();
Job job = new Job(conf, "AdlogETL");
job.setJarByClass(AdlogETL.class);
Path in = new Path(args[0]);
Path out = new Path(args[1]);
FileInputFormat.setInputPaths(job, in);
FileOutputFormat.setOutputPath(job, out);
job.setMapperClass(MyMap.class);
job.setReducerClass(MyReduce.class);
// job.setCombinerClass(MyReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(0);
System.exit(job.waitForCompletion(true) ? 0 : 1);
return 0;
}
public static long ip2long(String str) {
String[] strFields = str.split("\\.");
if (!str.matches("^\\d+\\.\\d+\\.\\d+\\.\\d+$"))
return 0;
long[] nFields = new long[4];
for (int i = 0; i < 4; i++) {
nFields[i] = Integer.parseInt(strFields[i]);
}
return (nFields[0] << 24) + (nFields[1] << 16) + (nFields[2] << 8)
+ nFields[3];
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new AdlogETL(), args);
System.out.println(res);
}
}
코드 2: IpCity.java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.TreeMap;
import org.apache.hadoop.io.IOUtils;
/**
 * In-memory IP-range -> city-id lookup table, loaded from a tab-separated
 * file with columns: startIp, endIp, (unused), cityId.
 *
 * Typical lookup (see AdlogETL.MyMap): floorKey(ip) on getTypeName(), then
 * verify ip &lt;= getIpregion().get(start).
 */
public class IpCity {
    // range-start-ip -> city id, sorted so callers can do floor lookups
    TreeMap<Long, String> tm = new TreeMap<Long, String>();
    // range-start-ip -> range-end-ip of the same row
    HashMap<Long, Long> hm = new HashMap<Long, Long>();

    /**
     * Loads the range table, one row per line.
     *
     * NOTE(review): reads with the platform default charset — the data is
     * expected to be ASCII digits/tabs, so this is harmless, but confirm if
     * city ids can ever contain non-ASCII text.
     *
     * @param file tab-separated table file
     * @throws IOException if the file cannot be opened or read
     */
    public void initialize(File file) throws IOException {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            String line;
            while ((line = in.readLine()) != null) {
                String[] fields = line.trim().split("\t");
                long startIp = Long.parseLong(fields[0]);
                long endIp = Long.parseLong(fields[1]);
                String cityId = fields[3]; // column 2 is present in the file but unused
                tm.put(startIp, cityId);
                hm.put(startIp, endIp);
            }
        } finally {
            // Stdlib-only quiet close. The original pulled in Hadoop's
            // IOUtils.closeStream just for this; behavior is identical
            // (null-safe, close errors ignored) without the dependency.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // best-effort close
                }
            }
        }
    }

    /** @return start-ip -> city-id map (the live internal map, not a copy). */
    public TreeMap<Long, String> getTypeName() {
        return tm;
    }

    /** @return start-ip -> end-ip map (the live internal map, not a copy). */
    public HashMap<Long, Long> getIpregion() {
        return hm;
    }
}
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
다양한 언어의 JSONJSON은 Javascript 표기법을 사용하여 데이터 구조를 레이아웃하는 데이터 형식입니다. 그러나 Javascript가 코드에서 이러한 구조를 나타낼 수 있는 유일한 언어는 아닙니다. 저는 일반적으로 '객체'{}...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.