Hive UDF UDTF UDAF 함수

4554 단어 빅 데이터
1. UDF (시간 형식 변환)
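// 1. Write the UDF class: extend UDF and implement an evaluate method.
// 2. Package the class as a jar and put it on the Hive classpath with `add jar`.
// 3. Register the function in Hive: create function MyDateParser as 'hadoop.Hive.UDF.MyDateParser';
// 4. Use it: select MyDateParser(time) from apache_log limit 10;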
/**
 * Hive UDF that reformats an Apache access-log timestamp.
 *
 * <p>Input such as {@code [29/April/2016:17:38:20 +0800]} is parsed with the
 * pattern {@code dd/MMMM/yyyy:HH:mm:ss Z} (English month names) and re-emitted
 * as {@code yyyy-MM-dd HH:mm:ss}. The output formatter has no explicit time
 * zone, so the result is rendered in the JVM's default time zone.
 */
public class MyDateParser extends UDF{

	/**
	 * Converts one access-log timestamp to {@code yyyy-MM-dd HH:mm:ss}.
	 *
	 * @param s the raw timestamp, optionally wrapped in square brackets; may be null
	 * @return the reformatted timestamp, or an empty string when {@code s} is
	 *         null or cannot be parsed
	 */
	public String evaluate(String s) {
		// A NULL column value arrives here as null; treat it like any other
		// unparseable input instead of failing the query with an NPE.
		if (s == null) {
			return "" ;
		}

		// Strip the surrounding brackets of the log field; replace() is a no-op
		// when the character is absent, so no prior indexOf check is needed.
		String stripped = s.replace("[", "").replace("]", "") ;

		// Formatters are created per call because SimpleDateFormat is not thread-safe.
		SimpleDateFormat sdf = new SimpleDateFormat("dd/MMMM/yyyy:HH:mm:ss Z",Locale.ENGLISH) ;
		try {
			Date date = sdf.parse(stripped) ;
			SimpleDateFormat rsdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") ;
			return rsdf.format(date) ;
		} catch (ParseException e) {
			// Best effort: report the bad value and yield an empty string so a
			// single malformed row does not abort the whole query.
			e.printStackTrace();
			return "" ;
		}
	}
}

2. UDTF (Apache Access log 에 대한 url 분석)

/**
 * Hive UDTF that splits one quoted, space-separated field (for example the
 * request part of an access-log line, {@code "GET /path HTTP/1.1"}) into
 * three string columns named {@code rcol1}, {@code rcol2} and {@code rcol3}.
 *
 * <p>Input that is null or does not split into exactly three tokens produces
 * a placeholder row of three {@code "--"} values.
 */
public class MyUdtf extends GenericUDTF {

	/** Number of columns declared in {@link #initialize} and forwarded per row. */
	private static final int COLUMN_COUNT = 3 ;

	/** Value used for every column when the input cannot be split as expected. */
	private static final String PLACEHOLDER = "--" ;

	/**
	 * Splits the first argument on single spaces after removing double quotes
	 * and forwards the three tokens as one row.
	 *
	 * @param args the call arguments; only {@code args[0]} is read
	 * @throws HiveException if forwarding the row fails
	 */
	@Override
	public void process(Object[] args) throws HiveException {
		String[] result = null ;
		if (args != null && args.length > 0 && args[0] != null) {
			String input = args[0].toString().replace("\"", "") ;
			result = input.split(" ") ;
		}

		// The original wrote "--" into result[0..2] in place, which overflowed
		// when fewer than three tokens were present, and then never forwarded
		// the row. Build a fresh, correctly sized placeholder row instead.
		if (result == null || result.length != COLUMN_COUNT) {
			result = new String[] { PLACEHOLDER, PLACEHOLDER, PLACEHOLDER } ;
		}
		forward(result) ;
	}

	/** No resources are held, so there is nothing to release. */
	@Override
	public void close() throws HiveException {
	}

	/**
	 * Validates the argument count and declares the output row layout:
	 * three Java-string columns {@code rcol1}, {@code rcol2}, {@code rcol3}.
	 *
	 * @param argOIs inspectors for the call arguments; exactly one is required
	 * @return a struct inspector describing the three output columns
	 * @throws UDFArgumentException if the function is not called with exactly one argument
	 */
	@Override
	public StructObjectInspector initialize(ObjectInspector[] argOIs) 
												throws UDFArgumentException {
		if(argOIs.length!=1){
			throw new UDFArgumentException("MyUdtf takes exactly one argument") ;
		}
		ArrayList<String> fieldNames = new ArrayList<String>(COLUMN_COUNT) ;
		ArrayList<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>(COLUMN_COUNT) ;

		// Column names are rcol1..rcol3, each inspected as a Java String.
		for (int i = 1; i <= COLUMN_COUNT; i++) {
			fieldNames.add("rcol" + i) ;
			fieldOis.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector) ;
		}

		return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOis) ;
	}
}

3. UDAF (데이터 집계)
/**
 * Hive UDAF that computes the maximum of a column of int values.
 */
public class MyUDAF extends UDAF {

	/**
	 * Evaluator holding the running maximum. The maximum stays null until the
	 * first non-null value has been seen.
	 */
	public static class MaxNumberUDAFEvaluator implements UDAFEvaluator {

		/** Largest value seen so far, or null when no value has been accepted yet. */
		private IntWritable result;

		/** Resets the evaluator to the "nothing seen" state. */
		public void init() {
			result = null;
		}

		/**
		 * Folds one value into the running maximum.
		 *
		 * @param value the next value; null values are ignored
		 * @return false when {@code value} is null, true otherwise
		 */
		public boolean iterate(IntWritable value) {
			if (value == null) {
				return false;
			}
			final int candidate = value.get();
			if (result != null) {
				// Keep whichever of the stored and incoming values is larger.
				final int current = result.get();
				result.set(current >= candidate ? current : candidate);
				return true;
			}
			// First accepted value: copy it rather than keeping a reference to the argument.
			result = new IntWritable(candidate);
			return true;
		}

		/**
		 * Returns the partial aggregation state.
		 *
		 * @return the running maximum, or null if no value has been accepted
		 */
		public IntWritable terminatePartial() {
			return result;
		}

		/**
		 * Combines another partial maximum into this one; a partial result is
		 * handled exactly like a single input value.
		 *
		 * @param other a partial maximum, possibly null
		 * @return the result of {@link #iterate(IntWritable)} for {@code other}
		 */
		public boolean merge(IntWritable other) {
			return iterate(other);
		}

		/**
		 * Returns the final aggregation result.
		 *
		 * @return the maximum of all accepted values, or null if there were none
		 */
		public IntWritable terminate() {
			return result;
		}
	}
}

첨부: 부분 데이터 형식
27.19.74.143 - - [29/April/2016:17:38:20 +0800] "GET /static/image/common/faq.gif HTTP/1.1" 200 1127
110.52.250.126 - - [29/April/2016:17:38:20 +0800] "GET /data/cache/style_1_widthauto.css?y7a HTTP/1.1" 200 1292
27.19.74.143 - - [29/April/2016:17:38:20 +0800] "GET /static/image/common/hot_1.gif HTTP/1.1" 200 680
27.19.74.143 - - [29/April/2016:17:38:20 +0800] "GET /static/image/common/hot_2.gif HTTP/1.1" 200 682
27.19.74.143 - - [29/April/2016:17:38:20 +0800] "GET /static/image/filetype/common.gif HTTP/1.1" 200 90
110.52.250.126 - - [29/April/2016:17:38:20 +0800] "GET /source/plugin/wsh_wx/img/wsh_zk.css HTTP/1.1" 200 1482
110.52.250.126 - - [29/April/2016:17:38:20 +0800] "GET /data/cache/style_1_forum_index.css?y7a HTTP/1.1" 200 2331
110.52.250.126 - - [29/April/2016:17:38:20 +0800] "GET /source/plugin/wsh_wx/img/wx_jqr.gif HTTP/1.1" 200 1770
27.19.74.143 - - [29/April/2016:17:38:20 +0800] "GET /static/image/common/recommend_1.gif HTTP/1.1" 200 1028
110.52.250.126 - - [29/April/2016:17:38:20 +0800] "GET /static/image/common/logo.png HTTP/1.1" 200 4542

좋은 웹페이지 즐겨찾기