자세히 보기
package com.rayeen.spider.vertical.util;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import com.rayeen.spider.vertical.ParalleIRVirtualMachine;
import com.rayeen.spider.vertical.algorithm.AddFunction;
import com.rayeen.spider.vertical.algorithm.ClearTagFunction;
import com.rayeen.spider.vertical.algorithm.DoubleParameterFunction;
import com.rayeen.spider.vertical.algorithm.EqNullOperator;
import com.rayeen.spider.vertical.algorithm.EqOperator;
import com.rayeen.spider.vertical.algorithm.EqStringOperator;
import com.rayeen.spider.vertical.algorithm.FullUrlFunction;
import com.rayeen.spider.vertical.algorithm.Function;
import com.rayeen.spider.vertical.algorithm.MaxlengthFunction;
import com.rayeen.spider.vertical.algorithm.Operator;
import com.rayeen.spider.vertical.algorithm.RecursiveFunction;
import com.rayeen.spider.vertical.algorithm.ReplaceFunction;
import com.rayeen.spider.vertical.algorithm.SprintfFunction;
import com.rayeen.spider.vertical.algorithm.UneqNullOperator;
import com.rayeen.spider.vertical.algorithm.UneqOperator;
import com.rayeen.spider.vertical.algorithm.UneqStringOperator;
import com.rayeen.spider.vertical.algorithm.UniParameterFunction;
import com.rayeen.spider.vertical.auxiliary.CrawlResultSetCollection;
import com.rayeen.spider.vertical.auxiliary.SemanticException;
import com.rayeen.spider.vertical.auxiliary.TableMerge;
import com.rayeen.spider.vertical.constant.ArgumentType;
import com.rayeen.spider.vertical.constant.ConfConstant;
import com.rayeen.spider.vertical.constant.ErrorType;
import com.rayeen.spider.vertical.constant.FunctionConstant;
public class ResutTree {
static final Logger LOG = Logger.getLogger(ResutTree.class);
static Map FunctionNameMap = new ConcurrentHashMap();
//
static Set uniParameterFunction = new HashSet();
//
static Set doubleParameterFunction = new HashSet();
static {
uniParameterFunction.add(FunctionConstant.FULL_URL);
uniParameterFunction.add(FunctionConstant.CLEAR_TAG);
doubleParameterFunction.add(FunctionConstant.MAX_LENGTH);
doubleParameterFunction.add(FunctionConstant.ADD);
}
/**
 * Creates a new {@link Function} object for a registered function name.
 *
 * The object stored in FunctionNameMap under {@code func} is used only as a
 * prototype: its runtime class is instantiated through the public no-argument
 * constructor, so every call hands back a separate instance.
 *
 * @param func function name used as the key into FunctionNameMap
 * @return a freshly created instance, or {@code null} when {@code func} is
 *         {@code null}, is not registered, or the class cannot be instantiated
 *         (each of these cases is reported through LOG)
 */
static Function getFunctionInstance(String func) {
    // ConcurrentHashMap.get(null) throws, so guard the key before the lookup.
    Object prototype = (func == null) ? null : FunctionNameMap.get(func);
    if (prototype == null) {
        LOG.error("No function registered under name: " + func);
        return null;
    }
    Class cls = prototype.getClass();
    try {
        return (Function) cls.newInstance();
    } catch (InstantiationException e) {
        // Keep the cause in the log instead of dumping it to stderr.
        LOG.error("Cannot instantiate function '" + func + "' (" + cls.getName() + ")", e);
    } catch (IllegalAccessException e) {
        LOG.error("Cannot access constructor of function '" + func + "' (" + cls.getName() + ")", e);
    }
    return null;
}
// Matches text shaped like "f(..) showF" or "f(..) showF:toByte":
//   group 1 - the word before the opening parenthesis,
//   group 2 - the text between the parentheses (reluctant match, at least one character),
//   group 3 - the word after the closing parenthesis and whitespace, with an optional ":word" suffix,
//   group 4 - that optional ":word" suffix (including the colon) when present.
static Pattern FUNC_PATTERN = Pattern
.compile("(\\w+)\\s*\\((.+?)\\)\\s+(\\w+(:\\w+)?)");
// Per-instance map; its keys and values are not visible in this part of the file.
// NOTE(review): presumably field name -> function - confirm against the code that fills it.
Map fieldFunctionMap = new ConcurrentHashMap();
// Per-instance map; its keys and values are not visible in this part of the file.
// NOTE(review): presumably display ("show") field name -> function - confirm against the code that fills it.
Map showFieldFunctionMap = new ConcurrentHashMap();
// Operator token -> Operator implementation. A LinkedHashMap iterates in insertion order.
static Map operators = new LinkedHashMap();
// Fills the operator table and the function-name registry once, at class initialization.
static {
// "!=" is inserted ahead of "=".
// NOTE(review): presumably so a scan over the tokens tries "!=" before its substring "=" - confirm against the parser.
operators.put("!=", new UneqOperator());
operators.put("=", new EqOperator());
operators.put("is", new EqNullOperator());
operators.put("not", new UneqNullOperator());
// One instance per function name; getFunctionInstance instantiates the class of the stored object.
FunctionNameMap.put(FunctionConstant.FULL_URL, new FullUrlFunction());
FunctionNameMap.put(FunctionConstant.SPRINGTF, new SprintfFunction());
FunctionNameMap
.put(FunctionConstant.RECURSIVE, new RecursiveFunction());
FunctionNameMap.put(FunctionConstant.ADD, new AddFunction());
FunctionNameMap.put(FunctionConstant.REPLACE, new ReplaceFunction());
FunctionNameMap.put(FunctionConstant.CLEAR_TAG, new ClearTagFunction());
FunctionNameMap.put(FunctionConstant.MAX_LENGTH,
new MaxlengthFunction());
}
/**
 * Builds a result tree by storing the three arguments in the fields of the
 * same names; no other work is done here.
 *
 * NOTE(review): the parameter type "Map>" has lost its generic type arguments
 * (they look stripped as markup when this text was extracted). Restore them
 * from the original source before compiling.
 *
 * @param rootUrl         value stored in the rootUrl field
 * @param hierarchyResult value stored in the hierarchyResult field
 * @param crawlRSC        value stored in the crawlRSC field
 */
public ResutTree(URL rootUrl,
Map> hierarchyResult,
CrawlResultSetCollection crawlRSC) {
this.rootUrl = rootUrl;
this.hierarchyResult = hierarchyResult;
this.crawlRSC = crawlRSC;
/*
 * Original note: curUniResultTableMap is keyed by "tableName->extractName".
 * NOTE(review): no curUniResultTableMap is visible in this part of the file -
 * confirm which declaration this note belongs to.
 */
}
/**
 * Reports a fatal semantic problem: the message is written to the class
 * logger at ERROR level and then raised as a SemanticException.
 *
 * @param str message to log and to carry in the exception
 * @throws SemanticException always, constructed from {@code str}
 */
static void error(String str) throws SemanticException {
    final String message = str;
    LOG.error(message);
    throw new SemanticException(message);
}
/**
 * Reports a non-fatal problem through the class logger.
 *
 * Logged at WARN level so that warnings can be told apart from (and filtered
 * separately from) the ERROR entries written by {@code error(String)}.
 *
 * @param str message to log
 */
static void warn(String str) {
    LOG.warn(str);
}
// Root URL; assigned from the constructor argument of the same name.
URL rootUrl;
// Assigned from the constructor argument of the same name.
// NOTE(review): the generic type arguments of "Map>" / "HashMap>" are missing
// (they look stripped as markup when this text was extracted); restore them
// from the original source before compiling.
Map> hierarchyResult = new HashMap>();
// Crawl result collection; assigned from the constructor argument of the same name.
CrawlResultSetCollection crawlRSC;
// Per-instance concurrent map; its key and value types are not visible here.
// NOTE(review): presumably holds TableMerge entries (TableMerge is imported) - confirm against the code that fills it.
Map mergeMap = new ConcurrentHashMap();
/**
 * Plain holder for three strings named p, f and k.
 *
 * NOTE(review): the meaning of the three components is not documented in the
 * visible part of the file - confirm against the code that creates Pfk objects.
 */
class Pfk {
    String p;
    String f;
    String k;

    /**
     * @param p stored as-is
     * @param f stored as-is
     * @param k stored as-is unless it is null or empty, in which case
     *          {@code f} is used for k as well
     */
    public Pfk(String p, String f, String k) {
        this.p = p;
        this.f = f;
        // A missing k falls back to f.
        this.k = StringUtils.isEmpty(k) ? f : k;
    }

    /** Renders the three components separated by colons, in the order p, f, k. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(p).append(":");
        text.append(f).append(":");
        text.append(k);
        return text.toString();
    }
}
/**
 * Mutable bundle of an argument type, an operator and a list of arguments.
 *
 * NOTE(review): the element type of the argument list is not visible here
 * (the list is declared raw) - confirm against the code that builds Pkpk objects.
 */
class Pkpk {
    Operator operator;
    ArgumentType argumentType;
    List argList;

    /**
     * Stores the three values without copying or validating them.
     *
     * @param argmentType argument type to hold
     * @param operator    operator to hold
     * @param argList     argument list to hold (kept by reference)
     */
    public Pkpk(ArgumentType argmentType, Operator operator,
            List argList) {
        this.argList = argList;
        this.operator = operator;
        this.argumentType = argmentType;
    }

    // --- argument type ---

    public ArgumentType getArgmentType() {
        return this.argumentType;
    }

    public void setArgmentType(ArgumentType argmentType) {
        this.argumentType = argmentType;
    }

    // --- operator ---

    public Operator getOperator() {
        return this.operator;
    }

    public void setOperator(Operator operator) {
        this.operator = operator;
    }

    // --- argument list ---

    public List getArgList() {
        return this.argList;
    }

    public void setArgList(List argList) {
        this.argList = argList;
    }

    /**
     * Renders "argumentType:operator:argList". The list is dereferenced
     * directly, so a null argList raises a NullPointerException.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(argumentType).append(":");
        text.append(operator).append(":");
        text.append(argList.toString());
        return text.toString();
    }
}
private List
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
elasticsearchnested 삽입 대상 전체 텍스트 검색 및 정렬
ES Nested 중첩 유형은 무엇입니까?Elasticsearch에는 다음과 같은 다양한 데이터 유형이 있습니다.
기본 데이터 형식:string 형식.ES 7.x에서string 형식은:text와 키워드로 업그레이드됩...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.