검색엔진의 Lietu(猎兔) 단어 분할 예제

2407 단어 검색 엔진 자바
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

import com.lietu.seg.result.CnTokenizer;

// Requires lucene-core-2.3.2.jar and seg.jar on the classpath, plus the "dic" data (presumably the segmenter's dictionary; verify).
/**
 * Small demo that runs text through {@code CnTokenizer} and then through a
 * filter which additionally indexes every character of a multi-character
 * token as its own term.
 */
public class MyCnAnalyzerTest {

	/**
	 * Analyzer whose token stream is a {@code CnTokenizer} wrapped in a
	 * {@link MySingleFilter}.
	 */
	static class MyCnAnalyzer extends Analyzer {

		public MyCnAnalyzer() {
			// makeTag is a static field, so this setting is process-wide.
			// NOTE(review): presumably enables tagging of token types in
			// the segmenter; confirm against the seg.jar documentation.
			CnTokenizer.makeTag = true;
		}

		/**
		 * Builds the analysis chain for one field.
		 *
		 * @param fieldName name of the field being analyzed (not used here)
		 * @param reader    source of the text to tokenize
		 * @return the tokenizer output passed through {@link MySingleFilter}
		 */
		@Override
		public TokenStream tokenStream(String fieldName, Reader reader) {
			return new MySingleFilter(new CnTokenizer(reader));
		}
	}

	/**
	 * Token filter that, for every upstream token containing more than one
	 * character, returns the token itself followed by one extra token per
	 * character. The per-character tokens have type {@code "1word"} and a
	 * position increment of 0, so they share the position of the token they
	 * were cut from. Tokens of a single character pass through unchanged.
	 */
	static class MySingleFilter extends TokenFilter {

		/** Type assigned to every single-character token created here. */
		private static final String TOKEN_TYPE = "1word";

		/** Text still being split into characters, or null when idle. */
		private String pendingText = null;
		/** Start offset of the token that {@link #pendingText} came from. */
		private int pendingStart = 0;
		/** Index into {@link #pendingText} of the next character to emit. */
		private int offset = 0;

		public MySingleFilter(TokenStream in) {
			super(in);
		}

		/**
		 * Returns the next token of the filtered stream.
		 *
		 * @return the next token, or {@code null} when the input is exhausted
		 * @throws IOException if the wrapped stream fails
		 */
		@Override
		public Token next() throws IOException {
			if (pendingText != null) {
				if (offset < pendingText.length()) {
					return nextCharacterToken();
				}
				// Every character has been handed out; go back to the input.
				pendingText = null;
			}

			Token t = input.next();
			if (t == null) {
				return null;
			}

			String text = t.termText();
			// Count code points rather than chars so that a lone
			// supplementary character is not treated as two characters.
			if (text.codePointCount(0, text.length()) > 1) {
				// Snapshot what is needed instead of keeping the Token: the
				// caller owns the returned token and is free to modify it.
				pendingText = text;
				pendingStart = t.startOffset();
				offset = 0;
			}
			// The whole token goes out first with its own position
			// increment; the zero-increment character tokens that follow
			// then land on this token's position rather than on the
			// previous token's (or before the first position of the field).
			return t;
		}

		/** Creates the token for the character at {@code offset} and advances past it. */
		private Token nextCharacterToken() {
			// Step by one code point so surrogate pairs stay together.
			int end = pendingText.offsetByCodePoints(offset, 1);
			Token single = new Token(pendingText.substring(offset, end),
					pendingStart + offset,
					pendingStart + end, TOKEN_TYPE);
			// Increment 0 puts this term at the same position as the
			// previously returned token.
			single.setPositionIncrement(0);
			offset = end;
			return single;
		}
	}

	/**
	 * Tokenizes a sample string and prints, for each token: term text, start
	 * offset, end offset, type and position increment.
	 */
	public static void main(String[] args) throws IOException {
		MyCnAnalyzer cna = new MyCnAnalyzer();
		String input = "               《    》  ,    ,   ";
		// The field name is arbitrary here; the analyzer does not use it.
		TokenStream ts = cna.tokenStream("asd", new StringReader(input));
		try {
			for (Token t = ts.next(); t != null; t = ts.next()) {
				System.out.println(t.termText() + " " + t.startOffset() + " "
						+ t.endOffset() + " " + t.type() + " "
						+ t.getPositionIncrement());
			}
		} finally {
			ts.close();
		}
	}
}

좋은 웹페이지 즐겨찾기