3. 날짜와 숫자에 색인 추가 (lucene 노트)

7355 단어
여기서는 이전 프로젝트 lucene_index01을 복사하여 lucene_index02 프로젝트를 만들고, 색인 생성과 검색 방법을 개선하여 테스트합니다. 관련 코드: IndexUtil.java
package cn.lucene.index;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.5 demo that indexes six hard-coded sample records and runs a
 * term query against them.
 *
 * <p>Each record is indexed with string fields ({@code id}, {@code email},
 * {@code content}, {@code name}) plus two numeric fields: {@code attach}
 * (an int) and {@code date} (a long holding {@link Date#getTime()}).
 *
 * <p>If opening the index directory fails in the constructor, the exception is
 * only printed and {@code directory} stays {@code null}; later calls to
 * {@link #index()} or {@link #search()} will then fail.
 */
public class IndexUtil {
    private String[] ids = {"1", "2", "3", "4", "5", "6"};
    // Sample e-mail addresses, one per record.
    // NOTE(review): these literals look like redacted placeholders; as written, the
    // text after '@' is "protected]", which matches no key in `scores`, so every
    // document gets the default boost. Confirm the intended addresses.
    private String[] emails = {"[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]", "[email protected]"};
    // Sample message bodies, one per record (analyzed but not stored).
    private String[] content = {
            "welcom to visited the space,I like football", 
            "hello boy, i like someone", 
            "come on baby", 
            "first blood", 
            "I like football,I like football", 
            "my girlfriend is so beatiful, every body like game"
    };
    // Integer value indexed as the numeric "attach" field (presumably an attachment count).
    private int[] attaches = {2,5,6,5,8,4};
    // Sample names, one per record.
    private String[] names = {"Tom", "Jack", "goudan", "alibaba", "jerry", "kitty"};
    // Dates indexed as the numeric "date" field; filled in by setDates().
    private Date[] dates = null;
    
    private Directory directory = null;
    // Maps an e-mail domain (text after '@') to the document boost to apply.
    // Typed so that the Float value can be unboxed for Document.setBoost(float).
    private Map<String, Float> scores = new HashMap<String, Float>();
    
    /**
     * Prepares the sample dates, the domain-to-boost table and opens the
     * on-disk index directory.
     */
    public IndexUtil() {
        try {
            setDates();
            // Documents whose e-mail domain is "qq.com" get boost 2.0, "sina.edu" gets 1.5.
            scores.put("qq.com", 2.0f);
            scores.put("sina.edu", 1.5f);
            directory = FSDirectory.open(new File("E:/myeclipse/Lucene/index"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index from scratch: deletes every existing document, then
     * adds one document per sample record. Exceptions are printed, not rethrown.
     */
    public void index(){
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // Start from an empty index so repeated runs do not accumulate duplicates.
            writer.deleteAll();
            Document document = null;
            for(int i = 0; i < ids.length; i++){
                document = new Document();
                // id and name: stored, indexed as a single token, without norms.
                // email: stored, indexed as a single token (norms kept).
                // content: analyzed for full-text search but not stored.
                document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", content[i], Field.Store.NO, Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                
                // Numeric field; the third constructor argument (true) asks for the field to be indexed.
                document.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attaches[i]));
                // Dates are indexed as their millisecond timestamp.
                document.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
                
                // Domain part of the address: everything after the last '@'.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(et);
                // Apply the configured boost for this domain, or 0.5 when none is configured.
                Float boost = scores.get(et);
                if(boost != null){
                    document.setBoost(boost.floatValue());
                }else{
                    document.setBoost(0.5f);
                }
                writer.addDocument(document);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }finally{
            if(writer != null){
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Fills {@code dates} with one parsed date per sample record.
     * A parse failure is printed and leaves the remaining entries {@code null}.
     */
    private void setDates(){
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2015-02-15");
            dates[1] = sdf.parse("2015-03-01");
            dates[2] = sdf.parse("2015-05-18");
            dates[3] = sdf.parse("2015-09-05");
            dates[4] = sdf.parse("2015-12-15");
            dates[5] = sdf.parse("2015-08-29");
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the {@code content} field for the term "like" and prints up to
     * ten hits. Exceptions are printed, not rethrown.
     */
    public void search(){
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
            // Match documents whose content contains the term "like".
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            for(ScoreDoc sd : tds.scoreDocs){
                Document doc = searcher.doc(sd.doc);
                // getBoost() on a document loaded from the index reports 1.0 rather than
                // the boost set at index time; inspect the index with Luke to see the
                // effect of the index-time boost.
                System.out.println("(" + sd.doc + "  :"+ doc.getBoost() + ")" + doc.get("name") + "[" + doc.get("email") + "]-->" 
                            + doc.get("id") + "-->" + doc.get("attach") + "-->" + doc.get("date"));
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release resources only after every hit has been read; closing the reader
            // inside the loop would make the second searcher.doc(...) call fail.
            if(searcher != null){
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if(reader != null){
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}


설명:
  • 여기서는 먼저 날짜 데이터(dates)를 추가하고, 이와 함께 아래 코드를 사용합니다.
  • // 숫자 필드 추가: 세 번째 인자 true는 이 필드를 색인한다는 뜻
    document.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attaches[i]));
    // 날짜 필드 추가 (밀리초 단위 long 값으로 색인)
    document.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
    

    숫자와 날짜를 색인합니다.
  • 여기서 주의할 점은 검색 결과에서 가중치(boost)를 확인할 때 getBoost() 메서드를 사용할 수 없다는 것입니다. 검색으로 가져온 Document에서는 항상 1.0이 반환되므로, 실제 가중치는 luke 도구로 확인해야 합니다.
  • 좋은 웹페이지 즐겨찾기