자바 키워드 전체 텍스트 검색 인스턴스

18080 단어 lucene
package javaLucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;


public class TestLucene {
     private static TestLucene indexManager;
        private static String content="";

        private static String INDEX_DIR = "G:\\Unity3dziyuan";
        private static String DATA_DIR = "G:\\Unity3dziyuan";
        private static Analyzer analyzer = null;
        private static Directory directory = null;
        private static IndexWriter indexWriter = null;

        /**
         *        
         * @return          
         */
        public TestLucene getManager(){
            if(indexManager == null){
                this.indexManager = new TestLucene();
            }
            return indexManager;
        }
        /**
         *            
         * @param path       
         * @return     
         */
        public static boolean createIndex(String path){
            Date date1 = new Date();
            List fileList = getFileList(path);
            for (File file : fileList) {
                content = "";
                //      
                String type = file.getName().substring(file.getName().lastIndexOf(".")+1);
                if("txt".equalsIgnoreCase(type)){

                    content += txt2String(file);

                }else if("doc".equalsIgnoreCase(type)){

                    content += doc2String(file);

                }else if("xls".equalsIgnoreCase(type)){

                    content += xls2String(file);

                }

                System.out.println("name :"+file.getName());
                System.out.println("path :"+file.getPath());
//              System.out.println("content :"+content);
                System.out.println();


                try{
                    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
                    directory = FSDirectory.open(new File(INDEX_DIR));

                    File indexFile = new File(INDEX_DIR);
                    if (!indexFile.exists()) {
                        indexFile.mkdirs();
                    }
                    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
                    indexWriter = new IndexWriter(directory, config);

                    Document document = new Document();
                    document.add(new TextField("filename", file.getName(), Store.YES));
                    document.add(new TextField("content", content, Store.YES));
                    document.add(new TextField("path", file.getPath(), Store.YES));
                    indexWriter.addDocument(document);
                    indexWriter.commit();
                    closeWriter();


                }catch(Exception e){
                    e.printStackTrace();
                }
                content = "";
            }
            Date date2 = new Date();
            System.out.println("    -----  :" + (date2.getTime() - date1.getTime()) + "ms
"
); return true; } /** * txt * @param file * @return */ public static String txt2String(File file){ String result = ""; try{ BufferedReader br = new BufferedReader(new FileReader(file));// BufferedReader String s = null; while((s = br.readLine())!=null){// readLine , result = result + "
"
+s; } br.close(); }catch(Exception e){ e.printStackTrace(); } return result; } /** * doc * @param file * @return */ public static String doc2String(File file){ String result = ""; try{ FileInputStream fis = new FileInputStream(file); HWPFDocument doc = new HWPFDocument(fis); Range rang = doc.getRange(); result += rang.text(); fis.close(); }catch(Exception e){ e.printStackTrace(); } return result; } /** * xls * @param file * @return */ public static String xls2String(File file){ String result = ""; try{ FileInputStream fis = new FileInputStream(file); StringBuilder sb = new StringBuilder(); jxl.Workbook rwb = Workbook.getWorkbook(fis); Sheet[] sheet = rwb.getSheets(); for (int i = 0; i < sheet.length; i++) { Sheet rs = rwb.getSheet(i); for (int j = 0; j < rs.getRows(); j++) { Cell[] cells = rs.getRow(j); for(int k=0;kcatch(Exception e){ e.printStackTrace(); } return result; } /** * , * @param text * @return List */ public static void searchIndex(String text){ Date date1 = new Date(); try{ directory = FSDirectory.open(new File(INDEX_DIR)); analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); DirectoryReader ireader = DirectoryReader.open(directory); IndexSearcher isearcher = new IndexSearcher(ireader); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); Query query = parser.parse(text); ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; for (int i = 0; i < hits.length; i++) { Document hitDoc = isearcher.doc(hits[i].doc); System.out.println("____________________________"); System.out.println(hitDoc.get("filename")); System.out.println(hitDoc.get("content")); System.out.println(hitDoc.get("path")); System.out.println("____________________________"); } ireader.close(); directory.close(); }catch(Exception e){ e.printStackTrace(); } Date date2 = new Date(); System.out.println(" ----- :" + (date2.getTime() - date1.getTime()) + "ms
"
); } /** * * @param dirPath * @return list */ public static List getFileList(String dirPath) { File[] files = new File(dirPath).listFiles(); List fileList = new ArrayList(); for (File file : files) { if (isTxtFile(file.getName())) { fileList.add(file); } } return fileList; } /** * , txt xls doc * @param fileName * @return , true; false */ public static boolean isTxtFile(String fileName) { if (fileName.lastIndexOf(".txt") > 0) { return true; }else if (fileName.lastIndexOf(".xls") > 0) { return true; }else if (fileName.lastIndexOf(".doc") > 0) { return true; } return false; } public static void closeWriter() throws Exception { if (indexWriter != null) { indexWriter.close(); } } /** * * @param file * @return , true. */ public static boolean deleteDir(File file){ if(file.isDirectory()){ File[] files = file.listFiles(); for(int i=0; ireturn true; } public static void main(String[] args){ //File fileIndex = new File(INDEX_DIR); /*if(deleteDir(fileIndex)){ fileIndex.mkdir(); }else{ fileIndex.mkdir(); }*/ createIndex(DATA_DIR); searchIndex("man"); } }

좋은 웹페이지 즐겨찾기