한 편의 글에서 사전의 새 단어장에 없는 단어를 선별하여 txt 파일로 내보냅니다.
9037 단어 txt
<?xml version="1.0" encoding="UTF-8"?>
<wordbook><item>
<word>daunting</word>
<trans><![CDATA[adj. ; ;
daunting: | | ]]></trans>
<phonetic><![CDATA[['dɔ:ntiŋ]]]></phonetic>
<tags/>
<progress>10</progress>
</item><item>
<word>informative</word>
<trans><![CDATA[adj. , ; ;
informative: | | ]]></trans>
<phonetic><![CDATA[[in'fɔ:mətiv]]]></phonetic>
<tags/>
<progress>10</progress>
</item><item>
<word>contribute</word>
<trans><![CDATA[vt. , ; ;
vt. , ; ;
contribute: | | ]]></trans>
<phonetic><![CDATA[[kən'tribju:t]]]></phonetic>
<tags/>
<progress>10</progress>
</item>
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
public class WordNewMain {
/**
* @param args
* @throws DocumentException
* @throws IOException
*/
public static void main(String[] args) throws DocumentException, IOException {
Map<String, Word> listMap = new HashMap<String, Word>();
SAXReader saxReader = new SAXReader();
saxReader.setEncoding("utf-8");
Document whole1Xml = saxReader.read(new BufferedInputStream(new FileInputStream("all_sych.xml")));
List<Element> whole1List = whole1Xml.selectNodes("//wordbook/item");
System.out.println("whole1 List Size:" + whole1List.size());
for (int i = 0; i < whole1List.size(); i++) {
Element e = whole1List.get(i);
Node word = e.selectSingleNode("word");
Node trans = e.selectSingleNode("trans");
Node phonetic = e.selectSingleNode("phonetic");
Node tags = e.selectSingleNode("tags");
Node progress = e.selectSingleNode("progress");
Word w = listMap.get(word.getStringValue());
if (w != null && Integer.parseInt(w.getProgress()) < Integer.parseInt(progress.getStringValue())) {
w.setProgress(progress.getStringValue());
} else if (w == null) {
e.detach();
w = new Word(word.getStringValue(), trans.getStringValue(), phonetic.getStringValue(),
tags.getStringValue(), progress.getStringValue());
}
listMap.put(word.getStringValue().toLowerCase(), w);
}
// txt
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("8word.txt"))));
System.out.println("Ok, find the file!");
String line = null;
byte[] wordB = new byte[30];
Map<String, String> countArea = new HashMap<String, String>();
int wordBP = 0;
String theWord = null;
System.out.println("Start count~");
FileWriter fw = new FileWriter("result.txt");
while ((line = br.readLine()) != null) {
boolean inWord = true;
byte[] lineB = line.getBytes();
for (int i = 0; i < lineB.length; i++) {
// is a character
if ((lineB[i] < 91 && lineB[i] > 64) || (lineB[i] < 123 && lineB[i] > 96)) {
wordB[wordBP] = lineB[i];
wordBP = wordBP + 1;
inWord = true;
} else if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
br.close();
// steven txt
br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("steve_4p.txt"))));
System.out.println("Ok, find the file!");
while ((line = br.readLine()) != null) {
boolean inWord = true;
byte[] lineB = line.getBytes();
for (int i = 0; i < lineB.length; i++) {
// is a character
if ((lineB[i] < 91 && lineB[i] > 64) || (lineB[i] < 123 && lineB[i] > 96)) {
wordB[wordBP] = lineB[i];
wordBP = wordBP + 1;
inWord = true;
} else if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
// GRE text
br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("gre.txt"))));
System.out.println("Ok, find the file!");
while ((line = br.readLine()) != null) {
boolean inWord = true;
byte[] lineB = line.getBytes();
for (int i = 0; i < lineB.length; i++) {
// is a character
if ((lineB[i] < 91 && lineB[i] > 64) || (lineB[i] < 123 && lineB[i] > 96)) {
wordB[wordBP] = lineB[i];
wordBP = wordBP + 1;
inWord = true;
} else if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
// GaoZhong text
br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("gz.txt"))));
System.out.println("Ok, find the file!");
while ((line = br.readLine()) != null) {
boolean inWord = true;
byte[] lineB = line.getBytes();
for (int i = 0; i < lineB.length; i++) {
// is a character
if ((lineB[i] < 91 && lineB[i] > 64) || (lineB[i] < 123 && lineB[i] > 96)) {
wordB[wordBP] = lineB[i];
wordBP = wordBP + 1;
inWord = true;
} else if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
if (inWord) {
theWord = new String(wordB).trim().toLowerCase();
if (listMap.get(theWord) == null && theWord.length() > 1) {
countArea.put(theWord, theWord);
}
wordBP = 0;
inWord = false;
wordB = new byte[30];
}
}
// output
Iterator<String> it = countArea.keySet().iterator();
while (it.hasNext()) {
fw.write(it.next() + "\r
");
}
fw.close();
System.out.println("End count~");
System.out.println("Sum word of steve is :" + countArea.size());
}
}
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
Java 텍스트 파일(.txt .csv)로부터의 입력 java.io.FileReader➊FileReader fr = new FileReader("파일 이름"); ➋Buffered br = BufferedReader(br); ➌String rec; rec = br.readLine(); ➍ fr.clos...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.