파일 문자 인코딩 변환 도구

3803 단어 UP

/**
 * Batch tool that re-encodes the plain files found directly inside {@link #PATH}.
 *
 * <p>Each file's encoding is probed with cpdetector. Every file whose detected
 * charset is not {@code GB2312} (or whose charset could not be detected) is read
 * as UTF-8 and written, encoded as GB2312, into a {@code gbk} sub-directory next
 * to the source file. Sub-directories of {@link #PATH} are not descended into.
 */
public class CharsetConvertor {
	/** Directory whose files are scanned; must be set to an existing directory. */
	public final static String PATH = "";

	/**
	 * Fragments removed from each output file name. Entries are passed to
	 * {@link String#replaceAll(String, String)} and are therefore interpreted as
	 * regular expressions.
	 */
	public final static String[] FILTER_WORD = new String[] {  };

	/**
	 * Scans {@link #PATH} and converts every file that is not already GB2312.
	 *
	 * @param args
	 *            unused
	 * @throws RuntimeException
	 *             if {@link #PATH} is not a directory, cannot be listed, or the
	 *             {@code gbk} output directory cannot be created
	 */
	public static void main(String[] args) {
		cpdetector.io.CodepageDetectorProxy detector = cpdetector.io.CodepageDetectorProxy
				.getInstance();
		detector.add(cpdetector.io.JChardetFacade.getInstance());

		File dir = new File(PATH);
		if (!dir.isDirectory()) {
			throw new RuntimeException("     [" + dir.getAbsolutePath() + "]   .");
		}

		// listFiles() returns null (not an empty array) when the directory
		// cannot be read, so guard against it instead of failing with an NPE.
		File[] files = dir.listFiles();
		if (files == null) {
			throw new RuntimeException("Unable to list directory [" + dir.getAbsolutePath() + "]");
		}

		for (File file : files) {
			if (file.isDirectory()) {
				continue;
			}

			// Declared per file so that a failed detection cannot silently
			// reuse the charset detected for the previous file.
			java.nio.charset.Charset charset = null;
			try {
				// toURI().toURL() escapes characters that the deprecated
				// File.toURL() leaves as-is.
				charset = detector.detectCodepage(file.toURI().toURL());
			} catch (Exception ex) {
				// Best effort: report and fall through with charset == null,
				// which means the file is still converted below.
				System.err.println("    [" + file.getAbsolutePath() + "]    !");
				ex.printStackTrace();
			}

			if (charset != null && "GB2312".equals(charset.name())) {
				continue; // already in the target encoding
			}

			// Converted copies go into a "gbk" directory beside the source file.
			// Building the path with File keeps it valid on every platform.
			File gbkDir = new File(file.getParentFile(), "gbk");
			if (!gbkDir.isDirectory() && !gbkDir.mkdir()) {
				throw new RuntimeException("  gbk   [" + gbkDir.getPath() + "]  ");
			}

			String name = file.getName();
			for (String word : FILTER_WORD) {
				name = name.replaceAll(word, "");
			}
			String fileName = new File(gbkDir, name).getPath();
			System.out.println(fileName);

			try {
				convert(file.getAbsolutePath(), fileName, "UTF-8", "GB2312");
			} catch (IOException e) {
				// Also covers UnsupportedEncodingException (a subclass of
				// IOException). Keep going so one bad file does not abort the batch.
				e.printStackTrace();
			}
		}
	}

	/**
	 * Copies text from {@code infile} to {@code outfile}, decoding it with
	 * {@code fromCharset} and re-encoding it with {@code toCharset}.
	 *
	 * <p>Files opened by this method are always closed, even when the copy
	 * fails. The standard streams used as fall-backs are flushed but left open.
	 *
	 * @param infile
	 *            path of the file to read, or {@code null} to read {@code System.in}
	 * @param outfile
	 *            path of the file to write, or {@code null} to write {@code System.out}
	 * @param fromCharset
	 *            charset name of the input, or {@code null} for the value of the
	 *            {@code file.encoding} system property
	 * @param toCharset
	 *            charset name of the output, or {@code null} for the value of the
	 *            {@code file.encoding} system property
	 * @throws IOException
	 *             if a file cannot be opened, read or written
	 * @throws UnsupportedEncodingException
	 *             if either charset name is not supported
	 */
	public static void convert(String infile, String outfile, String fromCharset, String toCharset)
			throws IOException, UnsupportedEncodingException {
		// Use default encoding if no encoding is specified.
		if (fromCharset == null)
			fromCharset = System.getProperty("file.encoding");
		if (toCharset == null)
			toCharset = System.getProperty("file.encoding");

		boolean ownsInput = infile != null;
		boolean ownsOutput = outfile != null;

		InputStream in = ownsInput ? new FileInputStream(infile) : System.in;
		try {
			OutputStream out = ownsOutput ? new FileOutputStream(outfile) : System.out;
			try {
				// The reader decodes the input bytes to chars; the writer encodes
				// those chars into the output charset.
				Reader r = new BufferedReader(new InputStreamReader(in, fromCharset));
				Writer w = new BufferedWriter(new OutputStreamWriter(out, toCharset));

				char[] buffer = new char[4096];
				int len;
				while ((len = r.read(buffer)) != -1)
					w.write(buffer, 0, len);

				if (ownsOutput)
					w.close(); // flushes and finishes the encoded output
				else
					w.flush(); // never close System.out on behalf of the caller
			} finally {
				// Runs on failure paths too; closing an already closed
				// FileOutputStream is harmless.
				if (ownsOutput)
					out.close();
			}
		} finally {
			if (ownsInput)
				in.close();
		}
	}

}

좋은 웹페이지 즐겨찾기