返回顶部

收藏

Java 超快速文本去重代码

更多
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;

/**
 * Command-line utility that removes duplicate lines from a UTF-8 text file.
 *
 * <p>The whole input is held in memory as a set of distinct lines, so the JVM heap must be
 * large enough for the file being processed (see the usage banner printed by {@link #print()}).
 */
public class SpeedClear {

    /** Capacity, in chars, of the reader's buffer (20M chars). */
    private static final int READ_BUFFER_CHARS = 20 * 1024 * 1024;

    /** A progress marker is printed once per this many input lines. */
    private static final int PROGRESS_INTERVAL = 30000;

    /**
     * Program entry point.
     *
     * <p>With no arguments the usage banner is printed; with any argument count other than two a
     * format error is printed. Both cases exit with status 1.
     *
     * @param args
     *            {@code args[0]} is the source file path, {@code args[1]} the destination path
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            print();
            System.exit(1);
        }
        if (args.length != 2) {
            System.out.println("Format error...");
            System.exit(1);
        }
        clear(args[0], args[1]);
    }

    /**
     * Copies the distinct lines of {@code pathname} to {@code newPath}.
     *
     * <p>Each distinct line is written once, in the order of its first occurrence, terminated
     * with {@code "\r\n"}. Both files are treated as UTF-8. The source is read completely before
     * the destination is opened, so the two paths may refer to the same file and a failed read
     * leaves an existing destination untouched.
     *
     * <p>Failures are reported on the console rather than thrown: I/O and security failures and
     * null arguments go to {@code System.err}, and running out of heap prints a hint on
     * {@code System.out}.
     *
     * @param pathname
     *            path of the source file
     * @param newPath
     *            path of the file to write the de-duplicated lines to
     */
    public static void clear(String pathname, String newPath) {
        System.out.println("Start... ");

        if (pathname == null || newPath == null) {
            System.err.println("Source and destination paths must not be null.");
            return;
        }

        try {
            Set<String> distinctLines = readDistinctLines(pathname);
            System.out.println("");   // terminate the line of progress markers
            writeLines(distinctLines, newPath);
            System.out.println("size = " + distinctLines.size());
            System.out.println("End...");
        } catch (IOException | SecurityException e) {
            // Report the real cause instead of guessing that the file was too large.
            System.err.println("Failed to de-duplicate " + pathname + " -> " + newPath + ": " + e);
        } catch (OutOfMemoryError e) {
            // The set of lines did not fit in the heap; by now it is unreachable, so printing
            // the hint is safe. This is the only case the "file too large" message applies to.
            System.out.println("文件太大了,建议先100MB大小..");
        }
    }

    /** Reads every line of the UTF-8 file at {@code pathname} into an insertion-ordered set. */
    private static Set<String> readDistinctLines(String pathname) throws IOException {
        Set<String> distinctLines = new LinkedHashSet<>();
        // The stream is declared as its own resource so that it is closed even if constructing
        // the (large) reader on top of it fails. InputStreamReader is kept, rather than a strict
        // decoder, so malformed byte sequences are replaced instead of aborting the run.
        try (InputStream rawIn = new FileInputStream(pathname);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(rawIn, StandardCharsets.UTF_8), READ_BUFFER_CHARS)) {
            long lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                distinctLines.add(line);
                if (lineCount % PROGRESS_INTERVAL == 0) {
                    System.out.print("..");
                }
                lineCount++;
            }
        }
        return distinctLines;
    }

    /** Writes each element of {@code lines} to {@code newPath} as UTF-8, followed by CRLF. */
    private static void writeLines(Set<String> lines, String newPath) throws IOException {
        try (OutputStream rawOut = new FileOutputStream(newPath);
             Writer writer = new BufferedWriter(
                     new OutputStreamWriter(rawOut, StandardCharsets.UTF_8))) {
            for (String line : lines) {
                writer.write(line);
                writer.write("\r\n");
            }
        }
    }

    /** Prints the usage banner to standard output. */
    public static void print() {
        System.out.println("*************************************************");
        System.out.println("\t\tTo repeat \t\t");
        System.out.println();
        System.out.println("  format: java -Xmx1000m SpeedClear c:\\old.txt c:\\new.txt\t\t");
        System.out.println();
        System.out.println("\t\tAuthor:xxser    QQ:616100108");
        System.out.println("*************************************************");
    }

}

标签:java

收藏

0人收藏

支持

0

反对

0

发表评论