import java.util.ArrayList;
import java.util.List;

/**
 * Minimal demonstration of fixed-width (n-gram) segmentation of a word.
 *
 * <p>The word is wrapped in {@link #HEAD_END_STR} on both sides and then cut
 * into overlapping fragments of {@link #PARTICIPLE_LENGTH} characters, so the
 * first and the last character of the word each appear in a fragment together
 * with the boundary marker.
 */
public class Participle
{
    /** Marker placed before and after the word to make its boundaries visible in the fragments. */
    private static final String HEAD_END_STR = "_";

    /** Number of characters in every fragment. */
    private static final int PARTICIPLE_LENGTH = 2;

    /**
     * Segments the sample word and prints the resulting fragment list.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String exampleWord = "计算机";
        System.out.println(split(exampleWord));
    }

    /**
     * Cuts {@code word}, padded with the boundary marker on both ends, into
     * overlapping fragments of {@link #PARTICIPLE_LENGTH} characters.
     *
     * @param word the word to segment; must not be {@code null}
     * @return the fragments in left-to-right order
     * @throws IllegalArgumentException if {@code word} is {@code null}
     */
    public static List<String> split(String word)
    {
        if (word == null)
        {
            throw new IllegalArgumentException("word must not be null");
        }
        String padded = HEAD_END_STR + word + HEAD_END_STR;
        List<String> result = new ArrayList<String>();
        // The last window starts where exactly PARTICIPLE_LENGTH characters remain.
        for (int i = 0; i + PARTICIPLE_LENGTH <= padded.length(); i++)
        {
            result.add(padded.substring(i, i + PARTICIPLE_LENGTH));
        }
        return result;
    }
}
输出结果:[_计, 计算, 算机, 机_]
需要commons-io包,或者自己写读文件的部分:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
public class Test20 {
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
String str = null
try {
str = FileUtils.readFileToString(new File("e.txt"))
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace()
}
Pattern p = Pattern.compile("\\b[\\w-']+\\b")
Matcher m = p.matcher(str)
List<Word> words = new ArrayList<Word>()
while(m.find()){
add(words, m.group().trim())
}
Collections.sort(words, new Comparator<Word>(){
@Override
public int compare(Word o1, Word o2) {
// TODO Auto-generated method stub
return o1.getWord().compareTo(o2.getWord())
}})
System.out.println(words)
}
private static void add(List<Word> words, String word) {
// TODO Auto-generated method stub
for(Word temp : words){
if(temp.getWord().equals(word)){
temp.setCount(temp.getCount() + 1)
return
}
}
Word w = new Word()
w.setWord(word)
words.add(w)
}
}
/**
 * Mutable pair of a word and the number of times it has been seen.
 * A new instance has no word set and a count of 1.
 */
class Word {
    /** The word text; {@code null} until {@link #setWord(String)} is called. */
    private String word;

    /** Number of occurrences; starts at 1 because an entry is created on first sight. */
    private int count = 1;

    /**
     * @return the word text, or {@code null} if none has been set
     */
    public String getWord() {
        return word;
    }

    /**
     * @param word the word text to store
     */
    public void setWord(String word) {
        this.word = word;
    }

    /**
     * @return the current occurrence count
     */
    public int getCount() {
        return count;
    }

    /**
     * @param count the new occurrence count
     */
    public void setCount(int count) {
        this.count = count;
    }

    /**
     * @return a string of the form {@code Word [word=<word>, count=<count>]}
     */
    @Override
    public String toString() {
        return "Word [word=" + word + ", count=" + count + "]";
    }
}
java读取中文分词工具:linger
Java开源中文分词器:
1、word分词器
2、Ansj分词器
3、Stanford分词器
4、FudanNLP分词器
5、Jieba分词器
6、Jcseg分词器
7、MMSeg4j分词器
8、IKAnalyzer分词器
9、Paoding分词器
10、smartcn分词器
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)