1)方式一
object TestWordCount {
def main(args: Array[String]): Unit = {
// 第一种方式(不通用)
val tupleList = List(("Hello Scala Spark World ", 4), ("Hello Scala Spark", 3), ("Hello Scala", 2), ("Hello", 1))
val stringList: List[String] = tupleList.map(t=>(t._1 + " ") * t._2)
//val words: List[String] = stringList.flatMap(s=>s.split(" "))
val words: List[String] = stringList.flatMap(_.split(" "))
//在map中,如果传进来什么就返回什么,不要用_省略
val groupMap: Map[String, List[String]] = words.groupBy(word=>word)
//val groupMap: Map[String, List[String]] = words.groupBy(_)
// (word, list) => (word, count)
val wordToCount: Map[String, Int] = groupMap.map(t=>(t._1, t._2.size))
val wordCountList: List[(String, Int)] = wordToCount.toList.sortWith {
(left, right) => {
left._2 > right._2
}
}.take(3)
//tupleList.map(t=>(t._1 + " ") * t._2).flatMap(_.split(" ")).groupBy(word=>word).map(t=>(t._1, t._2.size))
println(wordCountList)
}
}
2)方式二
object TestWordCount {
def main(args: Array[String]): Unit = {
val tuples = List(("Hello Scala Spark World", 4), ("Hello Scala Spark", 3), ("Hello Scala", 2), ("Hello", 1))
// (Hello,4),(Scala,4),(Spark,4),(World,4)
// (Hello,3),(Scala,3),(Spark,3)
// (Hello,2),(Scala,2)
// (Hello,1)
val wordToCountList: List[(String, Int)] = tuples.flatMap {
t => {
val strings: Array[String] = t._1.split(" ")
strings.map(word => (word, t._2))
}
}
// Hello, List((Hello,4), (Hello,3), (Hello,2), (Hello,1))
// Scala, List((Scala,4), (Scala,3), (Scala,2)
// Spark, List((Spark,4), (Spark,3)
// Word, List((Word,4))
val wordToTupleMap: Map[String, List[(String, Int)]] = wordToCountList.groupBy(t=>t._1)
val stringToInts: Map[String, List[Int]] = wordToTupleMap.mapValues {
datas => datas.map(t => t._2)
}
stringToInts
}
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)