作用在 K,V 格式的 RDD 上,根据 Key 进行分组:输入 (K, V),返回 (K, Iterable&lt;V&gt;)。
- java
package transformations;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * Demonstrates the {@code groupByKey} transformation on a (K, V) pair RDD:
 * all values sharing the same key are grouped into (K, Iterable&lt;V&gt;).
 *
 * <p>Runs locally with a single thread ({@code local} master) and prints
 * each grouped entry to stdout.
 */
public class GroupByKeyTest {
    public static void main(String[] args) {
        JavaSparkContext context = new JavaSparkContext(
                new SparkConf()
                        .setMaster("local")
                        .setAppName("groupbykey")
        );
        context.setLogLevel("Error");
        try {
            // Diamond operator instead of raw Tuple2 so the list is typed
            // List<Tuple2<String, Integer>> (the original used raw types,
            // producing unchecked warnings and an untyped pair RDD).
            context.parallelizePairs(Arrays.asList(
                    new Tuple2<>("科比", 24),
                    new Tuple2<>("科比", 23),
                    new Tuple2<>("威斯布鲁克", 0),
                    new Tuple2<>("保罗", 3),
                    new Tuple2<>("保罗", 12)
            )).groupByKey().foreach(e -> System.out.println(e));
        } finally {
            // Stop the context so executor/driver resources are released;
            // the original leaked the JavaSparkContext.
            context.stop();
        }
    }
}
- scala
package transformation

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates the `groupByKey` transformation on a (K, V) RDD:
 * all values sharing the same key are grouped into (K, Iterable[V]).
 *
 * Runs locally with a single thread (`local` master) and prints each
 * grouped entry to stdout.
 */
object GroupByKeyTest {

  def main(args: Array[String]): Unit = {
    val context = new SparkContext(
      new SparkConf()
        .setAppName("groupbykey")
        .setMaster("local")
    )
    context.setLogLevel("Error")
    try {
      context.parallelize(Array[(String, Int)](
        ("科比", 24),
        ("科比", 23),
        ("威斯布鲁克", 0),
        ("保罗", 3),
        ("保罗", 12)
      )).groupByKey().foreach(println)
    } finally {
      // Stop the context so executor/driver resources are released;
      // the original leaked the SparkContext.
      context.stop()
    }
  }
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)