import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.recommendation.ALS; import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; import org.apache.spark.mllib.recommendation.Rating; import scala.Tuple2; public class myAls { public static void main(String[] args) { // TODO Auto-generated method stub SparkConf conf=new SparkConf().setAppName("als").setMaster("local"); JavaSparkContext sc=new JavaSparkContext(conf); JavaRDDcon=sc.textFile("file:///home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/als/sample.data"); JavaRDD ratings=con.map(f->{ return new Rating( new Integer(f.split("::")[0]), new Integer(f.split("::")[1]), new Double(f.split("::")[2])); });//数据转换为javardd三元组 JavaRDD [] rr=ratings.randomSplit(new double[]{0.3,0.7}); MatrixFactorizationModel model=ALS.train(rr[0].rdd(), 15, 10);//生成模型 JavaRDD > up =rr[1].map(f->{//取0.7的数据要预测的用户和产品ID return new Tuple2<>(f.user(),f.product()); }); JavaPairRDD pupr= model.predict(up.rdd()).toJavaRDD().mapToPair(f->{ return new Tuple2 (f.user()+","+f.product(),f.rating());//用0.7的数据去预测得到一个键值对 }); //pupr.foreach(f->System.out.println(f+"gg")); JavaPairRDD upr=rr[1].mapToPair(f->{//原来0.7的数据转化为键值对 return new Tuple2 (f.user()+","+f.product(),f.rating()); }); //upr.foreach(f->System.out.println(f+"ss")); JavaPairRDD > mm=upr.join(pupr); //输出格式为((用户,项目),(预测评分,实际评分)) //mm.foreach(f->System.out.println(f+"qq")); //model.save(sc.sc(),"file:///home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/als/myals"); JavaPairRDD > recom=mm.filter(f->{ double f2_2=f._2._2; double f2_1=f._2._1; int a=(int)f2_1; int b=(int)f2_2; if (a==b){ return true;} else return false; }); double count=recom.count(); double counts=mm.count(); double accur=count/counts; System.out.println("count="+count); System.out.println("counts="+counts); System.out.println("accur="+accur); sc.stop(); } }
数据格式类似这样(每行的字段以“::”分隔):用户::产品::评分::时间戳。上面的程序只读取前三个字段,时间戳不参与计算。
利用自带的函数:
ALS.train(data,rank,iterations,lambda)
各参数意义:
ALS.train(训练数据, 隐因子个数(rank), 迭代次数, 正则化参数(lambda))。上面的代码调用的是三个参数的重载 ALS.train(数据, rank, 迭代次数),此时正则化参数取默认值 0.01。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)