如需数据集,可以给我留言。
/**
 * Fits a Spark ML linear regression that uses the `square` column as the only
 * feature and the `price` column as the label, then prints predictions for a
 * held-out split.
 *
 * Input is `house.csv` (semicolon-separated, with a header row), resolved
 * relative to the working directory. The job runs on a `local[*]` master.
 */
object linner {

  /**
   * Program entry point: load the CSV, build the feature vector, train on 80%
   * of the rows and show the model output for the remaining 20%.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("linner")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    // try/finally so the session is stopped even when loading, parsing or
    // fitting throws.
    try {
      import spark.implicits._

      val file = spark.read
        .format("csv")
        .option("sep", ";")
        .option("header", "true")
        .load("house.csv")

      // Generate a random number per row; sorting on it shuffles the rows.
      val random = new scala.util.Random()
      val data = file
        .select("square", "price")
        // Both columns are fetched as strings and parsed here; a value that
        // is not a valid number makes toDouble throw.
        .map(row => (row.getString(0).toDouble, row.getString(1).toDouble, random.nextDouble()))
        .toDF("square", "price", "random")
        .sort("random")

      // Pack the input column(s) into the single vector column Spark ML expects.
      val assembler = new VectorAssembler()
        .setInputCols(Array("square"))
        .setOutputCol("features")
      val frame = assembler.transform(data)

      // Split the data set into two parts (80% / 20%) with a fixed seed of 1.
      val Array(train, test) = frame.randomSplit(Array(0.8, 0.2), 1L)

      // Create the linear regression estimator.
      val regression = new LinearRegression()
        .setMaxIter(10)          // training iterations
        .setRegParam(0.3)        // regularization
        .setElasticNetParam(0.8) // noted as the "recommended value" by the original author
        .setLabelCol("price")       // label column
        .setFeaturesCol("features") // feature vector column

      val model = regression.fit(train)
      model.transform(test).show()
    } finally {
      spark.stop()
    }
  }
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)