使用 Python 训练模型并生成 PMML 文件,然后用 Spark 读取
import com.ubiai.zhyx.utils.SparkHelper
import org.apache.spark.ml.Transformer
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.jpmml.evaluator.spark.TransformerBuilder
import org.jpmml.evaluator.{LoadingModelEvaluatorBuilder, ModelEvaluator}
import java.io.InputStream
/**
 * Scores a Spark DataFrame of features with a model stored as a PMML file.
 *
 * The PMML file is looked up on the classpath (e.g. placed under `resources`),
 * wrapped into a Spark ML [[Transformer]] via JPMML's `TransformerBuilder`,
 * applied to the feature DataFrame, and a few result columns are printed.
 */
object FeaturesTransformer {

  /** Classpath resource name of the PMML model file. */
  private val PmmlResource: String = "xgb-test-20220418.pmml"

  /**
   * Entry point: builds the PMML transformer, scores the feature data and shows the result.
   *
   * @param args command-line arguments (unused)
   * @throws IllegalArgumentException if the PMML resource is not on the classpath
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkHelper.getSparkSession("dev", "pmmlTest")
    try {
      // NOTE(review): the SQL text below looks like a placeholder for the real
      // feature query — replace it with the actual statement before running.
      val features: DataFrame = spark.sql("这里是经过计算的得到的特征数据")

      // Create the transformer from the loaded model evaluator.
      val pmmlTransformer: Transformer = new TransformerBuilder(loadEvaluator(PmmlResource))
        // .withTargetCols()
        .withOutputCols()
        // presumably flattens the model outputs into top-level columns, which
        // the select on "predicted_score" below relies on — confirm against JPMML docs
        .exploded(true)
        .build()

      val scored: DataFrame = pmmlTransformer.transform(features)
      scored.select("vid", "date", "predicted_score").show()
    } finally {
      // Release the session even when loading the model or scoring fails.
      spark.stop()
    }
  }

  /**
   * Loads a PMML model from the classpath and builds its evaluator.
   *
   * The stream is always closed once the model has been parsed.
   *
   * @param resourceName classpath-relative name of the PMML file
   * @return the evaluator built from the PMML document
   * @throws IllegalArgumentException if no such resource exists on the classpath
   */
  private def loadEvaluator(resourceName: String): ModelEvaluator[_] = {
    // getResourceAsStream returns null for a missing resource; fail with a clear message instead.
    val stream: InputStream =
      Option(getClass.getClassLoader.getResourceAsStream(resourceName)).getOrElse(
        throw new IllegalArgumentException(s"PMML resource not found on classpath: $resourceName")
      )
    try new LoadingModelEvaluatorBuilder().load(stream).build()
    finally stream.close()
  }
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)