需求:获取用户点击后立刻购买的事件信息
数据源为 Kafka;这里比较重要的一点是设置水位线(Watermarks)。
import com.alibaba.fastjson.JSON
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.cep.pattern.conditions.SimpleCondition
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.api.scala._
import org.apache.flink.cep.PatternSelectFunction
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.windowing.time.Time
import scala.util.Try

/**
 * Flink CEP job that reads JSON user events from Kafka and prints one line for
 * every user whose "click" event is directly followed by a "buy" event within
 * 10 seconds of event time.
 */
object Model01 {

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments; not read by this job
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // The CEP time window below is evaluated on the timestamps carried by the events.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "xxxx:9092")
    properties.setProperty("group.id", "test")
    // NOTE(review): the Flink Kafka consumer is believed to install its own byte-array
    // deserializers and ignore these two settings; kept unchanged, confirm before removing.
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

    val consumer = new FlinkKafkaConsumer010[String]("topic", new SimpleStringSchema(), properties)
    consumer.setStartFromLatest()

    val input: DataStream[UserAction] = env
      .addSource(consumer)
      // flatMap instead of map: a record that cannot be turned into a UserAction is
      // dropped rather than failing the whole job.
      .flatMap((line: String) => parseUserAction(line).toList)
      .assignTimestampsAndWatermarks(
        // Watermarks trail the highest seen timestamp by 10 seconds.
        new BoundedOutOfOrdernessTimestampExtractor[UserAction](Time.seconds(10)) {
          override def extractTimestamp(t: UserAction): Long = t.timestamp
        })
      // Key by user so the pattern is matched per user, not across users.
      .keyBy(_.name)

    // "click" directly followed by "buy" (Pattern.next), within 10 seconds.
    val pattern = Pattern
      .begin[UserAction]("start").where(actionIs("click"))
      .next("end").where(actionIs("buy"))
      .within(Time.seconds(10))

    val patternStream = CEP.pattern(input, pattern)

    val result = patternStream.select(new PatternSelectFunction[UserAction, String] {
      override def select(map: java.util.Map[String, java.util.List[UserAction]]): String = {
        // Neither pattern step is looping, so each list holds the single matched event.
        val start: UserAction = map.get("start").get(0)
        val end: UserAction = map.get("end").get(0)
        s"name: ${start.name}, project_id: ${end.project_id}, timestamp: ${end.timestamp}"
      }
    })

    result.print()
    env.execute("CEPTestJson")
  }

  /**
   * Parses one Kafka record into a [[UserAction]].
   *
   * Returns `None` when the line is not a JSON object, or when `time`,
   * `distinct_id` or `event` is missing or unusable. Those are exactly the
   * records that would otherwise throw further down the pipeline (unboxing a
   * null timestamp, keying by a null name, comparing a null action).
   * `project_id` is only echoed in the output, so it is allowed to be absent.
   *
   * @param line raw record value as read from Kafka
   * @return the parsed action, or `None` for a record that must be skipped
   */
  private def parseUserAction(line: String): Option[UserAction] =
    for {
      data      <- Try(JSON.parseObject(line)).toOption.flatMap(Option(_))
      // getLong yields a boxed value that is null when the field is absent.
      // assumes `time` is epoch milliseconds, as Flink event time expects; TODO confirm
      timestamp <- Try(data.getLong("time")).toOption.flatMap(Option(_)).map(_.longValue)
      name      <- Option(data.getString("distinct_id"))
      action    <- Option(data.getString("event"))
    } yield UserAction(data.getString("project_id"), timestamp, name, action)

  /**
   * Condition accepting events whose action equals `expected`.
   *
   * @param expected action name to match, e.g. "click"
   * @return a CEP condition usable in `Pattern.where`
   */
  private def actionIs(expected: String): SimpleCondition[UserAction] =
    new SimpleCondition[UserAction] {
      // Scala `==` is null-safe, unlike calling `.equals` on a possibly-null action.
      override def filter(t: UserAction): Boolean = t.action == expected
    }

  /**
   * One user event.
   *
   * @param project_id value of the JSON field `project_id` (may be null)
   * @param timestamp  value of the JSON field `time`, used as the event timestamp
   * @param name       value of the JSON field `distinct_id`; the stream is keyed by it
   * @param action     value of the JSON field `event`, e.g. "click" or "buy"
   */
  case class UserAction(project_id: String, timestamp: Long, name: String, action: String)
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)