- Add dependencies
- Dependencies required when submitting and running the job on the Flink service
- Build the KafkaSink parameter instance
- Build the custom KafkaMQSink
Dependencies required when submitting and running the job on the Flink service:

```xml
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.12</artifactId>
    <version>1.13.2</version>
    <scope>provided</scope>
</dependency>
```
Before submitting the job to the Flink server, first upload the dependency jars below to Flink's lib directory, then restart the Flink service so the jars are loaded; otherwise a ClassNotFoundException will be thrown.
- flink-connector-kafka_2.12-1.13.2.jar
- kafka-clients-2.4.1.jar
Build the KafkaSink parameter instance

```java
import java.io.Serializable;

import com.alibaba.fastjson.JSONObject;

public class KafkaSink implements Serializable {

    private static final long serialVersionUID = -6378076276774453062L;

    private String bootStrapServers;
    private String groupId;
    private String productId;
    private String domain;
    private String type;
    private String data;

    public String getBootStrapServers() {
        return bootStrapServers;
    }

    public String getGroupId() {
        return groupId;
    }

    public String getProductId() {
        return productId;
    }

    public String getDomain() {
        return domain;
    }

    public String getType() {
        return type;
    }

    public String getData() {
        return data;
    }

    public KafkaSink(Object obj) {
        final JSONObject json = JSONObject.parseObject(obj.toString());
        this.bootStrapServers = json.getString("bootStrapServers");
        this.groupId = json.getString("groupId");
        this.productId = json.getString("productId");
        this.domain = json.getString("domain");
        this.type = json.getString("type");
        if (json.containsKey("data")) {
            this.data = json.getString("data");
        }
    }
}
```
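For illustration, here is a minimal sketch (not from the original post) of how such a parameter instance could be built from a configuration JSON; all field values below are made up:

```java
import com.ygsoft.dataprocess.vo.sink.KafkaSink;

public class KafkaSinkConfigDemo {

    public static void main(String[] args) {
        // Hypothetical configuration string; the keys are the ones read by the KafkaSink constructor
        String config = "{\"bootStrapServers\":\"kafka-1:9092\",\"groupId\":\"demo-group\","
                + "\"productId\":\"p-001\",\"domain\":\"device\",\"type\":\"event\",\"data\":\"{}\"}";

        KafkaSink sinkParams = new KafkaSink(config);
        System.out.println(sinkParams.getBootStrapServers()); // prints kafka-1:9092
    }
}
```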
Build the custom KafkaMQSink

The sink is implemented on top of the FlinkKafkaProducer<T> class. The KafkaSerializationSchema<T> handles data serialization: it lets you assemble each record into whatever shape you want before it is sent.
If the data is a plain String, you can simply pass a SimpleStringSchema() and let it handle the serialization.
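As a contrast to the custom serialization below, here is a minimal sketch (not from the original post) of a String sink that relies on SimpleStringSchema; the topic name and broker address are placeholders:

```java
import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class StringKafkaSinkDemo {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "kafka-1:9092"); // placeholder address

        DataStream<String> stream = env.fromElements("hello", "world");

        // SimpleStringSchema serializes each String record as UTF-8 bytes
        stream.addSink(new FlinkKafkaProducer<>("demo-topic", new SimpleStringSchema(), properties));

        env.execute("string-kafka-sink-demo");
    }
}
```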
Custom sink for property messages, KafkaPropertySink:

```java
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.ProducerRecord;

import com.ygsoft.dataprocess.vo.sink.KafkaSink;

public class KafkaPropertySink implements Serializable {

    private static final long serialVersionUID = -7477350968706636648L;

    private FlinkKafkaProducer<Map<String, String>> producer;

    // The rest of this class was cut off in the source. It follows the same pattern as
    // KafkaEventSink below: a constructor that takes the KafkaSink parameters and builds
    // the FlinkKafkaProducer, plus a getProducer() accessor.
}
```
Custom sink for event messages, KafkaEventSink (it rewrites the incoming property topic into the corresponding event topic):

```java
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Map;
import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.ProducerRecord;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.ygsoft.dataprocess.vo.sink.KafkaSink;

public class KafkaEventSink implements Serializable {

    private static final long serialVersionUID = -8333995037065268493L;

    private FlinkKafkaProducer<Map<String, String>> producer;

    public KafkaEventSink(final KafkaSink sinkParams) {
        Properties properties = new Properties();
        properties.setProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, sinkParams.getBootStrapServers());
        // The Flink Kafka consumer supports discovering dynamically created Kafka partitions
        // and consuming them with exactly-once semantics
        properties.setProperty("flink.partition-discovery.interval-millis", "10000");

        final KafkaSerializationSchema<Map<String, String>> serializer = new KafkaSerializationSchema<Map<String, String>>() {

            private static final long serialVersionUID = 115722639942583321L;

            @Override
            public ProducerRecord<byte[], byte[]> serialize(Map<String, String> element, Long timestamp) {
                final String topic = element.get("topic").replace("property", "event");
                final JSONArray events = new JSONArray();
                final JSONObject event = JSONObject.parseObject(sinkParams.getData());
                final JSONArray propertys = JSONArray.parseArray(element.get("value").toString());
                for (int i = 0; i < propertys.size(); i++) {
                    // The loop body was lost in the source; it presumably builds event objects
                    // from the property entries (using the `event` template) and adds them to `events`.
                }
                return new ProducerRecord<>(topic, events.toString().getBytes(StandardCharsets.UTF_8));
            }
        };

        producer = new FlinkKafkaProducer<>("default", serializer, properties, FlinkKafkaProducer.Semantic.NONE);
    }

    public FlinkKafkaProducer<Map<String, String>> getProducer() {
        return producer;
    }
}
```
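To show where the custom sink plugs in, here is a minimal sketch (not part of the original post) that wires KafkaEventSink into a Flink streaming job; the configuration values, the sample topic, and the in-memory demo source are all made up:

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import com.ygsoft.dataprocess.vo.sink.KafkaSink;

public class KafkaEventSinkJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Hypothetical sink configuration; the keys are the ones read by the KafkaSink constructor
        String sinkConfig = "{\"bootStrapServers\":\"kafka-1:9092\",\"groupId\":\"demo-group\","
                + "\"productId\":\"p-001\",\"domain\":\"device\",\"type\":\"event\",\"data\":\"{}\"}";
        KafkaSink sinkParams = new KafkaSink(sinkConfig);

        // A single {topic, value} element stands in for the real upstream property stream
        Map<String, String> sample = new HashMap<>();
        sample.put("topic", "device_property");
        sample.put("value", "[]");
        DataStream<Map<String, String>> stream = env.fromCollection(
                Collections.singletonList(sample),
                TypeInformation.of(new TypeHint<Map<String, String>>() {}));

        // The FlinkKafkaProducer built inside KafkaEventSink is a regular SinkFunction
        stream.addSink(new KafkaEventSink(sinkParams).getProducer());

        env.execute("kafka-event-sink-demo");
    }
}
```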