Spark Big Data: Spark Streaming Output Operations
Everything else is the same as with transformation operations; you only need to add an output step at the end: use saveAsTextFiles to save the results as text files, or use foreachRDD with JDBC to write them to a MySQL database. The complete stateful word-count example below demonstrates both.
import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object NetworkWordCountStateful {
  def main(args: Array[String]) {
    // Define the state update function: add this batch's counts to the running total
    val updateFunc = (values: Seq[Int], state: Option[Int]) => {
      val currentCount = values.foldLeft(0)(_ + _)
      val previousCount = state.getOrElse(0)
      Some(currentCount + previousCount)
    }
    // Set the log4j log level (helper class from the Spark streaming examples)
    StreamingExamples.setStreamingLogLevels()
    val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCountStateful")
    val sc = new StreamingContext(conf, Seconds(5))
    // Set a checkpoint directory; updateStateByKey needs checkpointing for fault tolerance
    sc.checkpoint("home/ziyu_bigdata/quick_learn_spark/checkpoint")
    val lines = sc.socketTextStream("localhost", 9999)
    val words = lines.flatMap(_.split(" "))
    val wordDstream = words.map(x => (x, 1))
    val stateDstream = wordDstream.updateStateByKey[Int](updateFunc)
    // Print the results
    stateDstream.print()
    // Save the running word counts as text files; each batch is written to a
    // new directory whose name starts with this prefix
    stateDstream.saveAsTextFiles("home/ziyu_bigdata/quick_learn_spark/DstreamOutput/output.txt")
    // Write the results to a MySQL database
    stateDstream.foreachRDD(rdd => {
      def func(records: Iterator[(String, Int)]) {
        var conn: Connection = null
        var stmt: PreparedStatement = null
        try {
          val url = "jdbc:mysql://localhost:3306/spark"
          val user = "root"
          val password = "hadoop"
          conn = DriverManager.getConnection(url, user, password)
          records.foreach(p => {
            val sql = "insert into wordcount(word,count) values(?,?)"
            stmt = conn.prepareStatement(sql)
            stmt.setString(1, p._1.trim) // key: the word
            stmt.setInt(2, p._2)         // value: the count
            stmt.executeUpdate()
          })
        } catch {
          case e: Exception => e.printStackTrace()
        } finally {
          if (stmt != null) {
            stmt.close()
            stmt = null
          }
          if (conn != null) {
            conn.close()
            conn = null
          }
        }
      }
      // Repartition the RDD to 3 partitions and process each new partition with
      // func, so that each partition opens its own database connection
      val repartitionedRDD = rdd.repartition(3)
      repartitionedRDD.foreachPartition(func)
    })
    sc.start()
    sc.awaitTermination()
  }
}
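To test the program, first create the target table in MySQL (a database named spark with a table wordcount holding a word column and a count column, matching the insert statement above), start a text source with netcat on port 9999 (nc -lk 9999), run the program, and type words into the netcat terminal; the accumulated counts are printed, saved to files, and inserted into MySQL every 5 seconds.

One thing worth noting about the JDBC code above: it calls prepareStatement once per record. A common refinement is to prepare the statement once per partition and send the inserts as a single JDBC batch. The sketch below illustrates that pattern; it is not part of the original program, and it assumes the same URL, user, password, and wordcount table as above. The function name saveBatchToMySQL is made up for this example.

import java.sql.DriverManager
import org.apache.spark.rdd.RDD

def saveBatchToMySQL(rdd: RDD[(String, Int)]): Unit = {
  rdd.foreachPartition { records =>
    // One connection per partition, created on the executor
    val conn = DriverManager.getConnection(
      "jdbc:mysql://localhost:3306/spark", "root", "hadoop")
    try {
      // One prepared statement per partition, reused for every record
      val stmt = conn.prepareStatement(
        "insert into wordcount(word,count) values(?,?)")
      records.foreach { case (word, count) =>
        stmt.setString(1, word.trim)
        stmt.setInt(2, count)
        stmt.addBatch() // queue the insert instead of executing it immediately
      }
      stmt.executeBatch() // send all queued inserts in one round trip
      stmt.close()
    } finally {
      conn.close()
    }
  }
}

Wired in as stateDstream.foreachRDD(saveBatchToMySQL _), this replaces the func definition above while writing the same rows.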