In a nutshell, Spark graph computation comes down to building a vertex collection and an edge collection, then handing both to GraphX to do the computing. Below is a demo of Spark graph computation written in Scala.
```scala
package sparksql

import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkConf, SparkContext}

object sparkgraphx {
  // Spark graph computation demo
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Vertices: (vertexId, (name, age))
    val vertexArray = Array(
      (1L, ("Alice", 38)),
      (2L, ("Henry", 27)),
      (3L, ("Charlie", 55)),
      (4L, ("Peter", 32)),
      (5L, ("Mike", 35)),
      (6L, ("Kate", 23))
    )
    // Edges: Edge(srcId, dstId, attr)
    val edgeArray = Array(
      Edge(2L, 1L, 5),
      Edge(2L, 4L, 2),
      Edge(3L, 2L, 7),
      Edge(3L, 6L, 3),
      Edge(4L, 1L, 1),
      Edge(5L, 2L, 3),
      Edge(5L, 3L, 8),
      Edge(5L, 6L, 8)
    )

    // Build the graph from the vertex RDD and edge RDD
    val vertexRdd = sc.parallelize(vertexArray)
    val edgeRdd = sc.parallelize(edgeArray)
    val graph = Graph(vertexRdd, edgeRdd)

    println("People older than 20:")
    graph.vertices.filter(v => v._2._2 > 20).collect.foreach {
      v => println(s"${v._2._1} is ${v._2._2}")
    }

    println("Edges with attribute greater than 3:")
    graph.edges.filter(e => e.attr > 3).collect
      .foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

    println("Triplet operation:")
    for (t <- graph.triplets.filter(s => s.attr > 3).collect) {
      println(s"${t.srcAttr._1} likes ${t.dstAttr._1}")
    }

    sc.stop()
  }
}
```
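Once the graph is built, GraphX's built-in operators and algorithms can run on it directly. As a minimal sketch of where to go from here, the snippet below computes in-degrees and runs PageRank on the same `graph`; it assumes it is inserted into `main()` above, before `sc.stop()`, and the tolerance value 0.001 is an arbitrary choice for illustration.

```scala
// Sketch: assumes it runs inside main() above, after `graph` is built.

// In-degree of each vertex: a VertexRDD[Int] keyed by vertex id
graph.inDegrees.collect.foreach {
  case (id, deg) => println(s"vertex $id has in-degree $deg")
}

// PageRank, iterated until convergence within the given tolerance
// (0.001 is an illustrative value); the result is a new graph whose
// vertex attribute is the rank score.
val ranks = graph.pageRank(0.001).vertices

// Join the rank scores back to the original vertices to print names with scores
graph.vertices.join(ranks).collect.foreach {
  case (_, ((name, _), rank)) => println(f"$name rank $rank%.4f")
}
```

Because `graph.vertices` is just an RDD of `(VertexId, (String, Int))` pairs, the standard pair-RDD `join` is enough to line the rank scores up with the names.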