# Build a small pair RDD of (int key, string value) tuples.
# `sc` is not defined in this snippet -- presumably the SparkContext
# created earlier (e.g. by the pyspark shell); confirm against the caller.
RDD = sc.parallelize([
    (1, "paul"), (2, "anne"),
    (1, "emile"), (2, "marie"), (1, "victor"),
])

# Merge all values that share a key into one "-"-separated string and
# print the collected list of (key, joined string) pairs.
# NOTE: reduceByKey is documented to expect an associative *and*
# commutative function. String concatenation is not commutative, so the
# order of the values inside each joined string is not guaranteed and may
# depend on how the data is partitioned.
# The call form print(...) with a single argument behaves the same on
# Python 2 and Python 3, whereas the bare `print x` statement is a
# SyntaxError on Python 3.
print(RDD.reduceByKey(lambda a, b: a + "-" + b).collect())