# Small demonstration of joining two pair RDDs on their keys.
# NOTE(review): `sc` is not defined in this snippet -- presumably the
# SparkContext that the pyspark shell provides; confirm before running
# this as a standalone script.

# (id, name) pairs.
RDD1 = sc.parallelize([
    (1, "tintin"),
    (2, "asterix"),
    (3, "spirou"),
])

# (id, year) pairs; key 1 appears twice, and key 4 has no entry in RDD1.
RDD2 = sc.parallelize([
    (1, 1930),
    (2, 1961),
    (1, 1931),
    (4, 1974),
])

# Per the PySpark RDD.join documentation, join() pairs up elements with
# matching keys, yielding (key, (name, year)) tuples. Keys found in only one
# RDD (3 and 4 here) therefore produce no output, while key 1 yields one
# tuple per matching year. collect() brings the result back as a list.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(RDD1.join(RDD2).collect())