map(func) |
reduceByKey(func, [numTasks]) |
filter(func) |
aggregateByKey(zeroValue)(seqOp, combOp, [numTasks]) |
flatMap(func) |
sortByKey([ascending], [numTasks]) |
mapPartitions(func) |
join(otherDataset, [numTasks]) |
mapPartitionsWithIndex(func) |
cogroup(otherDataset, [numTasks]) |
sample(withReplacement, fraction, seed) |
cartesian(otherDataset) |
union(otherDataset) |
pipe(command, [envVars]) |
intersection(otherDataset) |
coalesce(numPartitions) |
distinct([numTasks]) |
repartition(numPartitions) |
groupByKey([numTasks]) |
repartitionAndSortWithinPartitions(partitioner) |