In [1]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.3.1
org.apache.spark spark-sql_2.11 2.3.1
In [2]:
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
// Create a local SparkSession with two worker threads
val spark = SparkSession.builder()
  .master("local[2]")
  .appName("Spark Partition Example")
  .config("spark.some.config.option", "some-value") // placeholder config option
  .getOrCreate()

import spark.implicits._
Out[2]:
In [3]:
// Read the sample JSON file into a DataFrame and display its contents
val df = spark.read.json("../../data/people.json")
df.show()
Out[3]:
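The JSON source infers the schema at read time; printSchema (a standard DataFrame method) shows what was inferred, for example:

// Show the schema Spark inferred from people.json
df.printSchema()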
Get Number of Partitions
In [4]:
// Number of partitions Spark created when reading the file
df.rdd.getNumPartitions
Out[4]:
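getNumPartitions only reports the count. To see how the rows are actually spread across partitions, a small sketch using the standard mapPartitionsWithIndex RDD method (the idx/iter names are just local variables) looks like this:

// Count how many records each partition holds
df.rdd
  .mapPartitionsWithIndex((idx, iter) => Iterator((idx, iter.size)))
  .collect()
  .foreach { case (idx, count) => println(s"partition $idx: $count rows") }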
Repartition
In [6]:
// Redistribute the data into 4 partitions (triggers a full shuffle)
val df2 = df.repartition(4)
Out[6]:
In [7]:
df2.rdd.getNumPartitions
Out[7]:
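repartition(4) always performs a full shuffle. Two related, standard Dataset operations are worth knowing: coalesce, which reduces the partition count without a full shuffle, and repartition by a column expression, which hashes rows by a column value. The sketch below assumes people.json has an age column, as in the usual Spark sample data:

// Shrink back to a single partition without a full shuffle
val df3 = df2.coalesce(1)
println(df3.rdd.getNumPartitions)

// Hash-partition rows by the value of the "age" column
// (result defaults to spark.sql.shuffle.partitions, typically 200)
val byAge = df.repartition(col("age"))
println(byAge.rdd.getNumPartitions)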