In [2]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.1.1
org.apache.spark spark-sql_2.11 2.1.1
In [3]:
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
// Configure and create (or reuse) a local SparkSession for the examples below.
val sessionBuilder = SparkSession.builder()
  .master("local")
  .appName("Spark SQL basic example")
  .config("spark.some.config.option", "some-value")
val spark = sessionBuilder.getOrCreate()
import spark.implicits._
Out[3]:
An empty DataFrame with 1 column.
In [4]:
// An empty sequence of strings becomes a DataFrame with one column ("x")
// and zero rows via spark.implicits.
val s: Seq[String] = Seq.empty
val df = s.toDF("x")
// Side-effecting action: prints the (empty) table to stdout.
df.show()
Out[4]:
The size of an empty DataFrame is 0, of course.
In [5]:
df.count
Out[5]:
Write the empty DataFrame into a file.
In [5]:
df.write.mode("overwrite").csv("empty.csv")
Out[5]:
An empty DataFrame with no rows or columns.
In [6]:
// spark.emptyDataFrame has zero rows AND zero columns (empty schema),
// unlike df above, which has one column.
val df2 = spark.emptyDataFrame
// Side-effecting action: prints the (empty) table to stdout.
df2.show()
Out[6]:
The size of an empty DataFrame is 0, of course.
In [7]:
df2.count
Out[7]:
Write the empty DataFrame into a file.
In [7]:
df2.write.mode("overwrite").csv("empty2.csv")
Out[7]:
Add a Column to an Empty DataFrame
The resulting DataFrame is still empty but with one more column.
In [8]:
import org.apache.spark.sql.functions._
// Adding a constant column to an empty DataFrame yields a new (still empty)
// DataFrame with one extra column; df itself is not modified.
val dfWithY = df.withColumn("y", lit(1))
dfWithY.show()
Out[8]:
In [ ]: