I'm loading the JSON string below into a DataFrame column.
{
"title": {
"titleid": "222",
"titlename": "ABCD"
},
"customer": {
"customerDetail": {
"customerid": 878378743,
"customerstatus": "ACTIVE",
"customersystems": {
"customersystem1": "SYS01",
"customersystem2": null
},
"sysid": null
},
"persons": [{
"personid": "123",
"personname": "IIISKDJKJSD"
},
{
"personid": "456",
"personname": "IUDFIDIKJK"
}]
}
}
// Read the sample file once so Spark infers the JSON schema, then reuse that schema for parsing.
val js = spark.read.json("./src/main/resources/json/customer.txt")
val inferredSchema = js.schema
println(inferredSchema)
// Parse the JSON text held in the `value` column of `df` into one struct column.
val parsedValue = from_json(df("value"), inferredSchema).alias("parsed_value")
val newDF = df.select(parsedValue)
// Expand only the direct children of `customer`; nested structs and arrays are shown as-is.
newDF.selectExpr("parsed_value.customer.*").show(false)
//Schema:
StructType(StructField(customer,StructType(StructField(customerDetail,StructType(StructField(customerid,LongType,true), StructField(customerstatus,StringType,true), StructField(customersystems,StructType(StructField(customersystem1,StringType,true), StructField(customersystem2,StringType,true)),true), StructField(sysid,StringType,true)),true), StructField(persons,ArrayType(StructType(StructField(personid,StringType,true), StructField(personname,StringType,true)),true),true)),true), StructField(title,StructType(StructField(titleid,StringType,true), StructField(titlename,StringType,true)),true))
//Output:
+------------------------------+---------------------------------------+
|customerDetail |persons |
+------------------------------+---------------------------------------+
|[878378743, ACTIVE, [SYS01,],]|[[123, IIISKDJKJSD], [456, IUDFIDIKJK]]|
+------------------------------+---------------------------------------+
My question: Is there a way to split each key/value pair into separate DataFrame columns, as shown below,
while keeping the array columns as is, since I need exactly one record per JSON string?
Example for customer column:
customer.customerDetail.customerid,customer.customerDetail.customerstatus,customer.customerDetail.customersystems.customersystem1,customer.customerDetail.customersystems.customersystem2,customer.customerDetail.sysid,customer.persons
878378743,ACTIVE,SYS01,null,null,{"persons": [ { "personid": "123", "personname": "IIISKDJKJSD" }, { "personid": "456", "personname": "IUDFIDIKJK" } ] }