Skip to content

Commit c423c91

Browse files
feat: new sample - load: GCS parquet replace existing table (#101)
1 parent 7fedacd commit c423c91

File tree

2 files changed

+152
-0
lines changed

2 files changed

+152
-0
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
// [START bigquery_load_table_gcs_parquet_truncate]
20+
21+
import com.google.cloud.bigquery.BigQuery;
22+
import com.google.cloud.bigquery.BigQueryException;
23+
import com.google.cloud.bigquery.BigQueryOptions;
24+
import com.google.cloud.bigquery.FormatOptions;
25+
import com.google.cloud.bigquery.Job;
26+
import com.google.cloud.bigquery.JobInfo;
27+
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
28+
import com.google.cloud.bigquery.LoadJobConfiguration;
29+
import com.google.cloud.bigquery.TableId;
30+
import java.math.BigInteger;
31+
32+
public class LoadParquetReplaceTable {
33+
34+
public static void runLoadParquetReplaceTable() {
35+
// TODO(developer): Replace these variables before running the sample.
36+
String datasetName = "MY_DATASET_NAME";
37+
loadParquetReplaceTable(datasetName);
38+
}
39+
40+
public static void loadParquetReplaceTable(String datasetName) {
41+
try {
42+
// Initialize client that will be used to send requests. This client only needs to be created
43+
// once, and can be reused for multiple requests.
44+
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
45+
46+
// Imports a GCS file into a table and overwrites table data if table already exists.
47+
// This sample loads CSV file at:
48+
// https://storage.googleapis.com/cloud-samples-data/bigquery/us-states/us-states.csv
49+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet";
50+
TableId tableId = TableId.of(datasetName, "us_states");
51+
52+
// For more information on LoadJobConfiguration see:
53+
// https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/LoadJobConfiguration.Builder.html
54+
LoadJobConfiguration configuration =
55+
LoadJobConfiguration.builder(tableId, sourceUri)
56+
.setFormatOptions(FormatOptions.parquet())
57+
// Set the write disposition to overwrite existing table data.
58+
.setWriteDisposition(WriteDisposition.WRITE_TRUNCATE)
59+
.build();
60+
61+
// For more information on Job see:
62+
// https://googleapis.dev/java/google-cloud-clients/latest/index.html?com/google/cloud/bigquery/package-summary.html
63+
// Load the table
64+
Job job = bigquery.create(JobInfo.of(configuration));
65+
66+
// Load data from a GCS parquet file into the table
67+
// Blocks until this load table job completes its execution, either failing or succeeding.
68+
Job completedJob = job.waitFor();
69+
if (completedJob == null) {
70+
System.out.println("Job not executed since it no longer exists.");
71+
return;
72+
} else if (completedJob.getStatus().getError() != null) {
73+
System.out.println(
74+
"BigQuery was unable to load into the table due to an error: \n"
75+
+ job.getStatus().getError());
76+
return;
77+
}
78+
79+
// Check number of rows loaded into the table
80+
BigInteger numRows = bigquery.getTable(tableId).getNumRows();
81+
System.out.printf("Loaded %d rows. \n", numRows);
82+
83+
System.out.println("GCS parquet overwrote existing table successfully.");
84+
} catch (BigQueryException | InterruptedException e) {
85+
System.out.println("Table extraction job was interrupted. \n" + e.toString());
86+
}
87+
}
88+
}
89+
// [END bigquery_load_table_gcs_parquet_truncate]
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static junit.framework.TestCase.assertNotNull;
21+
22+
import java.io.ByteArrayOutputStream;
23+
import java.io.PrintStream;
24+
import org.junit.After;
25+
import org.junit.Before;
26+
import org.junit.BeforeClass;
27+
import org.junit.Test;
28+
29+
public class LoadParquetReplaceTableIT {
30+
private ByteArrayOutputStream bout;
31+
private PrintStream out;
32+
33+
private static final String BIGQUERY_DATASET_NAME = System.getenv("BIGQUERY_DATASET_NAME");
34+
35+
private static void requireEnvVar(String varName) {
36+
assertNotNull(
37+
"Environment variable " + varName + " is required to perform these tests.",
38+
System.getenv(varName));
39+
}
40+
41+
@BeforeClass
42+
public static void checkRequirements() {
43+
requireEnvVar("BIGQUERY_DATASET_NAME");
44+
}
45+
46+
@Before
47+
public void setUp() {
48+
bout = new ByteArrayOutputStream();
49+
out = new PrintStream(bout);
50+
System.setOut(out);
51+
}
52+
53+
@After
54+
public void tearDown() {
55+
System.setOut(null);
56+
}
57+
58+
@Test
59+
public void testLoadParquetReplaceTable() {
60+
LoadParquetReplaceTable.loadParquetReplaceTable(BIGQUERY_DATASET_NAME);
61+
assertThat(bout.toString()).contains("GCS parquet overwrote existing table successfully.");
62+
}
63+
}

0 commit comments

Comments
 (0)