Skip to content

Commit e5467c9

Browse files
authored
feat(bigquery): Add support for custom timezones and timestamps (#3859)
* feat(bigquery): Add support for custom timezones and timestamps * update ignored-diff * fix data -> date typo * Add enums for SourceColumnMatch * Change null markers test names * change enums to StringEnumValue
1 parent f081589 commit e5467c9

File tree

5 files changed

+525
-1
lines changed

5 files changed

+525
-1
lines changed

‎google-cloud-bigquery/clirr-ignored-differences.xml‎

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,76 @@
22
<!-- see http://www.mojohaus.org/clirr-maven-plugin/examples/ignored-differences.html -->
33
<differences>
44
<!-- TODO: REMOVE AFTER RELEASE -->
5+
<difference>
6+
<differenceType>7013</differenceType>
7+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
8+
<method>*TimeZone(*)</method>
9+
</difference>
10+
<difference>
11+
<differenceType>7013</differenceType>
12+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
13+
<method>*DateFormat(*)</method>
14+
</difference>
15+
<difference>
16+
<differenceType>7013</differenceType>
17+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
18+
<method>*DatetimeFormat(*)</method>
19+
</difference>
20+
<difference>
21+
<differenceType>7013</differenceType>
22+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
23+
<method>*TimeFormat(*)</method>
24+
</difference>
25+
<difference>
26+
<differenceType>7013</differenceType>
27+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
28+
<method>*TimestampFormat(*)</method>
29+
</difference>
30+
<difference>
31+
<differenceType>7013</differenceType>
32+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
33+
<method>*SourceColumnMatch(*)</method>
34+
</difference>
35+
<difference>
36+
<differenceType>7013</differenceType>
37+
<className>com/google/cloud/bigquery/ExternalTableDefinition*</className>
38+
<method>*NullMarkers(*)</method>
39+
</difference>
40+
<difference>
41+
<differenceType>7013</differenceType>
42+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
43+
<method>*TimeZone(*)</method>
44+
</difference>
45+
<difference>
46+
<differenceType>7013</differenceType>
47+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
48+
<method>*DateFormat(*)</method>
49+
</difference>
50+
<difference>
51+
<differenceType>7013</differenceType>
52+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
53+
<method>*DatetimeFormat(*)</method>
54+
</difference>
55+
<difference>
56+
<differenceType>7013</differenceType>
57+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
58+
<method>*TimeFormat(*)</method>
59+
</difference>
60+
<difference>
61+
<differenceType>7013</differenceType>
62+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
63+
<method>*TimestampFormat(*)</method>
64+
</difference>
65+
<difference>
66+
<differenceType>7013</differenceType>
67+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
68+
<method>*SourceColumnMatch(*)</method>
69+
</difference>
70+
<difference>
71+
<differenceType>7013</differenceType>
72+
<className>com/google/cloud/bigquery/LoadJobConfiguration*</className>
73+
<method>*NullMarkers(*)</method>
74+
</difference>
575
<difference>
676
<differenceType>7004</differenceType>
777
<className>com/google/cloud/bigquery/BigQueryRetryHelper</className>

‎google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java‎

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919
import static com.google.common.base.Preconditions.checkArgument;
2020
import static com.google.common.base.Strings.isNullOrEmpty;
2121

22+
import com.google.api.core.ApiFunction;
2223
import com.google.api.services.bigquery.model.ExternalDataConfiguration;
2324
import com.google.api.services.bigquery.model.Table;
2425
import com.google.auto.value.AutoValue;
26+
import com.google.cloud.StringEnumType;
27+
import com.google.cloud.StringEnumValue;
2528
import com.google.common.base.Function;
2629
import com.google.common.collect.ImmutableList;
2730
import java.util.List;
@@ -57,6 +60,46 @@ public ExternalDataConfiguration apply(ExternalTableDefinition tableInfo) {
5760

5861
private static final long serialVersionUID = -5951580238459622025L;
5962

63+
/**
 * Strategy used to match loaded columns to the schema. The registered constants are {@link
 * #POSITION} and {@link #NAME}; {@link #valueOf(String)} additionally allows unrecognized values.
 */
public static final class SourceColumnMatch extends StringEnumValue {
64+
private static final long serialVersionUID = 818920627219751207L;
65+
// Factory passed to StringEnumType below; it wraps the private constructor.
private static final ApiFunction<String, SourceColumnMatch> CONSTRUCTOR =
66+
new ApiFunction<String, SourceColumnMatch>() {
67+
@Override
68+
public SourceColumnMatch apply(String constant) {
69+
return new SourceColumnMatch(constant);
70+
}
71+
};
72+
73+
// Registry backing valueOf/valueOfStrict/values for this enum-like type.
private static final StringEnumType<SourceColumnMatch> type =
74+
new StringEnumType<SourceColumnMatch>(SourceColumnMatch.class, CONSTRUCTOR);
75+
76+
/** Matches by position. This assumes that the columns are ordered the same way as the schema. */
public static final SourceColumnMatch POSITION = type.createAndRegister("POSITION");
77+
78+
/**
 * Matches by name. This reads the header row as column names and reorders columns to match the
 * field names in the schema.
 */
public static final SourceColumnMatch NAME = type.createAndRegister("NAME");
79+
80+
// Private: instances are created only through CONSTRUCTOR.
private SourceColumnMatch(String constant) {
81+
super(constant);
82+
}
83+
84+
/**
85+
* Get the SourceColumnMatch for the given String constant, and throw an exception if the
86+
* constant is not recognized.
87+
*/
88+
public static SourceColumnMatch valueOfStrict(String constant) {
89+
return type.valueOfStrict(constant);
90+
}
91+
92+
/** Get the SourceColumnMatch for the given String constant, and allow unrecognized values. */
93+
public static SourceColumnMatch valueOf(String constant) {
94+
return type.valueOf(constant);
95+
}
96+
97+
/** Return the known values for SourceColumnMatch. */
98+
public static SourceColumnMatch[] values() {
99+
return type.values();
100+
}
101+
}
102+
60103
@AutoValue.Builder
61104
public abstract static class Builder
62105
extends TableDefinition.Builder<ExternalTableDefinition, Builder> {
@@ -215,6 +258,45 @@ public Builder setMaxStaleness(String maxStaleness) {
215258

216259
abstract Builder setMaxStalenessInner(String maxStaleness);
217260

261+
/**
262+
* Time zone used when parsing timestamp values that do not have specific time zone information
263+
* (e.g. 2024-04-20 12:34:56). The expected format is an IANA timezone string (e.g.
264+
* America/Los_Angeles).
265+
*/
266+
public abstract Builder setTimeZone(String timeZone);
267+
268+
/** Format used to parse DATE values. Supports C-style and SQL-style values. */
269+
public abstract Builder setDateFormat(String dateFormat);
270+
271+
/** Format used to parse DATETIME values. Supports C-style and SQL-style values. */
272+
public abstract Builder setDatetimeFormat(String datetimeFormat);
273+
274+
/** Format used to parse TIME values. Supports C-style and SQL-style values. */
275+
public abstract Builder setTimeFormat(String timeFormat);
276+
277+
/** Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. */
278+
public abstract Builder setTimestampFormat(String timestampFormat);
279+
280+
/**
281+
* Controls the strategy used to match loaded columns to the schema. If not set, a sensible
282+
* default is chosen based on how the schema is provided. If autodetect is used, then columns
283+
* are matched by name. Otherwise, columns are matched by position. This is done to keep the
284+
* behavior backward-compatible. Acceptable values are: POSITION - matches by position. This
285+
* assumes that the columns are ordered the same way as the schema. NAME - matches by name. This
286+
* reads the header row as column names and reorders columns to match the field names in the
287+
* schema.
288+
*/
289+
public abstract Builder setSourceColumnMatch(SourceColumnMatch sourceColumnMatch);
290+
291+
/**
292+
* A list of strings represented as SQL NULL value in a CSV file. null_marker and null_markers
293+
* can't be set at the same time. If null_marker is set, null_markers has to be not set. If
294+
* null_markers is set, null_marker has to be not set. If both null_marker and null_markers are
295+
* set at the same time, a user error would be thrown. Any strings listed in null_markers,
296+
* including empty string would be interpreted as SQL NULL. This applies to all column types.
297+
*/
298+
public abstract Builder setNullMarkers(List<String> nullMarkers);
299+
218300
/** Creates an {@code ExternalTableDefinition} object. */
219301
@Override
220302
public abstract ExternalTableDefinition build();
@@ -373,6 +455,37 @@ public HivePartitioningOptions getHivePartitioningOptions() {
373455
return getHivePartitioningOptionsInner();
374456
}
375457

458+
/**
459+
* Returns the time zone used when parsing timestamp values that don't have specific time zone
460+
* information.
461+
*/
462+
@Nullable
463+
public abstract String getTimeZone();
464+
465+
/** Returns the format used to parse DATE values. */
466+
@Nullable
467+
public abstract String getDateFormat();
468+
469+
/** Returns the format used to parse DATETIME values. */
470+
@Nullable
471+
public abstract String getDatetimeFormat();
472+
473+
/** Returns the format used to parse TIME values. */
474+
@Nullable
475+
public abstract String getTimeFormat();
476+
477+
/** Returns the format used to parse TIMESTAMP values. */
478+
@Nullable
479+
public abstract String getTimestampFormat();
480+
481+
/** Returns the strategy used to match loaded columns to the schema, either POSITION or NAME. */
482+
@Nullable
483+
public abstract SourceColumnMatch getSourceColumnMatch();
484+
485+
/** Returns a list of strings represented as SQL NULL value in a CSV file. */
486+
@Nullable
487+
public abstract List<String> getNullMarkers();
488+
376489
@Nullable
377490
abstract HivePartitioningOptions getHivePartitioningOptionsInner();
378491

@@ -454,6 +567,29 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC
454567
if (getMetadataCacheMode() != null) {
455568
externalConfigurationPb.setMetadataCacheMode(getMetadataCacheMode());
456569
}
570+
if (getTimeZone() != null) {
571+
externalConfigurationPb.setTimeZone(getTimeZone());
572+
}
573+
if (getDateFormat() != null) {
574+
externalConfigurationPb.setDateFormat(getDateFormat());
575+
}
576+
if (getDatetimeFormat() != null) {
577+
externalConfigurationPb.setDatetimeFormat(getDatetimeFormat());
578+
}
579+
if (getTimeFormat() != null) {
580+
externalConfigurationPb.setTimeFormat(getTimeFormat());
581+
}
582+
if (getTimestampFormat() != null) {
583+
externalConfigurationPb.setTimestampFormat(getTimestampFormat());
584+
}
585+
if (getSourceColumnMatch() != null) {
586+
externalConfigurationPb
587+
.getCsvOptions()
588+
.setSourceColumnMatch(getSourceColumnMatch().toString());
589+
}
590+
if (getNullMarkers() != null) {
591+
externalConfigurationPb.getCsvOptions().setNullMarkers(getNullMarkers());
592+
}
457593

458594
return externalConfigurationPb;
459595
}
@@ -654,6 +790,31 @@ static ExternalTableDefinition fromPb(Table tablePb) {
654790
if (tablePb.getMaxStaleness() != null) {
655791
builder.setMaxStaleness(tablePb.getMaxStaleness());
656792
}
793+
if (externalDataConfiguration.getTimeZone() != null) {
794+
builder.setTimeZone(externalDataConfiguration.getTimeZone());
795+
}
796+
if (externalDataConfiguration.getDateFormat() != null) {
797+
builder.setDateFormat(externalDataConfiguration.getDateFormat());
798+
}
799+
if (externalDataConfiguration.getDatetimeFormat() != null) {
800+
builder.setDatetimeFormat(externalDataConfiguration.getDatetimeFormat());
801+
}
802+
if (externalDataConfiguration.getTimeFormat() != null) {
803+
builder.setTimeFormat(externalDataConfiguration.getTimeFormat());
804+
}
805+
if (externalDataConfiguration.getTimestampFormat() != null) {
806+
builder.setTimestampFormat(externalDataConfiguration.getTimestampFormat());
807+
}
808+
if (externalDataConfiguration.getCsvOptions() != null) {
809+
if (externalDataConfiguration.getCsvOptions().getSourceColumnMatch() != null) {
810+
builder.setSourceColumnMatch(
811+
SourceColumnMatch.valueOf(
812+
externalDataConfiguration.getCsvOptions().getSourceColumnMatch()));
813+
}
814+
if (externalDataConfiguration.getCsvOptions().getNullMarkers() != null) {
815+
builder.setNullMarkers(externalDataConfiguration.getCsvOptions().getNullMarkers());
816+
}
817+
}
657818
}
658819
return builder.build();
659820
}
@@ -724,6 +885,31 @@ static ExternalTableDefinition fromExternalDataConfiguration(
724885
if (externalDataConfiguration.getMetadataCacheMode() != null) {
725886
builder.setMetadataCacheMode(externalDataConfiguration.getMetadataCacheMode());
726887
}
888+
if (externalDataConfiguration.getTimeZone() != null) {
889+
builder.setTimeZone(externalDataConfiguration.getTimeZone());
890+
}
891+
if (externalDataConfiguration.getDateFormat() != null) {
892+
builder.setDateFormat(externalDataConfiguration.getDateFormat());
893+
}
894+
if (externalDataConfiguration.getDatetimeFormat() != null) {
895+
builder.setDatetimeFormat(externalDataConfiguration.getDatetimeFormat());
896+
}
897+
if (externalDataConfiguration.getTimeFormat() != null) {
898+
builder.setTimeFormat(externalDataConfiguration.getTimeFormat());
899+
}
900+
if (externalDataConfiguration.getTimestampFormat() != null) {
901+
builder.setTimestampFormat(externalDataConfiguration.getTimestampFormat());
902+
}
903+
if (externalDataConfiguration.getCsvOptions() != null) {
904+
if (externalDataConfiguration.getCsvOptions().getSourceColumnMatch() != null) {
905+
builder.setSourceColumnMatch(
906+
SourceColumnMatch.valueOf(
907+
externalDataConfiguration.getCsvOptions().getSourceColumnMatch()));
908+
}
909+
if (externalDataConfiguration.getCsvOptions().getNullMarkers() != null) {
910+
builder.setNullMarkers(externalDataConfiguration.getCsvOptions().getNullMarkers());
911+
}
912+
}
727913

728914
return builder.build();
729915
}

0 commit comments

Comments
 (0)