Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions google/cloud/bigquery/external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,23 @@ def schema(self, value):
prop = {"fields": [field.to_api_repr() for field in value]}
self._properties["schema"] = prop

@property
def time_zone(self) -> Optional[str]:
    """Optional[str]: Time zone used when parsing timestamp values that do not
    have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
    format is an IANA timezone string (e.g. America/Los_Angeles).

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
    """
    # The key may be absent from the resource, in which case ``get`` returns
    # None — cast to Optional[str] (not str) so type checkers see that case.
    result = self._properties.get("timeZone")
    return typing.cast(Optional[str], result)

@time_zone.setter
def time_zone(self, value: Optional[str]):
    # Stored under the camelCase key used by the BigQuery REST API.
    self._properties["timeZone"] = value

@property
def connection_id(self):
"""Optional[str]: [Experimental] ID of a BigQuery Connection API
Expand Down
21 changes: 21 additions & 0 deletions google/cloud/bigquery/job/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,20 @@ def source_format(self):
def source_format(self, value):
self._set_sub_prop("sourceFormat", value)

@property
def time_zone(self) -> Optional[str]:
    """Optional[str]: Default time zone applied when parsing timestamp values
    that carry no explicit time zone of their own.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone
    """
    tz = self._get_sub_prop("timeZone")
    return tz

@time_zone.setter
def time_zone(self, value: Optional[str]):
    # Persisted under the API's camelCase field name.
    self._set_sub_prop("timeZone", value)

@property
def time_partitioning(self):
"""Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
Expand Down Expand Up @@ -889,6 +903,13 @@ def clustering_fields(self):
"""
return self.configuration.clustering_fields

@property
def time_zone(self) -> Optional[str]:
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`.
    """
    # Read-only view of the value held by this job's LoadJobConfig.
    return self.configuration.time_zone

@property
def schema_update_options(self):
"""See
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/job/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,14 @@ def _setUpConstants(self):
self.OUTPUT_ROWS = 345
self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference"

self.TIME_ZONE = "UTC"

def _make_resource(self, started=False, ended=False):
resource = super(TestLoadJob, self)._make_resource(started, ended)
config = resource["configuration"]["load"]
config["sourceUris"] = [self.SOURCE1]

config["timeZone"] = self.TIME_ZONE
config["destinationTable"] = {
"projectId": self.PROJECT,
"datasetId": self.DS_ID,
Expand Down Expand Up @@ -152,6 +156,10 @@ def _verifyResourceProperties(self, job, resource):
)
else:
self.assertIsNone(job.destination_encryption_configuration)
if "timeZone" in config:
self.assertEqual(job.time_zone, config["timeZone"])
else:
self.assertIsNone(job.time_zone)

def test_ctor(self):
client = _make_client(project=self.PROJECT)
Expand Down Expand Up @@ -195,6 +203,8 @@ def test_ctor(self):
self.assertIsNone(job.schema_update_options)
self.assertIsNone(job.reference_file_schema_uri)

self.assertIsNone(job.time_zone)

def test_ctor_w_config(self):
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.job import LoadJobConfig
Expand Down Expand Up @@ -431,6 +441,24 @@ def test_from_api_repr_w_properties(self):
self.assertIs(job._client, client)
self._verifyResourceProperties(job, RESOURCE)

def test_to_api_repr(self):
    self._setUpConstants()
    client = _make_client(project=self.PROJECT)
    resource = self._make_resource(ended=False)

    job = self._get_target_class().from_api_repr(resource, client)

    # LoadJob.to_api_repr() documents that statistics are excluded from
    # the returned representation, so only these two keys are expected.
    self.assertEqual(
        job.to_api_repr(),
        {
            "jobReference": resource["jobReference"],
            "configuration": resource["configuration"],
        },
    )

def test_begin_w_already_running(self):
conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
Expand Down Expand Up @@ -571,6 +599,7 @@ def test_begin_w_alternate_client(self):
]
},
"schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
"timeZone": self.TIME_ZONE,
}
RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION
conn1 = make_connection()
Expand Down Expand Up @@ -599,6 +628,9 @@ def test_begin_w_alternate_client(self):
config.write_disposition = WriteDisposition.WRITE_TRUNCATE
config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]
config.reference_file_schema_uri = "gs://path/to/reference"

config.time_zone = self.TIME_ZONE

with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
Expand Down
127 changes: 127 additions & 0 deletions tests/unit/job/test_load_config.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to https://github.com/googleapis/python-bigquery/pull/2229/files#r2191183829, I think in general we need to test against to/from_api_repr and the property itself. The same goes for LoadJobConfig and LoadJob. Could you add these changes? Otherwise the PR looks good in general.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pattern/philosophy for test_load_config.py is different than the approach used in test_external_config.py

Here, the author chose to test each attribute in a more standalone fashion using a trio of tests:

  • x_missing()
  • x_hit()
  • x_setter()

But:
the author did not test to_api_repr() or from_api_repr() directly (I can add such a pair of tests).

Examples:

  • def test_write_disposition_missing()
  • def test_write_disposition_hit()
  • def test_write_disposition_setter()
  • def test_time_zone_missing()
  • def test_time_zone_hit()
  • def test_time_zone_setter()

missing

Confirms that when the target class is created, the attribute is not populated

self.assertIsNone(config.write_disposition)

hit

Sets (behind the scenes using ._properties) the value of the attribute
Confirms that the getter functions correctly and that the expected/result values match

config._properties["load"]["writeDisposition"] = write_disposition
self.assertEqual(config.write_disposition, write_disposition)

setter

Sets the value of the attribute directly using the setter function
Confirms that the setter functions correctly by examining the _properties object directly

config.write_disposition = write_disposition
self.assertEqual(config._properties["load"]["writeDisposition"], write_disposition)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a test_to_api_repr() and a test_from_api_repr() test because the original file did not include them.

Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,22 @@ def test_write_disposition_setter(self):
config._properties["load"]["writeDisposition"], write_disposition
)

def test_time_zone_missing(self):
    # A freshly constructed config has no timeZone entry at all.
    cfg = self._get_target_class()()
    self.assertIsNone(cfg.time_zone)

def test_time_zone_hit(self):
    # Seed the raw resource directly and confirm the getter reads it back.
    expected = "UTC"
    cfg = self._get_target_class()()
    cfg._properties["load"]["timeZone"] = expected
    self.assertEqual(cfg.time_zone, expected)

def test_time_zone_setter(self):
    # Assign through the property, then inspect the raw resource.
    cfg = self._get_target_class()()
    cfg.time_zone = "America/New_York"
    self.assertEqual(cfg._properties["load"]["timeZone"], "America/New_York")

def test_parquet_options_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.parquet_options)
Expand Down Expand Up @@ -901,3 +917,114 @@ def test_column_name_character_map_none(self):
config._properties["load"]["columnNameCharacterMap"],
ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
)

# Shared fixture: a full LoadJobConfig API resource used by the
# to/from_api_repr round-trip tests below. "someNewField" is an unknown
# key that must survive the round trip untouched.
RESOURCE = {
    "load": {
        "allowJaggedRows": True,
        "createDisposition": "CREATE_NEVER",
        "encoding": "UTF-8",
        "fieldDelimiter": ",",
        "ignoreUnknownValues": True,
        "maxBadRecords": 10,
        "nullMarker": "\\N",
        "quote": '"',
        "schema": {
            "fields": [
                {"name": "name", "type": "STRING", "mode": "NULLABLE"},
                {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
            ]
        },
        "skipLeadingRows": "1",
        "sourceFormat": "CSV",
        "timePartitioning": {
            "type": "DAY",
            "field": "transaction_date",
        },
        "useAvroLogicalTypes": True,
        "writeDisposition": "WRITE_TRUNCATE",
        "timeZone": "America/New_York",
        "parquetOptions": {"enableListInference": True},
        "columnNameCharacterMap": "V2",
        "someNewField": "some-value",
    }
}

def test_from_api_repr(self):
    from google.cloud.bigquery.job import (
        CreateDisposition,
        LoadJobConfig,
        SourceFormat,
        WriteDisposition,
    )
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType

    cfg = LoadJobConfig.from_api_repr(self.RESOURCE)

    # Boolean flags.
    self.assertTrue(cfg.allow_jagged_rows)
    self.assertTrue(cfg.ignore_unknown_values)
    self.assertTrue(cfg.use_avro_logical_types)
    self.assertTrue(cfg.parquet_options.enable_list_inference)
    # Scalar options.
    self.assertEqual(cfg.create_disposition, CreateDisposition.CREATE_NEVER)
    self.assertEqual(cfg.write_disposition, WriteDisposition.WRITE_TRUNCATE)
    self.assertEqual(cfg.encoding, "UTF-8")
    self.assertEqual(cfg.field_delimiter, ",")
    self.assertEqual(cfg.max_bad_records, 10)
    self.assertEqual(cfg.null_marker, "\\N")
    self.assertEqual(cfg.quote_character, '"')
    self.assertEqual(cfg.skip_leading_rows, 1)
    self.assertEqual(cfg.source_format, SourceFormat.CSV)
    self.assertEqual(cfg.time_zone, "America/New_York")
    self.assertEqual(cfg.column_name_character_map, ColumnNameCharacterMap.V2)
    # Structured options.
    self.assertEqual(
        cfg.schema,
        [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")],
    )
    self.assertEqual(
        cfg.time_partitioning,
        TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"),
    )
    # Unknown keys are preserved verbatim in the underlying resource.
    self.assertEqual(cfg._properties["load"]["someNewField"], "some-value")

def test_to_api_repr(self):
    from google.cloud.bigquery.format_options import ParquetOptions
    from google.cloud.bigquery.job import (
        CreateDisposition,
        LoadJobConfig,
        SourceFormat,
        WriteDisposition,
    )
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType

    cfg = LoadJobConfig()
    # Boolean flags.
    cfg.allow_jagged_rows = True
    cfg.ignore_unknown_values = True
    cfg.use_avro_logical_types = True
    # Scalar options.
    cfg.create_disposition = CreateDisposition.CREATE_NEVER
    cfg.write_disposition = WriteDisposition.WRITE_TRUNCATE
    cfg.encoding = "UTF-8"
    cfg.field_delimiter = ","
    cfg.max_bad_records = 10
    cfg.null_marker = r"\N"
    cfg.quote_character = '"'
    cfg.skip_leading_rows = 1
    cfg.source_format = SourceFormat.CSV
    cfg.time_zone = "America/New_York"
    cfg.column_name_character_map = ColumnNameCharacterMap.V2
    # Structured options.
    cfg.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")]
    cfg.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY, field="transaction_date"
    )
    parquet_options = ParquetOptions()
    parquet_options.enable_list_inference = True
    cfg.parquet_options = parquet_options
    # Unknown keys set directly on the resource must round-trip unchanged.
    cfg._properties["load"]["someNewField"] = "some-value"

    self.assertEqual(cfg.to_api_repr(), self.RESOURCE)
7 changes: 7 additions & 0 deletions tests/unit/test_external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@
class TestExternalConfig(unittest.TestCase):
SOURCE_URIS = ["gs://foo", "gs://bar"]

TIME_ZONE = "America/Los_Angeles"

# Minimal ExternalConfig resource shared by the from/to_api_repr tests.
BASE_RESOURCE = {
    "sourceFormat": "",
    "sourceUris": SOURCE_URIS,
    "maxBadRecords": 17,
    "autodetect": True,
    "ignoreUnknownValues": False,
    "compression": "compression",
    "timeZone": TIME_ZONE,
}

def test_from_api_repr_base(self):
Expand Down Expand Up @@ -79,6 +82,7 @@ def test_to_api_repr_base(self):
ec.connection_id = "path/to/connection"
ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")]

ec.time_zone = self.TIME_ZONE
exp_schema = {
"fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}]
}
Expand All @@ -92,6 +96,7 @@ def test_to_api_repr_base(self):
"compression": "compression",
"connectionId": "path/to/connection",
"schema": exp_schema,
"timeZone": self.TIME_ZONE,
}
self.assertEqual(got_resource, exp_resource)

Expand Down Expand Up @@ -128,6 +133,8 @@ def _verify_base(self, ec):
self.assertEqual(ec.max_bad_records, 17)
self.assertEqual(ec.source_uris, self.SOURCE_URIS)

self.assertEqual(ec.time_zone, self.TIME_ZONE)

def test_to_api_repr_source_format(self):
ec = external_config.ExternalConfig("CSV")
got = ec.to_api_repr()
Expand Down