Skip to content

Commit b5617d8

Browse files
committed
[u r] Add type and domain fields to TDRSourceSpec (#6426)
1 parent b2705b5 commit b5617d8

File tree

15 files changed

+93
-42
lines changed

15 files changed

+93
-42
lines changed

UPGRADING.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ have too many entries in this file.
2323
#6426 Clean-up and generalize TDR source specs
2424
==============================================
2525

26-
The "snapshot/" string has been removed from TDR source specs.
26+
The "snapshot/" string has been removed from TDR source specs, and the ``type``
27+
and ``domain`` fields have been added.
2728

2829
Update the ``mksrc`` function in ``environment.py`` for each of your personal
2930
deployments. As always, use the sandbox deployment's ``environment.py`` as a

deployments/anvilbox/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ def mksrc(google_project,
4040
prefix = common_prefix(subgraphs)
4141
source = None if flags & pop else ':'.join([
4242
'tdr',
43+
'bigquery',
44+
'gcp',
4345
google_project,
4446
snapshot,
4547
prefix + '/0'

deployments/anvildev/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
2727
assert flags <= ma | pop
2828
source = None if flags & pop else ':'.join([
2929
'tdr',
30+
'bigquery',
31+
'gcp',
3032
google_project,
3133
snapshot,
3234
'/' + str(partition_prefix_length(subgraphs))

deployments/anvilprod/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
2727
assert flags <= ma | pop
2828
source = None if flags & pop else ':'.join([
2929
'tdr',
30+
'bigquery',
31+
'gcp',
3032
google_project,
3133
snapshot,
3234
'/' + str(partition_prefix_length(subgraphs))

deployments/dev/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
2727
assert flags <= ma | pop
2828
source = None if flags & pop else ':'.join([
2929
'tdr',
30+
'bigquery',
31+
'gcp',
3032
google_project,
3133
snapshot,
3234
'/' + str(partition_prefix_length(subgraphs))

deployments/hammerbox/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ def mksrc(google_project,
4040
prefix = common_prefix(subgraphs)
4141
source = None if flags & pop else ':'.join([
4242
'tdr',
43+
'bigquery',
44+
'gcp',
4345
google_project,
4446
snapshot,
4547
prefix + '/0'

deployments/prod/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
2929
assert flags <= ma | pop
3030
source = None if flags & pop else ':'.join([
3131
'tdr',
32+
'bigquery',
33+
'gcp',
3234
google_project,
3335
snapshot,
3436
'/' + str(partition_prefix_length(subgraphs))

deployments/sandbox/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ def mksrc(google_project,
4040
prefix = common_prefix(subgraphs)
4141
source = None if flags & pop else ':'.join([
4242
'tdr',
43+
'bigquery',
44+
'gcp',
4345
google_project,
4446
snapshot,
4547
prefix + '/0'

deployments/tempdev/environment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
2727
assert flags <= ma | pop
2828
source = None if flags & pop else ':'.join([
2929
'tdr',
30+
'bigquery',
31+
'gcp',
3032
google_project,
3133
snapshot,
3234
'/' + str(partition_prefix_length(subgraphs))

src/azul/terra.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from collections.abc import (
66
Sequence,
77
)
8+
import enum
89
import json
910
import logging
1011
from time import (
@@ -52,6 +53,7 @@
5253
cache,
5354
config,
5455
mutable_furl,
56+
reject,
5557
require,
5658
)
5759
from azul.auth import (
@@ -93,35 +95,63 @@
9395

9496
@attrs.frozen(kw_only=True)
9597
class TDRSourceSpec(SourceSpec):
98+
class Type(enum.Enum):
99+
bigquery = 'bigquery'
100+
parquet = 'parquet'
101+
102+
class Domain(enum.Enum):
103+
gcp = 'gcp'
104+
azure = 'azure'
105+
106+
type: Type
107+
domain: Domain
96108
subdomain: str
97109
name: str
98110

99111
@classmethod
100112
def parse(cls, spec: str) -> 'TDRSourceSpec':
101113
"""
102114
Construct an instance from its string representation, using the syntax
103-
'tdr:{subdomain}:{name}:{prefix}' ending with an optional
115+
'tdr:{type}:{domain}:{subdomain}:{name}:{prefix}' ending with an optional
104116
'/{partition_prefix_length}'.
105117
106-
>>> s = TDRSourceSpec.parse('tdr:foo:bar:/0')
118+
>>> s = TDRSourceSpec.parse('tdr:bigquery:gcp:foo:bar:/0')
107119
>>> s # doctest: +NORMALIZE_WHITESPACE
108120
TDRSourceSpec(prefix=Prefix(common='', partition=0),
121+
type=<Type.bigquery: 'bigquery'>,
122+
domain=<Domain.gcp: 'gcp'>,
109123
subdomain='foo',
110124
name='bar')
111125
112126
>>> str(s)
113-
'tdr:foo:bar:/0'
127+
'tdr:bigquery:gcp:foo:bar:/0'
128+
129+
>>> TDRSourceSpec.parse('tdr:spam:gcp:foo:bar:/0')
130+
Traceback (most recent call last):
131+
...
132+
ValueError: 'spam' is not a valid TDRSourceSpec.Type
133+
134+
>>> TDRSourceSpec.parse('tdr:bigquery:eggs:foo:bar:/0')
135+
Traceback (most recent call last):
136+
...
137+
ValueError: 'eggs' is not a valid TDRSourceSpec.Domain
114138
115-
>>> TDRSourceSpec.parse('tdr:foo:bar:n32/0')
139+
>>> TDRSourceSpec.parse('tdr:bigquery:gcp:foo:bar:n32/0')
116140
Traceback (most recent call last):
117141
...
118142
azul.uuids.InvalidUUIDPrefixError: 'n32' is not a valid UUID prefix.
119143
"""
120144
rest, prefix = cls._parse(spec)
121145
# BigQuery (and by extension the TDR) does not allow : or / in dataset names
122-
service, subdomain, name = rest.split(':')
146+
service, type, domain, subdomain, name = rest.split(':')
123147
assert service == 'tdr', service
148+
type = cls.Type(type)
149+
reject(type == cls.Type.parquet, 'Parquet sources are not yet supported')
150+
domain = cls.Domain(domain)
151+
reject(domain == cls.Domain.azure, 'Azure sources are not yet supported')
124152
self = cls(prefix=prefix,
153+
type=type,
154+
domain=domain,
125155
subdomain=subdomain,
126156
name=name)
127157
assert spec == str(self), spec
@@ -131,20 +161,22 @@ def __str__(self) -> str:
131161
"""
132162
The inverse of :meth:`parse`.
133163
134-
>>> s = 'tdr:foo:bar:/0'
164+
>>> s = 'tdr:bigquery:gcp:foo:bar:/0'
135165
>>> s == str(TDRSourceSpec.parse(s))
136166
True
137167
138-
>>> s = 'tdr:foo:bar:22/0'
168+
>>> s = 'tdr:bigquery:gcp:foo:bar:22/0'
139169
>>> s == str(TDRSourceSpec.parse(s))
140170
True
141171
142-
>>> s = 'tdr:foo:bar:22/2'
172+
>>> s = 'tdr:bigquery:gcp:foo:bar:22/2'
143173
>>> s == str(TDRSourceSpec.parse(s))
144174
True
145175
"""
146176
return ':'.join([
147177
'tdr',
178+
self.type.value,
179+
self.domain.value,
148180
self.subdomain,
149181
self.name,
150182
str(self.prefix)
@@ -157,18 +189,20 @@ def contains(self, other: 'SourceSpec') -> bool:
157189
"""
158190
>>> p = TDRSourceSpec.parse
159191
160-
>>> p('tdr:foo:bar:/0').contains(p('tdr:foo:bar:/0'))
192+
>>> p('tdr:bigquery:gcp:foo:bar:/0').contains(p('tdr:bigquery:gcp:foo:bar:/0'))
161193
True
162194
163-
>>> p('tdr:foo:bar:/0').contains(p('tdr:bar:bar:/0'))
195+
>>> p('tdr:bigquery:gcp:foo:bar:/0').contains(p('tdr:bigquery:gcp:bar:bar:/0'))
164196
False
165197
166-
>>> p('tdr:foo:bar:/0').contains(p('tdr:foo:baz:/0'))
198+
>>> p('tdr:bigquery:gcp:foo:bar:/0').contains(p('tdr:bigquery:gcp:foo:baz:/0'))
167199
False
168200
"""
169201
return (
170202
isinstance(other, TDRSourceSpec)
171203
and super().contains(other)
204+
and self.type == other.type
205+
and self.domain == other.domain
172206
and self.subdomain == other.subdomain
173207
and self.name == other.name
174208
)

0 commit comments

Comments
 (0)