This repository was archived by the owner on Apr 3, 2025. It is now read-only.

Commit f3a7478

Merge pull request #60 from google/push_ads_api_to_v10
Push ads api to v10

2 parents 15af2af + 1386e62

14 files changed: +257, -79 lines


.github/workflows/python-app.yml

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install -r megalista_dataflow/requirements.txt
+        pip install --no-deps -r megalista_dataflow/requirements-no-deps.txt
     - name: Run tests
       run: |
         ./run_tests.sh

megalista_dataflow/main.py

Lines changed: 24 additions & 8 deletions
@@ -25,7 +25,7 @@
 from models.oauth_credentials import OAuthCredentials
 from models.options import DataflowOptions
 from models.sheets_config import SheetsConfig
-from sources.batches_from_executions import BatchesFromExecutions, ExecutionCoder
+from sources.batches_from_executions import BatchesFromExecutions, ExecutionCoder, TransactionalType
 from sources.primary_execution_source import PrimaryExecutionSource
 from uploaders.big_query.transactional_events_results_writer import TransactionalEventsResultsWriter
 from uploaders.campaign_manager.campaign_manager_conversion_uploader import CampaignManagerConversionUploaderDoFn

@@ -155,14 +155,24 @@ def expand(self, executions):
         return (
             executions
             | "Load Data - GoogleAdsOfflineConversions"
-            >> BatchesFromExecutions(DestinationType.ADS_OFFLINE_CONVERSION, 2000)
+            >> BatchesFromExecutions(
+                DestinationType.ADS_OFFLINE_CONVERSION,
+                2000,
+                TransactionalType.GCLID_TIME,
+                self.params.dataflow_options.bq_ops_dataset)
             | "Upload - GoogleAdsOfflineConversions"
             >> beam.ParDo(
                 GoogleAdsOfflineUploaderDoFn(
                     self.params._oauth_credentials,
                     self.params._dataflow_options.developer_token
                 )
             )
+            | "Persist results - GoogleAdsOfflineConversions"
+            >> beam.ParDo(
+                TransactionalEventsResultsWriter(
+                    self.params._dataflow_options.bq_ops_dataset,
+                    TransactionalType.GCLID_TIME)
+            )
         )

@@ -200,13 +210,15 @@ def expand(self, executions):
             >> BatchesFromExecutions(
                 DestinationType.GA_MEASUREMENT_PROTOCOL,
                 20,
-                True,
+                TransactionalType.UUID,
                 self.params.dataflow_options.bq_ops_dataset)
             | "Upload - GA measurement protocol"
             >> beam.ParDo(GoogleAnalyticsMeasurementProtocolUploaderDoFn())
             | "Persist results - GA measurement protocol"
             >> beam.ParDo(
-                TransactionalEventsResultsWriter(self.params._dataflow_options.bq_ops_dataset)
+                TransactionalEventsResultsWriter(
+                    self.params._dataflow_options.bq_ops_dataset,
+                    TransactionalType.UUID)
             )
         )

@@ -219,13 +231,15 @@ def expand(self, executions):
             >> BatchesFromExecutions(
                 DestinationType.GA_4_MEASUREMENT_PROTOCOL,
                 20,
-                True,
+                TransactionalType.UUID,
                 self.params.dataflow_options.bq_ops_dataset)
             | "Upload - GA 4 measurement protocol"
             >> beam.ParDo(GoogleAnalytics4MeasurementProtocolUploaderDoFn())
             | "Persist results - GA 4 measurement protocol"
             >> beam.ParDo(
-                TransactionalEventsResultsWriter(self.params._dataflow_options.bq_ops_dataset)
+                TransactionalEventsResultsWriter(
+                    self.params._dataflow_options.bq_ops_dataset,
+                    TransactionalType.UUID)
             )
         )

@@ -238,15 +252,17 @@ def expand(self, executions):
             >> BatchesFromExecutions(
                 DestinationType.CM_OFFLINE_CONVERSION,
                 1000,
-                True,
+                TransactionalType.UUID,
                 self.params.dataflow_options.bq_ops_dataset)
             | "Upload - CM conversion"
             >> beam.ParDo(
                 CampaignManagerConversionUploaderDoFn(self.params._oauth_credentials)
             )
             | "Persist results - CM conversion"
             >> beam.ParDo(
-                TransactionalEventsResultsWriter(self.params._dataflow_options.bq_ops_dataset)
+                TransactionalEventsResultsWriter(
+                    self.params._dataflow_options.bq_ops_dataset,
+                    TransactionalType.UUID)
             )
         )
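The four hunks above share one pattern: each transactional step now declares its TransactionalType when loading batches and again when persisting results, so the writer records the matching dedup key. A minimal sketch of that shape, assuming the megalista_dataflow modules are importable; transactional_step, uploader_dofn, and bq_ops_dataset are illustrative names, not part of the codebase:

import apache_beam as beam

from models.execution import DestinationType
from sources.batches_from_executions import BatchesFromExecutions, TransactionalType
from uploaders.big_query.transactional_events_results_writer import TransactionalEventsResultsWriter


def transactional_step(executions, uploader_dofn, bq_ops_dataset):
    return (
        executions
        # Load only rows not yet uploaded, keyed here by (gclid, time).
        | "Load" >> BatchesFromExecutions(
            DestinationType.ADS_OFFLINE_CONVERSION,
            2000,
            TransactionalType.GCLID_TIME,
            bq_ops_dataset)
        | "Upload" >> beam.ParDo(uploader_dofn)
        # Persisting successful rows lets the next run's LEFT JOIN skip them.
        | "Persist" >> beam.ParDo(TransactionalEventsResultsWriter(
            bq_ops_dataset,
            TransactionalType.GCLID_TIME))
    )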

megalista_dataflow/requirements-no-deps.txt

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+apache-beam[gcp]==2.36.0
megalista_dataflow/requirements.txt

Lines changed: 56 additions & 19 deletions

@@ -1,23 +1,60 @@
-google-ads==12.0.0
-httplib2==0.17.4
-protobuf==3.13.0
-google-api-python-client==1.12.8
-google-cloud-core==1.4.1
-google-cloud-bigquery==1.27.2
-apache-beam[gcp]==2.36.0
-apache-beam==2.36.0
-google-cloud-datastore==1.13.1
-google-apitools==0.5.31
+google-ads==15.0.0
+google-api-python-client==2.37.0
+google-cloud-bigquery==2.34.0
+google-cloud-firestore==2.3.4
+google-cloud-storage==2.1.0
+
+aiohttp==3.6.2
+
+# Test deps
+mypy==0.790
 pytest==5.4.3
 pytest-cov==2.10.0
 pytest-mock==3.2.0
+pytz==2021.3
 requests-mock==1.8.0
-pytz==2020.1
-wheel==0.34.2
-pyarrow==0.17.1
-aiohttp==3.6.2
-bloom-filter==1.3
-six==1.13.0
-mypy==0.790
-google-cloud-storage==1.38.0
-google-cloud-firestore==2.1.1
+
+
+
+# apache beam deps below
+google-apitools==0.5.31
+cachetools==4.2.4
+certifi==2021.10.8
+charset-normalizer==2.0.12
+cloudpickle==2.0.0
+crcmod==1.7
+dill==0.3.1.1
+docopt==0.6.2
+fastavro==1.4.9
+fasteners==0.17.3
+google-resumable-media==2.2.1
+googleapis-common-protos==1.54.0
+grpc-google-iam-v1==0.12.3
+grpcio==1.44.0
+grpcio-gcp==0.2.2
+grpcio-status==1.44.0
+hdfs==2.6.0
+httplib2==0.19.1
+idna==3.3
+libcst==0.4.1
+mypy-extensions==0.4.3
+numpy==1.21.5
+oauth2client==4.1.3
+orjson==3.6.7
+overrides==6.1.0
+packaging==21.3
+pyarrow==6.0.1
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pydot==1.4.2
+pymongo==3.12.3
+pyparsing==2.4.7
+python-dateutil==2.8.2
+# pyyaml==6.0
+requests==2.27.1
+rsa==4.8
+six==1.16.0
+typing-extensions==4.1.1
+typing-inspect==0.7.1
+typing-utils==0.1.0
+urllib3==1.26.8

megalista_dataflow/setup.py

Lines changed: 3 additions & 4 deletions
@@ -20,9 +20,8 @@
     author='Google',
     author_email='[email protected]',
     url='https://github.com/google/megalista/',
-    install_requires=['google-ads==12.0.0', 'google-api-python-client==1.12.8',
-                      'google-cloud-core==1.4.1', 'google-cloud-bigquery==1.27.2',
-                      'google-cloud-datastore==1.13.1', 'aiohttp==3.6.2',
-                      'google-cloud-storage==1.38.0', 'google-cloud-firestore==2.1.1'],
+    install_requires=['google-ads==15.0.0', 'google-api-python-client==2.37.0',
+                      'google-cloud-bigquery==2.34.0', 'aiohttp==3.6.2',
+                      'google-cloud-storage==2.1.0', 'google-cloud-firestore==2.3.4'],
     packages=setuptools.find_packages(),
 )

megalista_dataflow/sources/batches_from_executions.py

Lines changed: 54 additions & 19 deletions
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from enum import Enum

 import apache_beam as beam
 import logging

@@ -20,6 +21,7 @@
 from apache_beam.options.value_provider import ValueProvider
 from google.cloud import bigquery
 from models.execution import DestinationType, Execution, Batch
+from string import Template
 from typing import Any, List, Iterable, Tuple, Dict


@@ -48,6 +50,20 @@ def is_deterministic(self):
         return True


+class TransactionalType(Enum):
+    """
+    Distinct types to handle data upload deduplication.
+    NOT_TRANSACTIONAL: don't handle deduplication.
+    UUID: expect a 'uuid' field in the source table as a unique identifier for each row.
+    GCLID_TIME: expect 'gclid' and 'time' fields in the source table as a unique identifier for each row.
+    """
+    (
+        NOT_TRANSACTIONAL,
+        UUID,
+        GCLID_TIME,
+    ) = range(3)
+
+
 class BatchesFromExecutions(beam.PTransform):
     """
     Filter the received executions by the received action,
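For reference, the TransactionalType added above numbers its members by tuple-unpacking range() inside the class body; a small standalone sketch of the same idiom, separate from the codebase:

from enum import Enum


class TransactionalType(Enum):
    # Unpacking range(3) assigns consecutive values 0, 1, 2 to the members.
    (
        NOT_TRANSACTIONAL,  # 0: no deduplication handling
        UUID,               # 1: dedup on a 'uuid' column
        GCLID_TIME,         # 2: dedup on ('gclid', 'time') columns
    ) = range(3)


assert TransactionalType.UUID.value == 1
assert TransactionalType.GCLID_TIME.value == 2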
@@ -62,14 +78,15 @@ def process(self, execution: Execution) -> Iterable[Tuple[Execution, Dict[str, Any]]]:
         table_name = table_name.replace('`', '')
         query = f"SELECT data.* FROM `{table_name}` AS data"
         logging.getLogger(_LOGGER_NAME).info(f'Reading from table {table_name} for Execution {execution}')
-        rows_iterator = client.query(query).result(page_size=_BIGQUERY_PAGE_SIZE)
-        for row in rows_iterator:
+        for row in client.query(query).result(page_size=_BIGQUERY_PAGE_SIZE):
             yield execution, _convert_row_to_dict(row)

 class _ExecutionIntoBigQueryRequestTransactional(beam.DoFn):

-    def __init__(self, bq_ops_dataset):
+    def __init__(self, bq_ops_dataset, create_table_query, join_query):
         self._bq_ops_dataset = bq_ops_dataset
+        self._create_table_query = create_table_query
+        self._join_query = join_query

     def process(self, execution: Execution) -> Iterable[Tuple[Execution, Dict[str, Any]]]:
         table_name = execution.source.source_metadata[0] + \

@@ -81,25 +98,20 @@ def process(self, execution: Execution) -> Iterable[Tuple[Execution, Dict[str, Any]]]:
         uploaded_table_name = uploaded_table_name.replace('`', '')
         client = bigquery.Client()

-        query = f"CREATE TABLE IF NOT EXISTS `{uploaded_table_name}` ( \
-            timestamp TIMESTAMP OPTIONS(description='Event timestamp'), \
-            uuid STRING OPTIONS(description='Event unique identifier')) \
-            PARTITION BY _PARTITIONDATE \
-            OPTIONS(partition_expiration_days=15)"
+        create_table_query_ready = \
+            Template(self._create_table_query).substitute(uploaded_table_name=uploaded_table_name)

         logging.getLogger(_LOGGER_NAME).info(
             f"Creating table {uploaded_table_name} if it doesn't exist")

-        client.query(query).result()
+        client.query(create_table_query_ready).result()

-        query = f"SELECT data.* FROM `{table_name}` AS data \
-            LEFT JOIN {uploaded_table_name} AS uploaded USING(uuid) \
-            WHERE uploaded.uuid IS NULL;"
+        join_query_ready = \
+            Template(self._join_query).substitute(table_name=table_name, uploaded_table_name=uploaded_table_name)

         logging.getLogger(_LOGGER_NAME).info(
             f'Reading from table {table_name} for Execution {execution}')
-        rows_iterator = client.query(query).result(page_size=_BIGQUERY_PAGE_SIZE)
-        for row in rows_iterator:
+        for row in client.query(join_query_ready).result(page_size=_BIGQUERY_PAGE_SIZE):
             yield execution, _convert_row_to_dict(row)


@@ -123,21 +135,44 @@ def __init__(
         self,
         destination_type: DestinationType,
         batch_size: int = 5000,
-        transactional: bool = False,
+        transactional_type: TransactionalType = TransactionalType.NOT_TRANSACTIONAL,
         bq_ops_dataset: ValueProvider = None
     ):
         super().__init__()
-        if transactional and not bq_ops_dataset:
+        if transactional_type is not TransactionalType.NOT_TRANSACTIONAL and not bq_ops_dataset:
             raise Exception('Missing bq_ops_dataset for this uploader')

         self._destination_type = destination_type
         self._batch_size = batch_size
-        self._transactional = transactional
+        self._transactional_type = transactional_type
         self._bq_ops_dataset = bq_ops_dataset

     def _get_bq_request_class(self):
-        if self._transactional:
-            return self._ExecutionIntoBigQueryRequestTransactional(self._bq_ops_dataset)
+        if self._transactional_type == TransactionalType.UUID:
+            return self._ExecutionIntoBigQueryRequestTransactional(
+                self._bq_ops_dataset,
+                "CREATE TABLE IF NOT EXISTS `$uploaded_table_name` ( \
+                    timestamp TIMESTAMP OPTIONS(description='Event timestamp'), \
+                    uuid STRING OPTIONS(description='Event unique identifier')) \
+                    PARTITION BY _PARTITIONDATE \
+                    OPTIONS(partition_expiration_days=15)",
+                "SELECT data.* FROM `$table_name` AS data \
+                    LEFT JOIN $uploaded_table_name AS uploaded USING(uuid) \
+                    WHERE uploaded.uuid IS NULL;"
+            )
+        if self._transactional_type == TransactionalType.GCLID_TIME:
+            return self._ExecutionIntoBigQueryRequestTransactional(
+                self._bq_ops_dataset,
+                "CREATE TABLE IF NOT EXISTS `$uploaded_table_name` ( \
+                    timestamp TIMESTAMP OPTIONS(description='Event timestamp'), \
+                    gclid STRING OPTIONS(description='Original gclid'), \
+                    time STRING OPTIONS(description='Original time')) \
+                    PARTITION BY _PARTITIONDATE \
+                    OPTIONS(partition_expiration_days=15)",
+                "SELECT data.* FROM `$table_name` AS data \
+                    LEFT JOIN $uploaded_table_name AS uploaded USING(gclid, time) \
+                    WHERE uploaded.gclid IS NULL;"
+            )
         return self._ExecutionIntoBigQueryRequest()

     def expand(self, executions):
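The queries are now plain string.Template strings, so one transactional DoFn serves both dedup schemes. A quick illustration of the substitution, with made-up table names:

from string import Template

join_query = ("SELECT data.* FROM `$table_name` AS data "
              "LEFT JOIN $uploaded_table_name AS uploaded USING(gclid, time) "
              "WHERE uploaded.gclid IS NULL;")

print(Template(join_query).substitute(
    table_name="my_project.my_dataset.conversions",
    uploaded_table_name="my_project.ops_dataset.conversions_uploaded"))
# Output (wrapped for readability):
# SELECT data.* FROM `my_project.my_dataset.conversions` AS data
# LEFT JOIN my_project.ops_dataset.conversions_uploaded AS uploaded
# USING(gclid, time) WHERE uploaded.gclid IS NULL;

The LEFT JOIN ... WHERE uploaded.gclid IS NULL shape is an anti-join: only source rows with no match in the uploaded-results table are read, which is what makes re-runs skip already-uploaded events.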

megalista_dataflow/third_party/steps.py

Lines changed: 7 additions & 3 deletions
@@ -7,7 +7,8 @@
 from third_party.uploaders.appsflyer.appsflyer_s2s_uploader_async import AppsFlyerS2SUploaderDoFn
 from models.execution import DestinationType
 from uploaders.big_query.transactional_events_results_writer import TransactionalEventsResultsWriter
-from sources.batches_from_executions import BatchesFromExecutions
+from sources.batches_from_executions import BatchesFromExecutions, TransactionalType
+

 class AppsFlyerEventsStep(beam.PTransform):
     def __init__(self, params):

@@ -20,9 +21,12 @@ def expand(self, executions):
             BatchesFromExecutions(
                 DestinationType.APPSFLYER_S2S_EVENTS,
                 1000,
-                True,
+                TransactionalType.UUID,
                 self.params.dataflow_options.bq_ops_dataset)
             | 'Upload - AppsFlyer S2S events' >>
             beam.ParDo(AppsFlyerS2SUploaderDoFn(self.params.dataflow_options.appsflyer_dev_key))
-            | 'Persist results - AppsFlyer S2S events' >> beam.ParDo(TransactionalEventsResultsWriter(self.params.dataflow_options.bq_ops_dataset))
+            | 'Persist results - AppsFlyer S2S events' >> beam.ParDo(
+                TransactionalEventsResultsWriter(
+                    self.params.dataflow_options.bq_ops_dataset,
+                    TransactionalType.UUID))
         )

0 commit comments
