10
10
from singer_sdk .exceptions import FatalAPIError
11
11
from singer_sdk .helpers .jsonpath import extract_jsonpath
12
12
13
- from tap_github .client import GitHubGraphqlStream , GitHubRestStream
13
+ from tap_github .client import GitHubDiffStream , GitHubGraphqlStream , GitHubRestStream
14
14
from tap_github .schema_objects import (
15
15
files_object ,
16
16
label_object ,
@@ -1079,6 +1079,14 @@ def post_process(self, row: dict, context: dict | None = None) -> dict:
1079
1079
row ["commit_timestamp" ] = row ["commit" ]["committer" ]["date" ]
1080
1080
return row
1081
1081
1082
def get_child_context(self, record: dict, context: dict | None) -> dict:
    """Build the context passed to child streams for one commit record.

    Args:
        record: The commit record; its ``sha`` value becomes ``commit_id``.
        context: The parent context, or None.

    Returns:
        A dict with ``org``, ``repo`` and ``repo_id`` copied from ``context``
        (each None when ``context`` is falsy) plus ``commit_id``.
    """
    child_context = {}
    # Same key order as the parent context keys, then the commit identifier.
    for parent_key in ("org", "repo", "repo_id"):
        child_context[parent_key] = context[parent_key] if context else None
    child_context["commit_id"] = record["sha"]
    return child_context
1089
+
1082
1090
schema = th .PropertiesList (
1083
1091
th .Property ("org" , th .StringType ),
1084
1092
th .Property ("repo" , th .StringType ),
@@ -1162,6 +1170,37 @@ class CommitCommentsStream(GitHubRestStream):
1162
1170
).to_dict ()
1163
1171
1164
1172
1173
class CommitDiffsStream(GitHubDiffStream):
    """Defines 'commit_diffs' stream.

    Child of ``CommitsStream``: the ``{commit_id}`` path placeholder is
    filled from the parent-provided context.
    """

    name = "commit_diffs"
    path = "/repos/{org}/{repo}/commits/{commit_id}"
    primary_keys: ClassVar[list[str]] = ["commit_id"]
    parent_stream_type = CommitsStream
    ignore_parent_replication_key = False
    state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"]

    schema = th.PropertiesList(
        # Keys inherited from the parent context
        th.Property("org", th.StringType),
        th.Property("repo", th.StringType),
        th.Property("repo_id", th.IntegerType),
        th.Property("commit_id", th.StringType),
        # Diff payload; presumably filled in by GitHubDiffStream - confirm there
        th.Property("diff", th.StringType),
        th.Property("success", th.BooleanType),
        th.Property("error_message", th.StringType),
    ).to_dict()

    def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict:
        """Run the base post-processing, then copy the parent keys onto the row.

        Args:
            row: The record to post-process.
            context: The stream context, or None.

        Returns:
            The base-processed row; when ``context`` is given it also carries
            ``org``, ``repo``, ``repo_id`` and ``commit_id`` from the context.
        """
        row = super().post_process(row, context)
        if context is None:
            return row
        # The commit ID (sha) and repository identifiers come from the context.
        for parent_key in ("org", "repo", "repo_id", "commit_id"):
            row[parent_key] = context[parent_key]
        return row
1202
+
1203
+
1165
1204
class LabelsStream (GitHubRestStream ):
1166
1205
"""Defines 'labels' stream."""
1167
1206
@@ -1355,14 +1394,23 @@ def get_child_context(self, record: dict, context: dict | None) -> dict:
1355
1394
).to_dict ()
1356
1395
1357
1396
1358
- class PullRequestCommits (GitHubRestStream ):
1397
+ class PullRequestCommitsStream (GitHubRestStream ):
1359
1398
name = "pull_request_commits"
1360
1399
path = "/repos/{org}/{repo}/pulls/{pull_number}/commits"
1361
1400
ignore_parent_replication_key = False
1362
1401
primary_keys : ClassVar [list [str ]] = ["node_id" ]
1363
1402
parent_stream_type = PullRequestsStream
1364
1403
state_partitioning_keys : ClassVar [list [str ]] = ["repo" , "org" ]
1365
1404
1405
def get_child_context(self, record: dict, context: dict | None) -> dict:
    """Build the context passed to child streams for one pull-request commit.

    Args:
        record: The commit record; its ``sha`` value becomes ``commit_id``.
        context: The parent context, or None.

    Returns:
        A dict with ``org``, ``repo``, ``repo_id`` and ``pull_number`` copied
        from ``context`` (each None when ``context`` is falsy) plus
        ``commit_id``.
    """
    child_context = {}
    # Parent keys first, in a fixed order, then the commit identifier.
    for parent_key in ("org", "repo", "repo_id", "pull_number"):
        child_context[parent_key] = context[parent_key] if context else None
    child_context["commit_id"] = record["sha"]
    return child_context
1413
+
1366
1414
schema = th .PropertiesList (
1367
1415
# Parent keys
1368
1416
th .Property ("org" , th .StringType ),
@@ -1443,7 +1491,7 @@ def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict
1443
1491
return row
1444
1492
1445
1493
1446
- class PullRequestDiffsStream (GitHubRestStream ):
1494
+ class PullRequestDiffsStream (GitHubDiffStream ):
1447
1495
name = "pull_request_diffs"
1448
1496
path = "/repos/{org}/{repo}/pulls/{pull_number}"
1449
1497
primary_keys : ClassVar [list [str ]] = ["pull_id" ]
@@ -1453,53 +1501,48 @@ class PullRequestDiffsStream(GitHubRestStream):
1453
1501
# Known Github API errors
1454
1502
tolerated_http_errors : ClassVar [list [int ]] = [404 , 406 , 422 , 502 ]
1455
1503
1456
- @property
1457
- def http_headers (self ) -> dict :
1458
- headers = super ().http_headers
1459
- headers ["Accept" ] = "application/vnd.github.v3.diff"
1460
- return headers
1504
def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict:
    """Run the base post-processing, then copy the pull-request keys onto the row.

    Args:
        row: The record to post-process.
        context: The stream context, or None.

    Returns:
        The base-processed row; when ``context`` is given it also carries
        ``org``, ``repo``, ``repo_id``, ``pull_number`` and ``pull_id``.
    """
    row = super().post_process(row, context)
    if context is None:
        return row
    # The PR ID and repository identifiers come from the context.
    for parent_key in ("org", "repo", "repo_id", "pull_number", "pull_id"):
        row[parent_key] = context[parent_key]
    return row
1461
1514
1462
- def parse_response (self , response : requests .Response ) -> Iterable [dict ]:
1463
- """Parse the response to yield the diff text instead of an object and prevent buffer overflow.""" # noqa: E501
1464
- if response .status_code != 200 :
1465
- contents = response .json ()
1466
- self .logger .info (
1467
- "Skipping PR due to %d error: %s" ,
1468
- response .status_code ,
1469
- contents ["message" ],
1470
- )
1471
- yield {
1472
- "success" : False ,
1473
- "error_message" : contents ["message" ],
1474
- }
1475
- return
1515
+ schema = th .PropertiesList (
1516
+ # Parent keys
1517
+ th .Property ("org" , th .StringType ),
1518
+ th .Property ("repo" , th .StringType ),
1519
+ th .Property ("repo_id" , th .IntegerType ),
1520
+ th .Property ("pull_number" , th .IntegerType ),
1521
+ th .Property ("pull_id" , th .IntegerType ),
1522
+ # Rest
1523
+ th .Property ("diff" , th .StringType ),
1524
+ th .Property ("success" , th .BooleanType ),
1525
+ th .Property ("error_message" , th .StringType ),
1526
+ ).to_dict ()
1476
1527
1477
- if content_length_str := response .headers .get ("Content-Length" ):
1478
- content_length = int (content_length_str )
1479
- max_size = 41_943_040 # 40 MiB
1480
- if content_length > max_size :
1481
- self .logger .info (
1482
- "Skipping PR. The diff size (%.2f MiB) exceeded the maximum size "
1483
- "limit of 40 MiB." ,
1484
- content_length / 1024 / 1024 ,
1485
- )
1486
- yield {
1487
- "success" : False ,
1488
- "error_message" : "Diff exceeded the maximum size limit of 40 MiB." ,
1489
- }
1490
- return
1491
1528
1492
- yield {"diff" : response .text , "success" : True }
1529
+ class PullRequestCommitDiffsStream (GitHubDiffStream ):
1530
+ name = "pull_request_commit_diffs"
1531
+ path = "/repos/{org}/{repo}/commits/{commit_id}"
1532
+ primary_keys : ClassVar [list [str ]] = ["commit_id" ]
1533
+ parent_stream_type = PullRequestCommitsStream
1534
+ ignore_parent_replication_key = False
1535
+ state_partitioning_keys : ClassVar [list [str ]] = ["repo" , "org" ]
1493
1536
1494
1537
def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict:
    """Run the base post-processing, then copy the parent keys onto the row.

    Args:
        row: The record to post-process.
        context: The stream context, or None.

    Returns:
        The base-processed row; when ``context`` is given it also carries
        ``org``, ``repo``, ``repo_id``, ``pull_number`` and ``commit_id``.

    Raises:
        KeyError: If ``context`` is given but lacks one of the copied keys.
    """
    row = super().post_process(row, context)
    if context is not None:
        # Get commit ID (sha) from context
        row["org"] = context["org"]
        row["repo"] = context["repo"]
        row["repo_id"] = context["repo_id"]
        row["pull_number"] = context["pull_number"]
        # The key must be exactly "commit_id" (no trailing space) so it matches
        # the key emitted by the parent's get_child_context and the
        # {commit_id} placeholder in this stream's path.
        row["commit_id"] = context["commit_id"]
    return row
1504
1547
1505
1548
schema = th .PropertiesList (
@@ -1508,7 +1551,7 @@ def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict
1508
1551
th .Property ("repo" , th .StringType ),
1509
1552
th .Property ("repo_id" , th .IntegerType ),
1510
1553
th .Property ("pull_number" , th .IntegerType ),
1511
- th .Property ("pull_id " , th .IntegerType ),
1554
+ th .Property ("commit_id " , th .StringType ),
1512
1555
# Rest
1513
1556
th .Property ("diff" , th .StringType ),
1514
1557
th .Property ("success" , th .BooleanType ),
0 commit comments