zhimin-z committed on
Commit
c3011cc
·
1 Parent(s): 9278f09
Files changed (1) hide show
  1. msr.py +10 -6
msr.py CHANGED
@@ -433,7 +433,11 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
433
  # Build identifier list for IN clause
434
  identifier_list = ', '.join([f"'{id}'" for id in identifiers])
435
 
436
- # Build comprehensive query with CTEs using parameterized file lists (JSON.gz format)
 
 
 
 
437
  # Optimized: Single file scan + ROW_NUMBER() deduplication (no DISTINCT)
438
  query = f"""
439
  WITH all_review_events AS (
@@ -444,7 +448,7 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
444
  TRY_CAST(json_extract_string(repo, '$.name') AS VARCHAR) as repo_name,
445
  payload,
446
  created_at
447
- FROM read_json($review_patterns, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
448
  WHERE
449
  TRY_CAST(type AS VARCHAR) IN ('PullRequestReviewEvent', 'IssueCommentEvent', 'PullRequestReviewCommentEvent')
450
  AND TRY_CAST(json_extract_string(actor, '$.login') AS VARCHAR) IN ({identifier_list})
@@ -490,9 +494,9 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
490
  TRY_CAST(json_extract_string(payload, '$.pull_request.closed_at') AS VARCHAR) as closed_at,
491
  created_at,
492
  ROW_NUMBER() OVER (PARTITION BY json_extract_string(payload, '$.pull_request.html_url') ORDER BY created_at DESC) as rn
493
- FROM read_json($status_patterns, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
494
  WHERE
495
- TRY_CAST(type AS VARCHAR) = 'PullRequestEvent'
496
  AND TRY_CAST(json_extract_string(payload, '$.action') AS VARCHAR) = 'closed'
497
  AND json_extract_string(payload, '$.pull_request.html_url') IS NOT NULL
498
  AND json_extract_string(payload, '$.pull_request.html_url') IN (
@@ -545,8 +549,8 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
545
  WHERE reviewer IN ({identifier_list})
546
  """).fetchall()
547
  else:
548
- # Execute query with parameters
549
- results = conn.execute(query, {'review_patterns': review_patterns, 'status_patterns': status_patterns}).fetchall()
550
 
551
  # Cache the complete results for all future queries in this date range
552
  if len(results) > 0:
 
433
  # Build identifier list for IN clause
434
  identifier_list = ', '.join([f"'{id}'" for id in identifiers])
435
 
436
+ # Convert file patterns to SQL array format for direct interpolation
437
+ review_patterns_sql = str(review_patterns).replace("'", "'")
438
+ status_patterns_sql = str(status_patterns).replace("'", "'")
439
+
440
+ # Build comprehensive query with CTEs using direct SQL array format (JSON.gz format)
441
  # Optimized: Single file scan + ROW_NUMBER() deduplication (no DISTINCT)
442
  query = f"""
443
  WITH all_review_events AS (
 
448
  TRY_CAST(json_extract_string(repo, '$.name') AS VARCHAR) as repo_name,
449
  payload,
450
  created_at
451
+ FROM read_json({review_patterns_sql}, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
452
  WHERE
453
  TRY_CAST(type AS VARCHAR) IN ('PullRequestReviewEvent', 'IssueCommentEvent', 'PullRequestReviewCommentEvent')
454
  AND TRY_CAST(json_extract_string(actor, '$.login') AS VARCHAR) IN ({identifier_list})
 
494
  TRY_CAST(json_extract_string(payload, '$.pull_request.closed_at') AS VARCHAR) as closed_at,
495
  created_at,
496
  ROW_NUMBER() OVER (PARTITION BY json_extract_string(payload, '$.pull_request.html_url') ORDER BY created_at DESC) as rn
497
+ FROM read_json({status_patterns_sql}, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
498
  WHERE
499
+ type = 'PullRequestEvent'
500
  AND TRY_CAST(json_extract_string(payload, '$.action') AS VARCHAR) = 'closed'
501
  AND json_extract_string(payload, '$.pull_request.html_url') IS NOT NULL
502
  AND json_extract_string(payload, '$.pull_request.html_url') IN (
 
549
  WHERE reviewer IN ({identifier_list})
550
  """).fetchall()
551
  else:
552
+ # Execute query using f-string interpolation
553
+ results = conn.execute(query).fetchall()
554
 
555
  # Cache the complete results for all future queries in this date range
556
  if len(results) > 0: