utils/github: use Pulls API to check open pull requests on CI

We currently use the search API to check for duplicate pull requests,
but this is not very reliable. Our `autobump.yml` workflow routinely
opens duplicate pull requests [1] because the search API often returns
incorrect results.

We can make this more reliable by using the Pulls API instead.
Unfortunately, querying the Pulls API is very slow (~10s vs less than a
second for the search API), so let's limit its usage to calls made
inside CI, which should help @BrewTestBot avoid opening duplicate PRs.
(Most recent dupes were authored by @BrewTestBot.)

[1] https://github.com/Homebrew/homebrew-core/pulls?q=is%3Apr+author%3ABrewTestBot+is%3Aunmerged+in%3Acomments+Duplicate
This commit is contained in:
Carlo Cabrera 2023-05-10 13:08:32 +08:00
parent b3f7c6f7a5
commit 0376c6c5c1
No known key found for this signature in database
GPG Key ID: C74D447FC549A1D0

View File

@ -506,14 +506,16 @@ module GitHub
nil
end
def self.fetch_pull_requests(name, tap_remote_repo, state: nil, version: nil)
if version.present?
query = "#{name} #{version} is:pr"
regex = /(^|\s)#{Regexp.quote(name)}(:|,|\s)(.*\s)?#{Regexp.quote(version)}(:|,|\s|$)/i
else
query = "#{name} is:pr"
regex = /(^|\s)#{Regexp.quote(name)}(:|,|\s|$)/i
# Builds the case-insensitive pattern used to decide whether a pull request
# title refers to `name` and, when a version is supplied, to that version too.
#
# Both `name` and `version` are escaped with `Regexp.quote`, so they are
# matched literally. `name` must be preceded by the start of a line or
# whitespace and followed by `:`, `,`, whitespace or the end of a line; when
# given, `version` must appear after it, delimited in the same way.
#
# @param name [String] text to look for in the title
# @param version [String, nil] optional version that must follow `name`
# @return [Regexp] a frozen, case-insensitive regular expression
def self.pull_request_title_regex(name, version = nil)
  escaped_name = Regexp.quote(name)

  pattern = if version.blank?
    /(^|\s)#{escaped_name}(:|,|\s|$)/i
  else
    /(^|\s)#{escaped_name}(:|,|\s)(.*\s)?#{Regexp.quote(version)}(:|,|\s|$)/i
  end

  pattern.freeze
end
def self.fetch_pull_requests(name, tap_remote_repo, state: nil, version: nil)
regex = pull_request_title_regex(name, version)
query = "is:pr #{name} #{version}".strip
issues_for_formula(query, tap_remote_repo: tap_remote_repo, state: state).select do |pr|
pr["html_url"].include?("/pull/") && regex.match?(pr["title"])
end
@ -522,8 +524,42 @@ module GitHub
[]
end
# Lists the open pull requests of `tap_remote_repo` via the REST
# `repos/<owner>/<repo>/pulls` endpoint and returns those whose title matches
# `name` (and `version`, when given).
#
# WARNING: This returns results in a slightly different form from `fetch_pull_requests`.
#
# The full list of open pull requests is memoised per repository and per
# three-minute window, so repeated lookups within a window reuse one listing.
#
# @param name [String] text that must appear in the pull request title
# @param tap_remote_repo [String] repository in `owner/repo` form
# @param version [String, nil] optional version that must also appear in the title
# @return [Array] the matching entries, as yielded by `API.paginate_rest`;
#   an empty array when `tap_remote_repo` is blank
def self.fetch_open_pull_requests(name, tap_remote_repo, version: nil)
  return [] if tap_remote_repo.blank?

  # Bust the cache every three minutes. The clock is read once so the epoch is
  # always aligned to a window boundary (two separate reads could straddle a
  # second and yield a misaligned key).
  cache_expiry = 3 * 60
  now = Time.now.to_i
  cache_epoch = now - (now % cache_expiry)
  cache_key = "#{tap_remote_repo}_#{cache_epoch}"

  @open_pull_requests ||= {}

  # Entries from earlier windows are never looked up again; drop them so the
  # cache does not keep accumulating full pull request listings.
  current_suffix = "_#{cache_epoch}"
  @open_pull_requests.keep_if { |key, _| key.end_with?(current_suffix) }

  @open_pull_requests[cache_key] ||= begin
    owner, repo = tap_remote_repo.split("/")
    endpoint = "repos/#{owner}/#{repo}/pulls"
    query_parameters = ["state=open", "direction=desc"]
    pull_requests = []

    # Collect every page into a single flat list.
    API.paginate_rest("#{API_URL}/#{endpoint}", additional_query_params: query_parameters.join("&")) do |page|
      pull_requests.concat(page)
    end

    pull_requests
  end

  regex = pull_request_title_regex(name, version)
  @open_pull_requests[cache_key].select { |pr| regex.match?(pr["title"]) }
end
def self.check_for_duplicate_pull_requests(name, tap_remote_repo, state:, file:, args:, version: nil)
pull_requests = fetch_pull_requests(name, tap_remote_repo, state: state, version: version).select do |pr|
# `fetch_open_pull_requests` is more reliable but *really* slow, so let's use it only in CI.
pull_requests = if state == "open" && ENV["CI"].present?
fetch_open_pull_requests(name, tap_remote_repo, version: version)
else
fetch_pull_requests(name, tap_remote_repo, state: state, version: version)
end
pull_requests.select! do |pr|
get_pull_request_changed_files(
tap_remote_repo, pr["number"]
).any? { |f| f["filename"] == file }