From 7b0c3d54f10214aa404ac739cb9c357e90381c9f Mon Sep 17 00:00:00 2001 From: Bo Anderson Date: Thu, 14 Mar 2024 01:53:39 +0000 Subject: [PATCH] utils/github: use GraphQL PR searching --- Library/Homebrew/dev-cmd/bump.rb | 6 +- Library/Homebrew/utils/github.rb | 98 +++++++++++++++++++++++----- Library/Homebrew/utils/github/api.rb | 22 +++++++ 3 files changed, 104 insertions(+), 22 deletions(-) diff --git a/Library/Homebrew/dev-cmd/bump.rb b/Library/Homebrew/dev-cmd/bump.rb index da2ce67a1c..654f7e64e5 100644 --- a/Library/Homebrew/dev-cmd/bump.rb +++ b/Library/Homebrew/dev-cmd/bump.rb @@ -312,11 +312,7 @@ module Homebrew nil end - if pull_requests&.any? - pull_requests = pull_requests.map { |pr| "#{pr["title"]} (#{Formatter.url(pr["html_url"])})" }.join(", ") - end - - pull_requests + pull_requests&.map { |pr| "#{pr["title"]} (#{Formatter.url(pr["html_url"])})" }&.join(", ") end sig { diff --git a/Library/Homebrew/utils/github.rb b/Library/Homebrew/utils/github.rb index bb14c0c331..641f42bf24 100644 --- a/Library/Homebrew/utils/github.rb +++ b/Library/Homebrew/utils/github.rb @@ -516,19 +516,67 @@ module GitHub /(^|\s)#{Regexp.quote(name)}(:|,|\s)(.*\s)?#{Regexp.quote(version)}(:|,|\s|$)/i end + sig { + params(name: String, tap_remote_repo: String, state: T.nilable(String), version: T.nilable(String)) + .returns(T::Array[T::Hash[String, T.untyped]]) + } def self.fetch_pull_requests(name, tap_remote_repo, state: nil, version: nil) regex = pull_request_title_regex(name, version) query = "is:pr #{name} #{version}".strip - issues_for_formula(query, tap_remote_repo:, state:).select do |pr| - pr["html_url"].include?("/pull/") && regex.match?(pr["title"]) + # Unauthenticated users cannot use GraphQL so use search REST API instead. + # Limit for this is 30/minute so is usually OK unless you're spamming bump PRs (e.g. CI). + if API.credentials_type == :none + return issues_for_formula(query, tap_remote_repo:, state:).select do |pr| + pr["html_url"].include?("/pull/") && regex.match?(pr["title"]) + end + elsif state == "open" && ENV["GITHUB_REPOSITORY_OWNER"] == "Homebrew" + # Try use PR API, which might be cheaper on rate limits in some cases. + # The rate limit of the search API under GraphQL is unclear as it's + # costs the same as any other query accoding to /rate_limit. + # The PR API is also not very scalable so limit to Homebrew CI. + return fetch_open_pull_requests(name, tap_remote_repo, version:) + end + + query += " repo:#{tap_remote_repo} in:title" + query += " state:#{state}" if state.present? + graphql_query = <<~EOS + query($query: String!, $after: String) { + search(query: $query, type: ISSUE, first: 100, after: $after) { + nodes { + ... on PullRequest { + number + title + url + state + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + EOS + variables = { query: } + + pull_requests = [] + API.paginate_graphql(graphql_query, variables:) do |result| + data = result["search"] + pull_requests.concat(data["nodes"].select { |pr| regex.match?(pr["title"]) }) + data["pageInfo"] + end + pull_requests.map! do |pr| + pr.merge({ + "html_url" => pr.delete("url"), + "state" => pr.fetch("state").downcase, + }) end rescue API::RateLimitExceededError => e opoo e.message - [] + pull_requests || [] end - # WARNING: The GitHub API returns results in a slightly different form here compared to `fetch_pull_requests`. def self.fetch_open_pull_requests(name, tap_remote_repo, version: nil) return [] if tap_remote_repo.blank? @@ -539,29 +587,45 @@ module GitHub @open_pull_requests ||= {} @open_pull_requests[cache_key] ||= begin + query = <<~EOS + query($owner: String!, $repo: String!, $states: [PullRequestState!], $after: String) { + repository(owner: $owner, name: $repo) { + pullRequests(states: $states, first: 100, after: $after) { + nodes { + number + title + url + } + pageInfo { + hasNextPage + endCursor + } + } + } + } + EOS owner, repo = tap_remote_repo.split("/") - endpoint = "repos/#{owner}/#{repo}/pulls" - query_parameters = ["state=open", "direction=desc"] + variables = { owner:, repo:, states: ["OPEN"] } + regex = pull_request_title_regex(name, version) + pull_requests = [] - - API.paginate_rest("#{API_URL}/#{endpoint}", additional_query_params: query_parameters.join("&")) do |page| - pull_requests.concat(page) + API.paginate_graphql(query, variables:) do |result| + data = result.dig("repository", "pullRequests") + pull_requests.concat(data["nodes"]) + data["pageInfo"] end - pull_requests end - regex = pull_request_title_regex(name, version) @open_pull_requests[cache_key].select { |pr| regex.match?(pr["title"]) } + .map { |pr| pr.merge("html_url" => pr.delete("url")) } + rescue API::RateLimitExceededError => e + opoo e.message + pull_requests || [] end def self.check_for_duplicate_pull_requests(name, tap_remote_repo, state:, file:, quiet:, version: nil) - # `fetch_open_pull_requests` is more reliable but *really* slow, so let's use it only in CI. - pull_requests = if state == "open" && ENV["CI"].present? - fetch_open_pull_requests(name, tap_remote_repo, version:) - else - fetch_pull_requests(name, tap_remote_repo, state:, version:) - end + pull_requests = fetch_pull_requests(name, tap_remote_repo, state:, version:) pull_requests.select! do |pr| get_pull_request_changed_files( diff --git a/Library/Homebrew/utils/github/api.rb b/Library/Homebrew/utils/github/api.rb index 3fd033b18c..df1464dfb7 100644 --- a/Library/Homebrew/utils/github/api.rb +++ b/Library/Homebrew/utils/github/api.rb @@ -318,6 +318,28 @@ module GitHub end end + sig { + params( + query: String, + variables: T.nilable(T::Hash[Symbol, T.untyped]), + _block: T.proc.params(data: T::Hash[String, T.untyped]).returns(T::Hash[String, T.untyped]), + ).void + } + def self.paginate_graphql(query, variables: nil, &_block) + result = API.open_graphql(query, variables:) + + has_next_page = T.let(true, T::Boolean) + variables ||= {} + while has_next_page + page_info = yield result + has_next_page = page_info["hasNextPage"] + if has_next_page + variables[:after] = page_info["endCursor"] + result = API.open_graphql(query, variables:) + end + end + end + def self.raise_error(output, errors, http_code, headers, scopes) json = begin JSON.parse(output)