-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
343 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.Ruserdata |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
Package: worldfootballR | ||
Type: Package | ||
Title: An R Package To Extract World Football (Soccer) data from fbref.com | ||
Version: 0.0.0.9000 | ||
Authors@R: person("Jason Zivkovic", email = "jase.ziv83@gmail.com", | ||
role = c("aut", "cre")) | ||
URL: https://github.com/JaseZiv/worldfootballR | ||
BugReports: https://github.com/JaseZiv/worldfootballR/issues | ||
Description: This R package will enable users to extract a number of different game and player statistics and metrics from fbref.com. | ||
License: GPL-3 | ||
Encoding: UTF-8 | ||
LazyData: true | ||
Imports: | ||
rlang, | ||
dplyr, | ||
purrr, | ||
tidyr, | ||
stringr, | ||
lubridate, | ||
magrittr, | ||
scales, | ||
xml2, | ||
rvest, | ||
curl, | ||
rvest, | ||
xml2 | ||
RoxygenNote: 7.1.0 | ||
Suggests: knitr, | ||
rmarkdown, | ||
ggplot2 | ||
VignetteBuilder: knitr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(get_match_results) | ||
export(get_match_urls) | ||
importFrom(magrittr,"%>%") | ||
importFrom(rlang,.data) |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#' Get match results
#'
#' Returns the game results for a given league season, scraped from the
#' "Fixtures" page of the selected fbref.com league season.
#'
#' @param country the three character country code
#' @param gender gender of competition, either "M" or "F"
#' @param season_end_year the year the season concludes, in quotes, ie "2021"
#'
#' @return returns a dataframe with the results of the competition, season and
#'   gender. Columns are Season, Round, Wk, Day, Date, Time, Home, HomeGoals,
#'   Away, AwayGoals, Attendance, Venue, Referee and Notes, plus Home_xG and
#'   Away_xG when the scraped table contains xG columns. Round is NA when the
#'   scraped table has no Round column.
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @export
#'
#' @examples
#' \dontrun{
#' get_match_results(country = "ITA", gender = "M", season_end_year = "2020")
#' }
get_match_results <- function(country, gender, season_end_year) {

  print(paste0(
    "Scraping ", country, " first division for the ", season_end_year,
    " season (gender = ", gender, ")"
  ))

  main_url <- "https://fbref.com"

  select_season <- .get_league_season_url(country, gender, season_end_year)
  if (length(select_season) != 1) {
    stop(
      "Expected exactly one season URL for country = '", country,
      "', gender = '", gender, "', season_end_year = '", season_end_year,
      "' but found ", length(select_season), ".",
      call. = FALSE
    )
  }

  fixture_links <- xml2::read_html(select_season) %>%
    rvest::html_nodes(".hoversmooth") %>%
    rvest::html_nodes(".full") %>%
    rvest::html_nodes("a") %>%
    rvest::html_attr("href")
  fixture_links <- unique(fixture_links[grepl("Fixtures", fixture_links)])

  # paste0(main_url, character(0)) would yield just main_url, so an absent
  # link must be caught here rather than silently scraping the home page.
  if (length(fixture_links) == 0) {
    stop("No 'Fixtures' link found on ", select_season, call. = FALSE)
  }
  fixtures_url <- paste0(main_url, fixture_links[1])

  # Only the first table on the page is used; rows with an empty Date are
  # dropped.
  season_summary <- xml2::read_html(fixtures_url) %>%
    rvest::html_table() %>%
    .[1] %>%
    data.frame() %>%
    dplyr::filter(.data$Date != "")

  # Warnings are suppressed because rows whose Score does not split into two
  # numeric pieces are expected to become NA.
  suppressWarnings(
    season_summary <- season_summary %>%
      tidyr::separate("Score", into = c("HomeGoals", "AwayGoals"), sep = "–") %>%
      dplyr::mutate(HomeGoals = as.numeric(.data$HomeGoals),
                    AwayGoals = as.numeric(.data$AwayGoals),
                    Attendance = as.numeric(gsub(",", "", .data$Attendance)))
  )

  # rep() keeps these assignments valid for a zero-row table as well.
  season_summary$Season <- rep(season_end_year, nrow(season_summary))
  if (!"Round" %in% names(season_summary)) {
    season_summary$Round <- rep(NA, nrow(season_summary))
  }

  # data.frame() de-duplicates the two scraped "xG" headers into xG / xG.1.
  has_xg <- all(c("xG", "xG.1") %in% names(season_summary))
  if (has_xg) {
    names(season_summary)[names(season_summary) == "xG"] <- "Home_xG"
    names(season_summary)[names(season_summary) == "xG.1"] <- "Away_xG"
  }

  keep_cols <- c(
    "Season", "Round", "Wk", "Day", "Date", "Time",
    "Home", "HomeGoals", if (has_xg) "Home_xG",
    "Away", "AwayGoals", if (has_xg) "Away_xG",
    "Attendance", "Venue", "Referee", "Notes"
  )

  season_summary[, keep_cols, drop = FALSE]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#' Get match URLs
#'
#' Returns the URL for each match played for a given league season
#'
#' @param country the three character country code
#' @param gender gender of competition, either "M" or "F"
#' @param season the year the season concludes, in quotes, ie "2021"
#'
#' @return returns a character vector of all fbref match URLs for a given
#'   competition, season and gender (zero-length when the fixtures page has no
#'   matching links)
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @export
#'
#' @examples
#' \dontrun{
#' get_match_urls(country = "ENG", gender = "M", season = "2020")
#' }
get_match_urls <- function(country, gender, season) {
  main_url <- "https://fbref.com"

  selected_season <- .get_league_season_url(
    toupper(country), toupper(gender), season
  )
  if (length(selected_season) != 1) {
    stop(
      "Expected exactly one season URL for country = '", country,
      "', gender = '", gender, "', season = '", season,
      "' but found ", length(selected_season), ".",
      call. = FALSE
    )
  }

  fixture_links <- xml2::read_html(selected_season) %>%
    rvest::html_nodes(".hoversmooth") %>%
    rvest::html_nodes(".full") %>%
    rvest::html_nodes("a") %>%
    rvest::html_attr("href")
  fixture_links <- unique(fixture_links[grepl("Fixtures", fixture_links)])

  # paste0(main_url, character(0)) would yield just main_url, so an absent
  # link must be caught here rather than silently scraping the home page.
  if (length(fixture_links) == 0) {
    stop("No 'Fixtures' link found on ", selected_season, call. = FALSE)
  }
  fixtures_url <- paste0(main_url, fixture_links[1])

  report_links <- xml2::read_html(fixtures_url) %>%
    rvest::html_nodes("td.left~ .left+ .left a") %>%
    rvest::html_attr("href")
  report_links <- unique(report_links[!is.na(report_links)])

  # Same paste0() pitfall: return an empty vector instead of the bare site
  # root when no links are present.
  if (length(report_links) == 0) {
    return(character(0))
  }

  paste0(main_url, report_links)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
|
||
# Register the pipe placeholder `.` as a global variable name so that
# R CMD check does not report it as an undefined global.
utils::globalVariables(".")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#' Get Tier 1 competitions
#'
#' Scrapes the fbref.com competitions index and returns the table found in
#' the "#all_comps_1_fa_club_league_senior" section, with one extra column
#' holding the full URL of each competition.
#'
#' @return returns a dataframe with the tier 1 competitions from around the
#'   world, including a `competition_urls` column
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
.get_tier1_competitions <- function() {
  site_root <- "https://fbref.com"

  # Index page listing all competitions
  comps_page <- xml2::read_html("https://fbref.com/en/comps/")

  # Only the Tier 1 club competitions section is used; a different selector
  # is needed if more competitions are required
  tier1_section <- rvest::html_nodes(
    comps_page, "#all_comps_1_fa_club_league_senior"
  )

  # Relative competition links from the table body, made absolute
  link_nodes <- rvest::html_nodes(
    rvest::html_node(tier1_section, "tbody"), "th a"
  )
  competition_urls <- paste0(site_root, rvest::html_attr(link_nodes, "href"))

  # The table that contains the competitions, with the URL column appended
  tier1_table <- data.frame(
    rvest::html_table(rvest::html_nodes(tier1_section, ".sortable"))
  )
  tier1_table <- cbind(tier1_table, competition_urls)

  # Drop the two character flag code, leaving only the three character code
  tier1_table$Country <- gsub(".*? ", "", tier1_table$Country)

  tier1_table
}
|
||
|
||
|
||
#' Get URL of league season
#'
#' Returns a URL for the selected league seasons. Country and gender are
#' matched case-insensitively against the tier 1 competitions table.
#'
#' @param country the three character country code
#' @param gender gender of competition, either "M" or "F"
#' @param season the year the season concludes, in quotes, ie "2021"
#'
#' @return a URL for the selected league seasons, as a character vector
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
.get_league_season_url <- function(country, gender, season) {
  main_url <- "https://fbref.com"

  competitions <- .get_tier1_competitions()

  # which() drops NA comparisons; as.character() guards against the URL
  # column being a factor (data.frame default before R 4.0).
  is_match <- toupper(competitions$Country) == toupper(country) &
    toupper(competitions$Gender) == toupper(gender)
  league_url <- as.character(competitions$competition_urls[which(is_match)])

  if (length(league_url) != 1) {
    stop(
      "Expected exactly one tier 1 competition for country = '", country,
      "' and gender = '", gender, "' but found ", length(league_url), ".",
      call. = FALSE
    )
  }

  # Query the season links once and derive both labels and hrefs from them.
  season_links <- xml2::read_html(league_url) %>%
    rvest::html_nodes("th a")

  # Labels such as "2019-2020" are reduced to the text after the last hyphen.
  season_end_years <- gsub(".*-", "", rvest::html_text(season_links))
  season_hrefs <- rvest::html_attr(season_links, "href")

  matched_hrefs <- unique(season_hrefs[which(season_end_years == season)])

  if (length(matched_hrefs) == 0) {
    stop(
      "No season ending in '", season, "' found at ", league_url,
      call. = FALSE
    )
  }

  paste0(main_url, matched_hrefs)
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX | ||
|
||
AutoAppendNewline: Yes | ||
StripTrailingWhitespace: Yes | ||
|
||
BuildType: Package | ||
PackageUseDevtools: Yes | ||
PackageInstallArgs: --no-multiarch --with-keep.source |