-
Notifications
You must be signed in to change notification settings - Fork 0
/
rvest_addThisLinkedInCourse.R
53 lines (36 loc) · 2.06 KB
/
rvest_addThisLinkedInCourse.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# scrape the LinkedIn Learning website for links to each video file
# Dependencies: xml2 (url_parse, xml_attr) and rvest (read_html, html_nodes).
# Install a package only when it is missing, so that re-running the script
# does not reinstall it (and hit the network) on every execution.
if (!requireNamespace("xml2", quietly = TRUE)) {
  install.packages("xml2")
}
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
library(xml2)
library(rvest)
# Setup -------------------------------------------------------------------
# URLToCourse is the course page to scrape. It is expected to look like
# https://www.linkedin.com/learning/r-programming-in-data-science-dates-and-times/calculating-times-and-dates-with-r
URLToCourse <- "https://www.linkedin.com/learning/code-clinic-r-2/welcome?u=2125562"
# Other course URLs (presumably earlier values of URLToCourse; paste one into
# the assignment above to scrape that course instead):
# https://www.linkedin.com/learning/r-programming-in-data-science-setup-and-start/welcome?u=2125562
# https://www.linkedin.com/learning/r-programming-in-data-science-high-velocity-data/how-can-you-use-r-with-high-velocity-data?u=2125562
# https://www.linkedin.com/learning/r-programming-in-data-science-high-volume-data/wrangling-high-volume-data-with-r?u=2125562
# https://www.linkedin.com/learning/r-programming-in-data-science-high-variety-data/jumping-over-the-high-variety-hurdle?u=2125562
# https://www.linkedin.com/learning/r-for-data-science-lunchbreak-lessons/weighted-mean?u=2125562
# https://www.linkedin.com/learning/r-programming-in-data-science-dates-and-times/calculating-times-and-dates-with-r
# run this ----------------------------------------------------------------
# Load the accumulated course/video table, scrape the table-of-contents links
# from URLToCourse, append one (course, video) row per link, and save the
# de-duplicated table back to disk.
#
# NOTE: despite the ".rds" extension, the file is written with save() and read
# with load() (not saveRDS()/readRDS()). The pair is kept as-is so that
# existing data files remain readable.
lilDataFile <- "infoAboutLILCourses.rds"
if (file.exists(lilDataFile)) {
  load(lilDataFile)
} else {
  # First run: start from an empty table instead of failing inside load().
  infoAboutLILCourses <- data.frame(
    course = character(0),
    video = character(0)
  )
}

thisPage <- read_html(URLToCourse)
alldivs <- html_nodes(thisPage, '.toc__item__link')

# xml_attr() is vectorized over the node set. Links without an href carry no
# course/video information, so they are skipped.
videoHrefs <- xml_attr(alldivs, "href")
videoHrefs <- videoHrefs[!is.na(videoHrefs)]

if (length(videoHrefs) > 0) {
  # Splitting the URL path on "/" yields a leading "" for the initial slash;
  # elements 3 and 4 are taken as the course and video slugs.
  # Assumes hrefs are shaped like URLToCourse (/learning/<course>/<video>) --
  # TODO confirm against the live page markup.
  urlSplit <- strsplit(x = url_parse(videoHrefs)$path, "/")
  newRows <- data.frame(
    course = vapply(urlSplit, function(parts) parts[3], character(1)),
    video = vapply(urlSplit, function(parts) parts[4], character(1))
  )
  # Bind all new rows in one call rather than growing the table per link.
  infoAboutLILCourses <- rbind(infoAboutLILCourses, newRows)
}

# Re-scraping the same course must not create duplicate rows.
infoAboutLILCourses <- unique(infoAboutLILCourses)
save(infoAboutLILCourses, file = lilDataFile)
# Example values ("add this to files"): a topic, the video slug it belongs to,
# and the matching affiliate link.
topics <- "data()"
video <- "r-built-in-data-sets"
affiliate <- "https://linkedin-learning.pxf.io/rwkly_dataSets"
# stuff for later
# Build an empty (zero-row) table with the columns `course` and `video`:
# create a one-row placeholder from the two blank strings, then keep none of
# its rows.
course <- " "
video <- " "
infoAboutLILCourses <- data.frame(course, video)[0, ]