Skip to content

Commit 0c27339

Browse files
committed
common roadblocks
1 parent 95ec145 commit 0c27339

File tree

5 files changed

+1176
-0
lines changed

5 files changed

+1176
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import requests"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 2,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36\"}"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 4,
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"r = requests.get('https://www.youtube.com', headers = headers)"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 5,
33+
"metadata": {},
34+
"outputs": [
35+
{
36+
"data": {
37+
"text/plain": [
38+
"200"
39+
]
40+
},
41+
"execution_count": 5,
42+
"metadata": {},
43+
"output_type": "execute_result"
44+
}
45+
],
46+
"source": [
47+
"r.status_code"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": []
56+
}
57+
],
58+
"metadata": {
59+
"kernelspec": {
60+
"display_name": "Python 3",
61+
"language": "python",
62+
"name": "python3"
63+
},
64+
"language_info": {
65+
"codemirror_mode": {
66+
"name": "ipython",
67+
"version": 3
68+
},
69+
"file_extension": ".py",
70+
"mimetype": "text/x-python",
71+
"name": "python",
72+
"nbconvert_exporter": "python",
73+
"pygments_lexer": "ipython3",
74+
"version": "3.7.6"
75+
}
76+
},
77+
"nbformat": 4,
78+
"nbformat_minor": 4
79+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
<!DOCTYPE html>
2+
<html>
3+
4+
<head>
5+
<title>HTML Form</title>
6+
</head>
7+
8+
<body>
9+
10+
<form action="/secure/users/sign_in?after_success_url=%2Fhome" method="post">
11+
12+
<label for="user_email">Email Address</label>
13+
<input type="email" name="user[email]" id="user_email">
14+
15+
<label for="user_password">Password</label>
16+
<input type="password" name="user[password]" id="user_password">
17+
18+
<input type="hidden" name="utf-8" value="">
19+
<input type="hidden" name="authenticity_token" value="/VIF79Gh0/GAHAZziuS3RR5L8u8==">
20+
21+
<input type="submit" value="Log In">
22+
23+
</form>
24+
25+
</body>
26+
27+
</html>
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import requests"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 2,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"# URL of the POST request - inspect the page's HTML or use the browser devtools to obtain it\n",
19+
"url = \"target_url_of_post_request\""
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": 3,
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"# Define parameters sent with the POST request\n",
29+
"# (if there are additional ones, define them as well)\n",
30+
"user = \"Your username goes here\"\n",
31+
"password = \"Your password goes here\""
32+
]
33+
},
34+
{
35+
"cell_type": "code",
36+
"execution_count": 4,
37+
"metadata": {},
38+
"outputs": [],
39+
"source": [
40+
"# Arrange all parameters in a dictionary format with the right names\n",
41+
"payload = {\n",
42+
" \"user[email]\": user,\n",
43+
" \"user[password]\": password\n",
44+
"}"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": 5,
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"# Create a session so that we have consistent cookies\n",
54+
"s = requests.Session()"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 6,
60+
"metadata": {},
61+
"outputs": [
62+
{
63+
"data": {
64+
"text/plain": [
65+
"200"
66+
]
67+
},
68+
"execution_count": 6,
69+
"metadata": {},
70+
"output_type": "execute_result"
71+
}
72+
],
73+
"source": [
74+
"# Submit the POST request through the session\n",
75+
"p = s.post(url, data = payload)\n",
76+
"p.status_code"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": 7,
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"# You are now logged in and can proceed with scraping the data\n",
86+
"# .\n",
87+
"# .\n",
88+
"# .\n",
89+
"\n",
90+
"# Don't forget to close the session when you are done\n",
91+
"s.close()"
92+
]
93+
}
94+
],
95+
"metadata": {
96+
"kernelspec": {
97+
"display_name": "Python 3",
98+
"language": "python",
99+
"name": "python3"
100+
},
101+
"language_info": {
102+
"codemirror_mode": {
103+
"name": "ipython",
104+
"version": 3
105+
},
106+
"file_extension": ".py",
107+
"mimetype": "text/x-python",
108+
"name": "python",
109+
"nbconvert_exporter": "python",
110+
"pygments_lexer": "ipython3",
111+
"version": "3.7.6"
112+
}
113+
},
114+
"nbformat": 4,
115+
"nbformat_minor": 2
116+
}

0 commit comments

Comments
 (0)