Skip to content

Commit 4f1a86c

Browse files
committed
Initial commit
0 parents  commit 4f1a86c

File tree

8 files changed

+384
-0
lines changed

8 files changed

+384
-0
lines changed

.gitignore

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
*.ini
2+
!*.sample.ini
3+
*.csv
4+
!*.sample.csv
5+
*.txt
6+
!*.sample.txt
7+
!requirements.txt
8+
*.json
9+
!*.sample.json
10+
11+
12+
# General
13+
.DS_Store
14+
.AppleDouble
15+
.LSOverride
16+
17+
# Icon must end with two \r
18+
Icon
19+
20+
# Thumbnails
21+
._*
22+
23+
# Files that might appear in the root of a volume
24+
.DocumentRevisions-V100
25+
.fseventsd
26+
.Spotlight-V100
27+
.TemporaryItems
28+
.Trashes
29+
.VolumeIcon.icns
30+
.com.apple.timemachine.donotpresent
31+
32+
# Directories potentially created on remote AFP share
33+
.AppleDB
34+
.AppleDesktop
35+
Network Trash Folder
36+
Temporary Items
37+
.apdisk
38+
39+
40+
# Byte-compiled / optimized / DLL files
41+
__pycache__/
42+
*.py[cod]
43+
*$py.class
44+
45+
# Scrapy stuff:
46+
.scrapy
47+
48+
# pyenv
49+
.python-version
50+
51+
# Environments
52+
.env
53+
.venv
54+
env/
55+
venv/
56+
ENV/
57+
env.bak/
58+
venv.bak/

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# DataForSEO SERP API Python Client
2+
3+
## Setup
4+
5+
## Usage
6+
7+
### 1. Post tasks
8+
9+
### 2. See if tasks are ready
10+
11+
### 3. Get results
12+

client.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from http.client import HTTPSConnection
2+
from base64 import b64encode
3+
from json import loads
4+
from json import dumps
5+
6+
class RestClient:
    """Minimal HTTPS client for the DataForSEO REST API using HTTP Basic auth.

    Each call opens a fresh connection to ``domain``, sends a single request
    and returns the response body decoded from JSON.
    """

    # Host that receives every request.
    domain = "api.dataforseo.com"
    # Alternative host kept from the original source: "sandbox.dataforseo.com"

    def __init__(self, username, password):
        """Store the credentials used to build the Basic auth header."""
        self.username = username
        self.password = password

    def request(self, path, method, data=None):
        """Send one HTTPS request and return the JSON-decoded response.

        Args:
            path: Request path on ``domain``.
            method: HTTP verb, e.g. ``'GET'`` or ``'POST'``.
            data: Optional request body (``str`` or ``bytes``).

        Returns:
            The response body parsed with ``json.loads``.
        """
        credentials = "%s:%s" % (self.username, self.password)
        token = b64encode(credentials.encode("ascii")).decode("ascii")
        headers = {'Authorization': 'Basic %s' % token}

        if isinstance(data, str):
            # http.client encodes str bodies as ISO-8859-1, which fails (or
            # mis-encodes) on non-Latin-1 text; JSON payloads must be UTF-8.
            data = data.encode("utf-8")
        if data is not None:
            headers['Content-Type'] = 'application/json'

        connection = HTTPSConnection(self.domain)
        try:
            connection.request(method, path, headers=headers, body=data)
            response = connection.getresponse()
            return loads(response.read().decode())
        finally:
            # Always release the socket, even when the request or parsing fails.
            connection.close()

    def get(self, path):
        """Issue a GET request for ``path`` and return the decoded JSON."""
        return self.request(path, 'GET')

    def post(self, path, data):
        """POST ``data`` to ``path`` and return the decoded JSON.

        A ``str`` payload is sent as-is; anything else is serialized with
        ``json.dumps`` first.
        """
        data_str = data if isinstance(data, str) else dumps(data)
        return self.request(path, 'POST', data_str)

config.sample.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[general]
2+
user =
3+
password =

requirements.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
astroid==2.2.5
2+
beautifulsoup4==4.6.0
3+
bs4==0.0.1
4+
certifi==2017.11.5
5+
charade==1.0.3
6+
chardet==3.0.4
7+
cmdline-csv2keychain==0.1.3
8+
idna==2.6
9+
isort==4.3.21
10+
JPype1==0.6.2
11+
lazy-object-proxy==1.4.1
12+
lxml==4.1.1
13+
mccabe==0.6.1
14+
nltk==3.3
15+
numpy==1.15.2
16+
pandas==0.23.4
17+
pylint==2.3.1
18+
python-dateutil==2.7.3
19+
pytz==2018.5
20+
requests==2.18.4
21+
six==1.11.0
22+
typed-ast==1.4.0
23+
urllib3==1.22
24+
validate-email==1.3
25+
wrapt==1.11.2

tasks_get.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
####
2+
## DATAFORSEO SERPS API
3+
##
4+
## Get the results of the tasks that are ready
5+
####
6+
7+
import csv
8+
import configparser
9+
import argparse
10+
import time
11+
import datetime
12+
from client import RestClient
13+
14+
15+
if __name__ == '__main__':
    # Download the results of every ready task and append them to a
    # timestamped, semicolon-delimited CSV file, one row per SERP item.
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default="config.ini",
                        type=str, help='Global config file (default: "config.ini")')
    parser.add_argument('--output', default="keyword-results",
                        type=str, help='Output basename (default: "keyword-results")')
    parser.add_argument('--delay', default=10,
                        type=float, help='Delay in seconds between batches of requests (default: 10)')
    args = parser.parse_args()

    conf = configparser.ConfigParser()
    conf.read(args.config)
    user = conf['general']['user']
    password = conf['general']['password']

    # Output headers
    fields = ['task_id', 'status', 'request', 'request_type', 'domain',
              'location_code', 'language_code', 'timestamp', 'results_count',
              'rank_group', 'rank_absolute', 'result_type', 'title',
              'description', 'url', 'breadcrumb']
    # Columns copied from each item under the same name.
    item_fields = ('rank_group', 'rank_absolute', 'title', 'description',
                   'url', 'breadcrumb')
    # Output name
    timestr = time.strftime("%Y%m%d-%H%M%S")
    tag = args.output + "-" + timestr
    filename = tag + ".csv"

    # Create the file with its header; rows are appended batch by batch.
    with open(filename, 'w', newline='') as file:
        csv.DictWriter(file, fieldnames=fields, delimiter=";").writeheader()

    client = RestClient(user, password)

    # While there are results, request the next batch
    next_batch = True
    while next_batch:
        response = client.get("/v3/serp/google/organic/tasks_ready")
        if response['status_code'] != 20000:
            print("error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))
            break

        tasks_available = response["tasks"][0]["result_count"]
        print("{} tasks available".format(tasks_available))
        if tasks_available < 1:
            next_batch = False

        # Fetch the regular result of every task listed as ready.
        results = []
        for task in response['tasks']:
            # 'result' may be None when nothing is ready.
            for ready_task in task.get('result') or []:
                endpoint = ready_task.get('endpoint_regular')
                if endpoint:
                    results.append(client.get(endpoint))

        # Flatten the nested results into one row per SERP item.
        rows = []
        for result in results:
            for task in result.get("tasks") or []:
                for kw in task.get("result") or []:
                    common = {
                        "task_id": task.get('id'),
                        "status": task.get('status_message'),
                        "request": kw.get("keyword"),
                        "request_type": kw.get("type"),
                        "domain": kw.get("se_domain"),
                        "location_code": kw.get("location_code"),
                        "language_code": kw.get("language_code"),
                        "timestamp": kw.get("datetime"),
                        "results_count": kw.get("se_results_count"),
                    }
                    for item in kw.get("items") or []:
                        row = dict(common)
                        row["result_type"] = item.get("type")
                        # Missing keys become empty cells instead of raising
                        # KeyError.
                        for name in item_fields:
                            row[name] = item.get(name)
                        rows.append(row)

        # One append per batch instead of reopening the file for every row.
        if rows:
            with open(filename, 'a', newline='') as file:
                writer = csv.DictWriter(file, fieldnames=fields, delimiter=";")
                writer.writerows(rows)

        print("Batch done.")
        # No need to wait once the last batch has been processed.
        if next_batch:
            time.sleep(args.delay)

tasks_post.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
####
2+
## DATAFORSEO SERPS API
3+
##
4+
## Post tasks
5+
####
6+
7+
import csv
8+
import configparser
9+
import argparse
10+
import time
11+
import datetime
12+
from client import RestClient
13+
14+
def range_limited_float_type(arg):
    """Type function for argparse: an integer between 1 and 100 inclusive.

    The name is historical; the value is parsed with ``int``, not ``float``.

    Args:
        arg: Raw command-line value.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If ``arg`` is not an integer literal or
            lies outside the range 1..100.
    """
    try:
        value = int(arg)
    except ValueError:
        raise argparse.ArgumentTypeError("Must be an integer") from None
    if not 1 <= value <= 100:
        raise argparse.ArgumentTypeError("Argument must be >= 1 and <= 100")
    return value
23+
24+
25+
if __name__ == '__main__':
    # Post one SERP task per keyword of the input file, in batches, and log
    # the created task ids to a timestamped CSV file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default="config.ini",
                        type=str, help='Global config file (default: "config.ini")')
    parser.add_argument('--input', required=True,
                        type=str, help='List of keywords to request')
    parser.add_argument('--output', default="keyword-requests",
                        type=str, help='Output basename (default: "keyword-requests")')
    parser.add_argument('--language_code', default="fr",
                        type=str, help='Language code for requests (default: "fr")')
    parser.add_argument('--location_code', default=2250,
                        type=int, help='Location code for requests (default: "2250" for France, get other codes on <https://api.dataforseo.com/v3/serp/google/locations>)')
    parser.add_argument('--nb_results', default=10,
                        type=int, help='Number of results (default: 10)')
    parser.add_argument('--device', choices=['desktop', 'mobile'], default="desktop",
                        help='Device type (default:"desktop")')
    parser.add_argument('--priority', choices=['high', 'low'], default='low',
                        help='Priority queue (default: "low")')
    parser.add_argument('--batch', default=100,
                        type=range_limited_float_type, help='Max number of tasks per batch (default: 100)')
    parser.add_argument('--delay', default=10,
                        type=float, help='Delay in seconds between batches of requests (default: 10)')
    parser.add_argument('--sep', default=";",
                        type=str, help='CSV file separator (default: ";")')
    args = parser.parse_args()

    # Read list of requests: one keyword per line, blank lines are skipped so
    # no empty keyword is ever posted.
    with open(args.input, 'r') as file:
        kws = [kw for kw in (line.strip() for line in file) if kw]

    # Output headers
    fields = ['request', 'status', 'id', 'tag']
    # Output name
    timestr = time.strftime("%Y%m%d-%H%M%S")
    tag = args.output + "-" + timestr
    filename = tag + ".csv"

    print('Requests are tagged: {}'.format(tag))

    # Set priority queue: numeric value sent for the chosen queue name
    priority = {'low': 1, 'high': 2}[args.priority]

    conf = configparser.ConfigParser()
    conf.read(args.config)
    user = conf['general']['user']
    password = conf['general']['password']

    # Send requests
    with open(filename, 'w', newline='') as file:
        # Honour --sep (it was parsed but previously ignored).
        writer = csv.DictWriter(file, fieldnames=fields, delimiter=args.sep)
        writer.writeheader()

        client = RestClient(user, password)

        # Cut the kws list in batches of at most args.batch tasks
        batch_starts = range(0, len(kws), args.batch)
        for batch_number, start in enumerate(batch_starts, start=1):
            # The payload maps a running index to each task description.
            post_data = {
                index: dict(
                    language_code=args.language_code,
                    location_code=args.location_code,
                    keyword=kw,
                    priority=priority,
                    depth=args.nb_results,
                    device=args.device,
                    tag=tag,
                )
                for index, kw in enumerate(kws[start:start + args.batch])
            }

            response = client.post("/v3/serp/google/organic/task_post", post_data)
            if response["status_code"] == 20000:
                for task in response["tasks"]:
                    writer.writerow({
                        "request": task["data"]["keyword"],
                        "status": task["status_message"],
                        "id": task["id"],
                        "tag": task["data"]["tag"],
                    })
                print("Batch {} done.".format(batch_number))
            else:
                print("Error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))

            time.sleep(args.delay)
117+

tasks_ready.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
####
2+
## DATAFORSEO SERPS API
3+
##
4+
## Shows number of tasks ready for download
5+
####
6+
7+
import csv
8+
import configparser
9+
import argparse
10+
import time
11+
import datetime
12+
from client import RestClient
13+
14+
15+
if __name__ == '__main__':
    # Print how many tasks the API reports as ready, or the API error.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--config',
        type=str,
        default="config.ini",
        help='Global config file (default: "config.ini")',
    )
    options = cli.parse_args()

    # Credentials come from the [general] section of the config file.
    settings = configparser.ConfigParser()
    settings.read(options.config)
    general = settings['general']
    api = RestClient(general['user'], general['password'])

    reply = api.get("/v3/serp/google/organic/tasks_ready")
    if reply["status_code"] != 20000:
        print("error. Code: %d Message: %s" % (reply["status_code"], reply["status_message"]))
    else:
        ready_count = reply["tasks"][0]["result_count"]
        print("{} tasks available".format(ready_count))

0 commit comments

Comments
 (0)