Commit
* File and directory creation
* Adding proxy scraper script. Result: list of elite proxies with ports
* Script edited: deleted unneeded lines, removed bs (BeautifulSoup); the function now returns a clean DataFrame
* Saving to CSV added
* Added a duplicate-removal script that updates the list in the file
* Added FileNotFoundError exception handling. Checking status code
* Result of proxy scraper
* Convert proxies to a list. Files reorganization
* Header list
* ImportError fix; CSV file added to .gitignore
* Scraping the worlds list from tibia.com
* Scraping character information from tibia.com
* Temporary data file
* Highscores scraper, early version
* Deleted proxy usage. Deleted unneeded code
* Deleted unneeded code
* Main page HTML code added; footer font size changed; YouTube and Twitch buttons added to the nav bar
* Fixed spacing, a typo, and content information for future work
* Test news added
* Database added. Creating models
Showing 13 changed files with 644 additions and 12 deletions.
@@ -1,3 +1,31 @@
from django.db import models


# Create your models here.

class News(models.Model):
    news_id = models.AutoField(primary_key=True)
    id_on_tibiacom = models.SmallIntegerField(blank=True, null=True)
    url_tibiacom = models.CharField(unique=True, max_length=255, blank=True, null=True)
    type = models.CharField(max_length=20, blank=True, null=True)
    content = models.TextField(blank=True, null=True)
    content_html = models.TextField(blank=True, null=True)

    class Meta:
        managed = False
        db_table = 'news'


class World(models.Model):
    world_id = models.SmallIntegerField(primary_key=True)
    name = models.CharField(max_length=45, blank=True, null=True)
    name_value = models.CharField(max_length=45, blank=True, null=True)
    pvp_type = models.CharField(max_length=30, blank=True, null=True)
    pvp_type_value = models.IntegerField(blank=True, null=True)
    battleye_protected = models.CharField(max_length=45, blank=True, null=True)
    battleye_date = models.DateField(blank=True, null=True)
    battleye_value = models.IntegerField(blank=True, null=True)
    location = models.CharField(max_length=45, blank=True, null=True)
    location_value = models.IntegerField(blank=True, null=True)

    class Meta:
        managed = False
        db_table = 'world'
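
For illustration only (not part of the commit): a minimal sketch of querying these unmanaged models from Django, assuming the app (called app here as a placeholder) is listed in INSTALLED_APPS and the news/world tables already exist in the configured database.

# Hypothetical usage sketch; 'app' is a placeholder for the actual Django app name.
from app.models import News, World


def latest_news(limit=5):
    # Newest rows have the highest auto-increment id.
    return News.objects.order_by('-news_id')[:limit]


def worlds_in(location='Europe'):
    # Filter on the plain-text location column; 'Europe' is only an example value.
    return World.objects.filter(location=location)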
@@ -0,0 +1,28 @@
import pandas as pd
from .proxy_scrapper import scrap_proxy


def delete_duplicates():
    try:
        # The csv has no header row, so stop pandas from treating the first proxy as one.
        proxy_list = pd.read_csv('proxy_list.csv', sep=':', header=None)
        proxy_list = proxy_list.drop_duplicates()
        proxy_list.to_csv('proxy_list.csv',
                          encoding='utf-8',
                          index=False,
                          sep=':',
                          header=False,
                          mode='w')
    except FileNotFoundError:
        # No list yet: scrape one, then retry the deduplication.
        scrap_proxy()
        delete_duplicates()


def get_proxy_list():
    temp_list = []
    with open('proxy_list.csv') as file:  # the with block closes the file automatically
        for line in file:
            temp_list.append(line.strip())
    temp_list.pop()  # drop the last entry
    return temp_list
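
A quick usage sketch (illustrative, not in the commit). The module name proxy_utils is a placeholder, and the script is assumed to run from the directory that contains proxy_list.csv with a package layout that allows the relative import above.

# Illustrative only; 'proxy_utils' is a placeholder module name.
from proxy_utils import delete_duplicates, get_proxy_list

delete_duplicates()         # scrapes a fresh list first if proxy_list.csv is missing
proxies = get_proxy_list()  # e.g. ['149.129.187.190:3128', ...]
print(f'{len(proxies)} proxies loaded')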
@@ -0,0 +1,125 @@
149.129.187.190:3128
169.57.1.85:8123
83.229.73.175:80
8.209.68.1:8080
83.229.72.174:80
20.111.54.16:80
103.87.169.146:56642
172.105.107.25:999
110.164.3.7:8888
20.210.113.32:8123
20.24.43.214:8123
208.82.61.66:3128
167.71.199.211:33195
198.49.68.80:80
157.230.122.224:80
204.185.204.64:8080
217.160.50.205:80
68.183.134.155:80
64.225.97.57:8080
134.238.252.143:8080
14.139.242.7:80
130.18.255.115:8080
180.94.69.66:8080
128.199.243.33:44783
80.48.119.28:8080
49.0.2.242:8090
164.62.72.90:80
197.242.159.51:80
43.255.113.232:8082
151.181.91.10:80
153.127.35.151:8000
105.174.7.254:8080
47.242.43.30:1080
51.79.50.46:9300
198.59.191.234:8080
200.105.215.22:33630
209.97.173.38:44433
165.22.60.129:34311
91.209.11.131:80
190.92.134.220:80
82.165.184.53:80
43.205.33.122:80
167.71.212.129:35111
58.27.59.249:80
185.15.172.212:3128
45.56.98.229:49857
52.200.191.158:80
121.139.171.39:80
167.172.158.85:81
143.198.82.124:8081
139.59.39.34:80
121.139.171.32:80
20.205.46.128:80
121.139.171.43:80
143.110.232.177:80
43.135.156.58:59394
43.135.157.80:59394
87.247.186.105:80
62.171.188.233:8000
118.27.113.167:8080
176.56.107.234:33911
43.135.156.130:59394
133.242.171.216:3128
54.211.109.92:80
176.196.250.86:3128
188.0.147.102:3128
121.139.171.49:80
185.237.99.218:61443
134.122.58.174:80
54.36.239.180:5000
154.236.189.29:8080
159.203.31.27:8118
103.59.88.24:81
115.96.208.124:8080
5.2.75.58:8118
112.140.186.124:808
165.192.111.151:3129
200.25.254.193:54240
64.227.23.88:8118
138.201.125.229:8118
189.3.169.34:9812
154.19.187.251:3128
41.65.236.43:1976
45.138.135.94:8080
151.248.117.248:80
5.189.184.6:80
153.120.6.180:8181
121.139.171.51:80
201.229.250.19:8080
121.139.171.46:80
121.139.171.26:80
82.165.105.48:80
121.139.171.4:80
47.254.47.61:8080
178.79.138.253:8080
194.195.213.197:1080
20.106.201.217:5003
187.217.54.84:80
121.58.254.35:3128
122.49.208.243:3128
47.245.34.161:8080
139.162.44.152:57114
182.72.203.246:80
198.11.175.180:8080
212.71.255.43:38613
147.139.164.26:8080
8.209.249.96:8080
172.105.190.51:8017
37.112.57.47:8080
159.192.249.87:8080
121.1.41.162:111
121.139.171.1:80
157.230.241.133:33273
52.197.8.131:80
117.102.202.230:8080
178.128.110.95:42451
183.181.8.173:11070
1.224.3.122:3888
8.219.5.240:8081
51.75.206.209:80
172.104.252.86:80
96.126.103.64:9991
43.154.116.244:24000
173.255.209.155:1080
168.119.53.93:80
@@ -0,0 +1,18 @@
import requests
import pandas as pd


def scrap_proxy():
    url = 'https://free-proxy-list.net/'
    request = requests.get(url)

    if request.status_code == 200:
        # Parse the first HTML table on the page and keep only elite proxies.
        proxy_df = pd.DataFrame(data=pd.read_html(request.text)[0])
        proxy_df = proxy_df[proxy_df['Anonymity'] == 'elite proxy']
        proxy_df = proxy_df[['IP Address', 'Port']].reset_index(drop=True).drop_duplicates(subset='IP Address')
        proxy_df.to_csv('proxy_list.csv',
                        encoding='utf-8',
                        index=False,
                        sep=':',
                        header=False,
                        mode='a')
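
As a side note (my own illustration, not part of the commit), an entry from proxy_list.csv can be routed through requests via its proxies mapping:

# Illustration: send one request through a scraped proxy (address taken from the list above).
import requests

proxy = '149.129.187.190:3128'
proxies = {'http': f'http://{proxy}', 'https': f'http://{proxy}'}
response = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=10)
print(response.json())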
@@ -0,0 +1,60 @@
# Returns a dict with information about a specific character

import requests
from bs4 import BeautifulSoup


def check_name(name):
    player_name = name

    # Only scrape when the name consists of letters (spaces allowed).
    if player_name.replace(' ', '').isalpha():
        info = get_char_info(player_name)
        does_it_exist = info[1]
        information = info[0]
        return does_it_exist, information
    else:
        information = 'The name contains characters that are not allowed.'
        does_it_exist = False
        return does_it_exist, information


# Scrape information about a specific character from tibia.com and return it as a dictionary.
def get_char_info(name):
    player_name = name
    player_info = {}
    rows_count = 0
    url = 'https://www.tibia.com/community/?subtopic=characters&name=' + player_name
    soup = BeautifulSoup(requests.get(url).text, 'html.parser')

    find_cell = soup.findAll('td')
    is_char = find_cell[0].text
    if is_char.find('does not exist') >= 0:
        player_info.update({player_name: ' does not exist.'})
        does_it_exist = False
    else:
        raw_cells = soup.findAll('td')
        player_info_list = []
        del raw_cells[0]
        for i in raw_cells:
            player_info_list.append(i.text)

        # Count the cells up to and including the account status row.
        del player_info_list[0]
        for i in player_info_list:
            rows_count += 1
            if i == 'Premium Account' or i == 'Free Account':
                break

        # Cells alternate label/value; strip the colon from each label.
        for i in range(0, rows_count, 2):
            player_info.update({player_info_list[i].replace(':', ''): player_info_list[i + 1]})

        # Delete the comment section from the data.
        player_info.pop('Comment', None)

        does_it_exist = True

    return player_info, does_it_exist
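
For illustration (not in the commit): calling check_name from another module. char_scrapper is a placeholder module name and 'Bubble' is just a sample character name.

# Illustrative only; module and character names are placeholders.
from char_scrapper import check_name

exists, info = check_name('Bubble')
if exists:
    for key, value in info.items():
        print(f'{key}: {value}')
else:
    print(info)  # either an error message or a '<name> does not exist.' entry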
@@ -0,0 +1,34 @@
# Professions
# 1 : None
# 2 : Knights
# 3 : Paladins
# 4 : Sorcerers
# 5 : Druids
def profession():
    prof = [2, 3, 4, 5]
    return prof


# Categories - any of them can be picked to scrape the whole highscores of that category
# 1 : Achievements
# 2 : Axe Fighting
# 15 : Boss Points
# 3 : Charm Points
# 4 : Club Fighting
# 5 : Distance Fighting
# 14 : Drome Score
# 6 : Experience Points
# 7 : Fishing
# 8 : Fist Fighting
# 9 : Goshnar's Taint
# 10 : Loyalty Points
# 11 : Magic Level
# 12 : Shielding
# 13 : Sword Fighting
def category():
    # skill = [1, 2, 15, 3, 4, 5, 14, 6, 7, 8, 9, 10, 11, 12, 13]
    skill = 6
    return skill
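
A sketch of how these helpers might drive the highscores scraper (my own illustration; the module name highscores_config and the exact tibia.com query-parameter names are assumptions, not confirmed by this commit):

# Illustrative only; module name and query-parameter names are assumptions.
from highscores_config import profession, category

BASE = 'https://www.tibia.com/community/?subtopic=highscores'


def highscores_urls(world='Antica'):
    # One URL per profession for the single selected category.
    return [f'{BASE}&world={world}&profession={prof}&category={category()}'
            for prof in profession()]


print(highscores_urls()[:2])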
@@ -0,0 +1,3 @@
def headers():
    headers_list = {'User-Agent': 'Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36'}
    return headers_list
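
Example of reusing the shared header with requests (illustrative; the import path is a placeholder):

# Illustrative only; 'header_list' is a placeholder module name.
import requests
from header_list import headers

response = requests.get('https://www.tibia.com/community/', headers=headers())
print(response.status_code)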