-
Notifications
You must be signed in to change notification settings - Fork 0
/
req.py
113 lines (89 loc) · 34.9 KB
/
req.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import requests, os
from bs4 import BeautifulSoup
# Clear the terminal before the script prints its results. "clear" only
# exists on POSIX shells, so use "cls" when running on Windows.
os.system("cls" if os.name == "nt" else "clear")
# Page hosting the ASP.NET form that lists press releases.
_PIB_URL = "https://pib.gov.in/Allrel.aspx"

# __EVENTVALIDATION token posted back verbatim with every request.
# The literal is written as two adjacent strings so that each physical line
# is a valid Python string; the concatenated value is unchanged.
_PIB_EVENT_VALIDATION = (
    "DjLTgSccy/KUWijW/ADZ+fywV/h9r03Lp0x/l0NiYc29pjsKPhgr8qS9SoSc0XE/3ejGT3GUJQwD2ZWlqEJE9yGiQxlKOaGBlJQ6A1fCTOUWLbpQ4j1gS6MisT5vgwsM2MxFR7cp070gMgsJG0PWyynxXitAVSEO/30w3Z5LjUGYWABhFhs61lIa1htxjE8RrGsOpZGP0JC4LIRrlEj9MdBDFmJvU91UceAFopv3M59g2fOfxBt6hNdVwj0LnCcwtkkJ7oLERma3a2UDFsd6LB4m7BvD+DIIBSr75gGicu60dmH+fSSu2QDYOCIzHl0vykTLGdalX5RpUaAY1ePJFv+kEGsghqffDAvAarTDcin3QOfUOm7BPOQHNJeE6vghY9DuA7GkOAgtRd1TelRJfXGNUWCWOPLZjNJjGZ2udjKzHEqU1UU6xftvjk8cN+Uc0N/7QCKZymee/ifveF+hvptIDHnMlgoXTYzpTUvWUtN0yfIQ1VBeVQGDw0XwpI9tWPsgaXXPGMigfOOSTVLNthIuwYguD1pCDJLluZp47PR27DIeXLUEJJEvpBFjnmdjyN3MtsixRx7RqQDCT3+6e5s+cjyAsWuwMQdePvDcBSyIEwi++4bOfYq1qBhGdjlA9BwNaR3BIcbcq35QnJ1sM/1yqZYmaX2RWLLnWrmapiKPm2jLdTejJi2HL35lRdgn1vZZCxZ091SeuDDQbgg3vEOByvubqOhTCTdZ3y8gBr2AnbhAjqaYo52UyMoBkwBvftNFmtGeFz8rE6cfrj/FYRu+JhRv+mLqZerjk/2lEtcqU/NnE1XRXRoZbpFxXMhOGOi0LzdugoyLWDW/cr1HZulsmzODwJl7by6/hSU0659Vd+IOjLzl+rd3m2aFP7lZcem2LQrENRxmb5DAAd+SGYmvXuATpbScRsE2CWXkFZF7OZRPTUyT3ljNyaEaqOk5CR+8W+OuoaHcaKyMMnJrBcOCGDAVTjTujVrfVsNPfTP5eJmp3/yURIAReFn/yPtmC160IkS2BbpHOwR3w2AsEpDU6862IGiupaR//ZifmTCVSBBJFgD6ITgtXgfJqWXs8qKYj3FYkn0f/gT8iz4zxkeQsGAVMcPT8gheTjm4br9ckVeriGVuD63ZxDmR/E4VW4y0lglQRzVCVS7/axwc3UpjZvl+Gn3+1MLxSIx3bN/6et593UPaCjn0MJevJzf0u69tTctzP4dO6q2oBOf2MouFLT9F65h1is0i45JBInEawPvRcPOmBhYrIkGKQFUx7DE3BkB7zIn2qJevjWZep+rUHWMjtWiNe9jc/2vp/9BufrU9PtfY54fQyUqdqZb6MYlhOdAFZTqdq4KMibrQPMDzk4uzEdJxI3qXpy2r/ZorH+7EgrktSLKY4pyTOBDXi+HY5flgRiHAf7u8QKj34Y+vOIcVOR6LukR/piERL4eGYsva+E3yAMYQiod3tOG2Ec8k7/5vGKnxbznnkIxyt26yRxCGffDcJjy+RaUM8owSdpgTKeZ5fOL3lXFfK3ouEG1apo2XIBPwvqeoKi1fSbWgdvQeBeQ6SjhTCb1gahm1yQephH/vDhiUY4SOMkY2hElXjoYNpi/5FirgAB6nu76nQ3kJ7GGuMo8v6g0pPXMfy5UwvHCQ4f/EueIlIaXMhhnIr6Y8fM8nQHLko6GDTJzvLRLxb6B+ivR+4EKYz/kvBo2ToW7IxlzUDYLkWjLYluV4s5hwHzg83x7uU0UB59u509lxgSDJFOAyLdxi95Y6bSaiSbiNHlXqGJ4b4IZhmd9FEmXEGEksIa3lJsZ5SLHo1vrbnCcVteYHx58wxXPvSbCKe7bPH5KkxzDb0K2XasZZDX2OEA7bU/N4stxJo5GRinHqPGd7bx8X+HUMZz58R2sP9mqeoFv0tLyWMTLHuVl+iolQp1kbnaDbi+Z9rJGRgluVFkbxWG3b1PoxSTTKqalG9w5M2MyqPk"
    "P5cWMnY1yNlenCPNnpivw+lPIiGn4tHwnN2pNYx7pPoOlvCvKql4MxXTbNdzJsa+CO+w58HyckA7vev3YmkUf9bS6X1d6VrMH+TC63A2YG/woB9GaCjSPUgjeqFo3RSHR/Yen6SC5lFFRb7B9FkbVfY3DcezimI2AEY6m6V8EVhdc0SphDNaYlqG2t8xvepp8Jftzk8XiRCu+WDKqonGfQPbDnm5UVr4rwQ7FXRzhtDEkfb5Nw7kBi33XQTFV3N21pAU2aG7FVGxoeYKGgwqXgrGOM9UlZ7TTPl/udU7eYTNQmKn8jkxCTlq4vSjkZ+6EkRkzepVVnXmkK2ReO4X0mCBQvNA1ylP085kJzGMxDagwukkK0NEngjTGDlZg53L2vjSdzZG3QIFMMLFAH/N0MCzmDunoMR6oRqBRETvtYM0nQBtZkFdfh+QPrAaqt3hVvxedoE7y7trvj8QcKOf+HDrRINhAy0MUQ/wMY1iZTM4uEKpDlZJHFblxB2cSyskOIfW/aji7Gfqn1lLJu2txmPzGffq2MEkuy+JvGSmnF/la6BYb2DThb2yQdqMCwIhruJaaHijwaSMT8oZRCYBeox1j92tlI/M5HsngOrT5CCZY+7OEdjql+4S8d0rym9sRMBCtK8B8lyERnQE7DsU4fTor3u972fUNx8nn1JF297iAnVsCcVdj7hwepm5fmtfulDfe9nwNoN9YxvBPckdNHY3ny1hk5g4zu/WkYAdmU+tQTiwyL+ic4wzeISompiZKcqJ3aSYGBx04zvuv4oE5t2uVu0it/+thY5aZ8rHg7tRYyYT9y2OKQELKHrX+f/SFLcPTFKrP/DXJoEK2lvnGFQoNHJonf36eVofzA0RvM+9JPpOqCavyoSQGJ8MjCI8UTzyJgkVrWJGo/Y11sz5vEbm3727aKNFMUL8LLumf3KiS4HWxPUlawZ7VyUkIRajjZDHkpjm/CHzFj3Ia6/WbHdjEwHxCx5sWE6hzrFt1WsRmXSjugzJ5XQQ9kpxnyqCWeNjHKLegtqFaYj57Dc6zPggpebqgi1asytl6zjOQOWkrDBaa2vNfTJrKvfcSdsuPWRtxuI1rnrNETYxWvlonJIFfWXpfgYmV712aONHBSG9Mrnyu6KvZ+rnEY0QiYRpCiQw1VNMeElKiXb3ZghlW+u7+IE7d+K7vMujGRD2V2huCLJBI6d1Nx3R94+9o7Q1IZlwwyFPKRM2hSSgpgx9jwY7kUZEIx+2r/lkWbK80LPq7r8Y6B1ZmHHMKrqYaZt1LxzrRUeC7BlDyE5HMvuZQp+1F3yH4YhPxGxMVGii6de0SvTAMdvNfrCwmUOpXmQSiPFIIWAVpsltY7LO6iGnUQkH9ue5wAhGzL0U3F7n5nTmg2KEcOKRBlf0UhNXjpmhyWtV2DWK6pp9ZzWnWnfXCE+J3LZEf4H7tSYvZnIE5NiNwtfj21hfOXBN+wF4dOFxRXKYLKPXa1xAdUsSFzyA7zltaQZ6YxPmDAGNOMPEZuPho4RNB+akNl5i2mDPhFhYda2YunhBI57FbN0fcwQPVDq5vvnlJCpJVPR0WGVX1j4S8r5p2W4OK8yKaTiqdM7AwB/XuTqrVzPLSJJ3iY3s/uE1KuXE67reWX/uKoKUWUEwg+A5xhe3V0x1UY8flJn2dpGti93hJRw6HAYnY7KfdCCQsOcQdhmAxAvmTJODNwIBqkv3aCULWj3Jp7s7wnC3vJq4cWwFm5I6yibkTp7KQFgFshAm6oRxcy5W2rz3Pf3NvMkc23khCM3qfgmjhUO0TZnDyysqTuVF6W6L2dSIuyhBNb8UFlF4p/FRp5bVy3hL2qtaRJPyNh3lZNd2uxTAA1FCMESetHrqqgIFNiGjYINu2e1wcTaURX/LyGZugrhFittgwz4cV+"
)


def _extract_ministries(soup, base_url):
    """Return one ``{"ministry", "infos"}`` dict per ministry found in *soup*.

    Looks for ``<div class="content-area">`` and walks its direct ``<ul>``
    children. Each child contributes its ``<h3>`` text as the ministry name
    and the links inside its ``<ul class="num">`` as the release list.
    Missing pieces are skipped instead of raising ``AttributeError``.
    """
    from urllib.parse import urljoin

    content_div = soup.find('div', class_='content-area')
    if content_div is None:
        # Page layout differs from what is expected (or no listing at all).
        return []

    ministries = []
    for ul in content_div.find_all('ul', recursive=False):
        heading = ul.find('h3')
        if heading is None:
            # Without a heading there is no ministry name to report.
            continue

        num_ul = ul.find('ul', class_='num')
        items = num_ul.find_all('li') if num_ul is not None else []

        infos = []
        for li in items:
            anchor = li.a
            if anchor is None or not anchor.get('href'):
                continue
            infos.append({
                # Link text may contain zero-width spaces; drop them.
                "link_text": str(anchor.text).replace("\u200b", "").strip(),
                # urljoin yields the same URL as plain concatenation for
                # root-relative hrefs and also copes with other href forms.
                "href": urljoin(base_url, anchor['href']),
            })

        ministries.append({"ministry": heading.text, "infos": infos})
    return ministries


def check_site(ministry=27, day=22, month=9, year=2023):
    """Fetch the PIB release listing for one ministry and date.

    Posts the form at ``https://pib.gov.in/Allrel.aspx`` with the given
    dropdown values and parses the returned HTML.

    Args:
        ministry: Value sent as the ``ddlMinistry`` form field.
        day: Value sent as the ``ddlday`` form field.
        month: Value sent as the ``ddlMonth`` form field.
        year: Value sent as the ``ddlYear`` form field.

    Returns:
        dict: On HTTP 200, ``{"msg": <str>}`` where the string is a JSON
        document (indent 4) holding a list of
        ``{"ministry": str, "infos": [{"link_text": str, "href": str}]}``.
        Otherwise ``{"msg": "website not working", "status_code": <code>}``;
        ``status_code`` is ``None`` when the request itself failed
        (connection error, timeout, ...).
    """
    import json

    # Browser-like request headers sent with the form post.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/117.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Content-Type": "application/x-www-form-urlencoded",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }

    # Form fields of the postback. NOTE(review): the hidden ASP.NET fields
    # are hard-coded captures (with an empty __VIEWSTATE); confirm the site
    # still accepts them.
    data = {
        "script_HiddenField": "",
        "__EVENTTARGET": "ctl00$ContentPlaceHolder1$ddlday",
        "__EVENTARGUMENT": "",
        "__LASTFOCUS": "",
        "__VIEWSTATE": "",
        "__VIEWSTATEGENERATOR": "CBED066B",
        "__VIEWSTATEENCRYPTED": "",
        "__EVENTVALIDATION": _PIB_EVENT_VALIDATION,
        "ctl00$Bar1$ddlregion": "3",
        "ctl00$Bar1$ddlLang": "1",
        "ctl00$ContentPlaceHolder1$hydregionid": "3",
        "ctl00$ContentPlaceHolder1$hydLangid": "1",
        "ctl00$ContentPlaceHolder1$ddlMinistry": str(ministry),
        "ctl00$ContentPlaceHolder1$ddlday": str(day),
        "ctl00$ContentPlaceHolder1$ddlMonth": str(month),
        "ctl00$ContentPlaceHolder1$ddlYear": str(year),
    }

    try:
        response = requests.post(
            _PIB_URL,
            headers=headers,
            data=data,
            # NOTE(review): the session id is a literal placeholder;
            # presumably a real value is expected here.
            cookies={"ASP.NET_SessionId": "YOUR_SESSION_ID_HERE"},
            # Without a timeout a stalled server blocks the caller forever.
            timeout=30,
        )
    except requests.RequestException:
        # Report transport failures through the same "not working" shape
        # that is used for non-200 responses.
        return {"msg": "website not working", "status_code": None}

    if response.status_code != 200:
        return {
            "msg": "website not working",
            "status_code": response.status_code,
        }

    soup = BeautifulSoup(response.text, 'html.parser')
    ministry_data = _extract_ministries(soup, _PIB_URL)
    return {"msg": json.dumps(ministry_data, indent=4)}
from deta import Deta


def _store_releases(db, year, months):
    """Scrape every real calendar day of *months* in *year* into *db*.

    Each successful ``check_site`` result is stored under the key
    ``D<day>-M<month>-Y<year>``. Results that carry a ``status_code`` key
    (the failure shape returned by ``check_site``) are printed but not stored.
    """
    import calendar

    for month in months:
        # Only iterate over days that exist (e.g. 30 for April, not 31).
        days_in_month = calendar.monthrange(year, month)[1]
        for day in range(1, days_in_month + 1):
            result = check_site(0, day, month, year)
            print(result)
            # check_site always returns a dict, so a plain None check would
            # let "website not working" payloads into the database.
            if result is None or "status_code" in result:
                continue
            db.put(result, f"D{day}-M{month}-Y{year}")
            print("inserted")


def main():
    """Print one sample day, then back-fill April-September 2023 into Deta."""
    print(check_site(0, 24, 9, 2023))

    # SECURITY NOTE: prefer supplying the key through the DETA_PROJECT_KEY
    # environment variable. The embedded fallback is kept only so existing
    # runs keep working; it is exposed in source control and should be
    # rotated and then removed.
    project_key = os.environ.get(
        "DETA_PROJECT_KEY",
        "c0uJEdiLS8zv_tATyVLnLAD5TL2QTqcNkPhWCXRZQbZwY",
    )
    deta = Deta(project_key)
    # Connect to (or create) the database.
    db = deta.Base("pib_db")

    _store_releases(db, 2023, range(4, 10))


# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()