Skip to content

Commit b766359

Browse files
author
Miguel Yanez
committed
Refactoring to cleaner code
1 parent 5161482 commit b766359

File tree

16 files changed

+348
-208
lines changed

16 files changed

+348
-208
lines changed
File renamed without changes.
File renamed without changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'mayanez'
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import json
2+
import logging
3+
import urllib
4+
from scrapers.solution_model import Seat, Flight, SeatQuery
5+
from selenium import webdriver
6+
7+
logging.basicConfig(level=logging.INFO)
8+
9+
class FlightStatsDriver(webdriver.PhantomJS):
    """PhantomJS web driver that scrapes seat availability from FlightStats.

    The driver loads the FlightStats flight-availability page, reads the
    page's ``availRoutes`` JavaScript variable and converts it into
    ``Flight``/``Seat`` objects that are stored in a ``SeatQuery``.
    """

    __logger = logging.getLogger(__name__)
    __base_url = "http://www.flightstats.com"
    __request_uri = "/go/FlightAvailability/flightAvailability.do"
    # NOTE(review): not referenced anywhere in this class.
    __http_header = {
        'Host': 'www.flightstats.com',
        'Content-Type': 'text/html'}
    # Query-string parameters sent with the availability request.
    # NOTE(review): 'departure', 'arrival' and 'queryDate' are never filled
    # in from the origin/destination/depart_date properties, so the request
    # is issued with these defaults -- confirm the intended mapping.
    __params = {
        'departure': '',
        'airline': '',
        'arrival': '',
        'connection': '',
        'queryDate': '',  # yyyy-mm-dd
        'queryTime': '2',
        'excludeConnectionCodes': '',
        'cabinCode': 'A',
        'numOfSeats': '1',
        'queryType': 'D',
        'fareClassCodes': ''}

    @property
    def origin(self):
        return self.__origin

    @origin.setter
    def origin(self, origin):
        self.__origin = origin

    @property
    def destination(self):
        return self.__destination

    @destination.setter
    def destination(self, destination):
        self.__destination = destination

    @property
    def depart_date(self):
        return self.__depart_date

    @depart_date.setter
    def depart_date(self, depart_date):
        self.__depart_date = depart_date

    @property
    def return_date(self):
        return self.__return_date

    @return_date.setter
    def return_date(self, return_date):
        self.__return_date = return_date

    def __init__(self, executable_path, service_log_path):
        """Start the underlying PhantomJS driver.

        :param executable_path: passed through to ``webdriver.PhantomJS``.
        :param service_log_path: passed through to ``webdriver.PhantomJS``.
        """
        # Initialise *this* instance. Calling webdriver.PhantomJS(...) here
        # would build a separate, discarded driver and leave self unusable.
        webdriver.PhantomJS.__init__(
            self,
            executable_path=executable_path,
            service_log_path=service_log_path)

    def __extract_flights_with_seats(self, json_obj):
        """Convert the decoded ``availRoutes`` structure into Flight objects.

        :param json_obj: mapping whose values each hold a ``'flights'``
            mapping; every flight carries ``airline``, ``flightNumber``,
            ``depCode``, ``arrCode`` and a ``cabins`` mapping.
        :return: list of ``Flight`` objects, each with its ``seats`` set.
        """
        flight_list = list()
        self.__logger.info('Extracting flights with seats')
        for results in json_obj.itervalues():
            for flight_data in results['flights'].itervalues():
                flight = Flight(
                    dep_city=flight_data['depCode'],
                    arr_city=flight_data['arrCode'],
                    airline=flight_data['airline'],
                    fno=flight_data['flightNumber'],
                    dep_time=self.depart_date)
                seats = list()

                for cabin in flight_data['cabins'].itervalues():
                    cabin_code = cabin['code']

                    for fare_class, seat_availability in cabin['fares'].iteritems():
                        # An empty string is treated as zero available seats.
                        if seat_availability == "":
                            seat_availability = 0
                        else:
                            seat_availability = int(seat_availability)

                        seats.append(Seat(
                            cabin_code=cabin_code,
                            fare_class=fare_class,
                            availability=seat_availability))

                flight.seats = seats
                flight_list.append(flight)

        return flight_list

    def get_seat_availability(self):
        """Fetch the availability page, store the result and quit the driver.

        The extracted flights are saved as a ``SeatQuery``. The web driver is
        quit when the method finishes, whether or not an error occurred.

        :return: list of ``Flight`` objects extracted from the page.
        """
        params = urllib.urlencode(self.__params)
        request_url = self.__base_url + self.__request_uri + ("?%s" % params)
        try:
            self.__logger.info('Requesting URL: %s', request_url)
            self.get(request_url)
            self.__logger.info('Running Javascript to retrieve available routes')
            result = self.execute_script('return JSON.stringify(availRoutes)')
            j = json.loads(unicode(result))

            flight_list = self.__extract_flights_with_seats(j)
            self.__logger.info('Saving SeatQuery to Database')
            seat_query = SeatQuery(flights=flight_list)
            seat_query.save()
        finally:
            # quit must actually be *called*; a bare attribute reference
            # leaves the PhantomJS process running.
            self.__logger.info('Quiting the Web Driver')
            self.quit()

        return flight_list

scrapers/engines/flightstats.py renamed to flight_scraper/engines/flight_stats/flightstats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55

66
from selenium import webdriver
7-
from scrapers.solution_model import *
7+
from flight_scraper.solution_model import *
88

99
logging.basicConfig(level=logging.INFO)
1010
logger = logging.getLogger(__name__)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'mayanez'
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import json
2+
import logging
3+
import datetime
4+
import requests
5+
from flight_scraper.solution_model import Solution, Flight, Itinerary
6+
7+
logging.basicConfig(level=logging.INFO)
8+
9+
class ItaMatrixDriver(object):
    """Builds and runs a round-trip flight search against ITA Matrix.

    The search is described by a JSON document with two slices: slice 0 is
    the outbound leg and slice 1 is the return leg. The ``origin``,
    ``destination``, ``depart_date`` and ``return_date`` properties read and
    write the matching fields of that document.
    """

    __logger = logging.getLogger(__name__)
    engine = "ITA Matrix"
    __base_url = "http://matrix.itasoftware.com"
    __request_uri = "/xhr/shop/search?"
    __http_header = {
        'Host': 'matrix.itasoftware.com',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cache-Control': 'no-cache',
        'Content-Length': '0'
    }
    # Written as adjacent literals so the value never depends on how the
    # continuation lines happen to be indented.
    __base_request = (
        "name=specificDates&summarizers=carrierStopMatrix"
        "%2CcurrencyNotice%2CsolutionList%2CitineraryPriceSlider%2C"
        "itineraryCarrierList%2CitineraryDepartureTimeRanges%2CitineraryArrivalTimeRanges"
        "%2CdurationSliderItinerary%2CitineraryOrigins%2CitineraryDestinations%2C"
        "itineraryStopCountList%2CwarningsItinerary&format=JSON&inputs=")
    # Default query document; each instance works on its own copy (see __init__).
    __json_request = json.loads('{"slices":[{"origins":["PDX"],"originPreferCity":false,"commandLine":"airlines AA DL AS UA","destinations":["SEA"],"destinationPreferCity":false,"date":"2013-06-07","isArrivalDate":false,"dateModifier":{"minus":0,"plus":0}},{"destinations":["PDX"],"destinationPreferCity":false,"origins":["SEA"],"originPreferCity":false,"commandLine":"airlines AA DL AS","date":"2013-06-09","isArrivalDate":false,"dateModifier":{"minus":0,"plus":0}}],"pax":{"adults":1},"cabin":"COACH","maxStopCount":0,"changeOfAirport":false,"checkAvailability":true,"page":{"size":2000},"sorts":"default"}')

    @property
    def origin(self):
        return self.__json_request['slices'][0]['origins'][0]

    @origin.setter
    def origin(self, origin):
        # The trip origin is also the destination of the return slice.
        self.__json_request['slices'][0]['origins'][0] = origin
        self.__json_request['slices'][1]['destinations'][0] = origin

    @property
    def destination(self):
        return self.__json_request['slices'][0]['destinations'][0]

    @destination.setter
    def destination(self, destination):
        # The trip destination is also the origin of the return slice.
        self.__json_request['slices'][0]['destinations'][0] = destination
        self.__json_request['slices'][1]['origins'][0] = destination

    @property
    def depart_date(self):
        return datetime.datetime.strptime(self.__json_request['slices'][0]['date'], "%Y-%m-%d")

    @depart_date.setter
    def depart_date(self, depart_date):
        self.__json_request['slices'][0]['date'] = depart_date.strftime('%Y-%m-%d')

    @property
    def return_date(self):
        return datetime.datetime.strptime(self.__json_request['slices'][1]['date'], "%Y-%m-%d")

    @return_date.setter
    def return_date(self, return_date):
        self.__json_request['slices'][1]['date'] = return_date.strftime('%Y-%m-%d')

    def __init__(self, origin, destination, depart_date, return_date):
        """Create a driver for one round-trip search.

        :param origin: origin code for the outbound slice.
        :param destination: destination code for the outbound slice.
        :param depart_date: object with ``strftime`` (date/datetime) for the
            outbound slice.
        :param return_date: object with ``strftime`` (date/datetime) for the
            return slice.
        """
        # Take a private copy of the class-level default query. Without it
        # the setters below mutate the shared dict and every driver instance
        # overwrites the others' search. A JSON round trip is a deep copy
        # here because the document is pure JSON data.
        self.__json_request = json.loads(json.dumps(self.__json_request))
        self.origin = origin
        self.destination = destination
        self.depart_date = depart_date
        self.return_date = return_date

    @staticmethod
    def __flight_from_slice(itinerary_slice):
        """Build a Flight from one itinerary slice of the search response."""
        # Only the first flight of the slice is used. Its designator is split
        # into a two-character airline code and a numeric flight number.
        designator = itinerary_slice['flights'][0]
        # The last six characters of each timestamp are dropped before
        # parsing (presumably a UTC offset such as "-07:00" -- TODO confirm).
        time_format = "%Y-%m-%dT%H:%M"
        dep_time = datetime.datetime.strptime(itinerary_slice['departure'][:-6], time_format)
        arr_time = datetime.datetime.strptime(itinerary_slice['arrival'][:-6], time_format)
        return Flight(
            airline=designator[:2],
            fno=int(designator[2:]),
            dep_city=itinerary_slice['origin']['code'],
            arr_city=itinerary_slice['destination']['code'],
            dep_time=dep_time,
            arr_time=arr_time)

    def build_solutions(self):
        """
        Builds search solution. Adds to MongoDB and returns the Solution object.
        """
        data = self.__base_request + json.dumps(self.__json_request)
        request_url = self.__base_url + self.__request_uri + data

        self.__logger.info('Making request to ITA Matrix: %s', request_url)
        response = requests.post(request_url, headers=self.__http_header)
        # The first four characters of the body are skipped before decoding
        # (presumably a non-JSON guard prefix -- TODO confirm).
        response_json = json.loads(response.text[4:])
        self.__logger.debug('ITA Matrix response: %s', response_json)

        self.__logger.info('Creating objects to insert to database')
        solution = Solution(
            engine=self.engine,
            origin=self.origin,
            destination=self.destination,
            depart_date=self.depart_date,
            return_date=self.return_date)
        solution_list = response_json['result']['solutionList']
        solution.min_price = solution_list['minPrice']

        for sol in solution_list['solutions']:
            slices = sol['itinerary']['slices']
            flight_list = [
                self.__flight_from_slice(slices[0]),  # outbound
                self.__flight_from_slice(slices[1]),  # return
            ]
            itinerary = Itinerary(flights=flight_list, price=sol['displayTotal'])
            solution.itineraries.append(itinerary)

        solution.save()

        return solution
112+
113+

flight_scraper/flight_scraper.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from scrapers.solution_model import Solution, SeatQuery
2+
from engines.ita_matrix.driver import ItaMatrixDriver
3+
4+
5+
class FlightScraper(object):
    """Facade for searching flights and querying previously stored results.

    Set ``origin``, ``destination``, ``depart_date`` and ``return_date``
    before calling any of the search or query methods; they read those
    values directly.
    """

    @property
    def origin(self):
        return self.__origin

    @origin.setter
    def origin(self, origin):
        self.__origin = origin

    @property
    def destination(self):
        return self.__destination

    @destination.setter
    def destination(self, destination):
        self.__destination = destination

    @property
    def depart_date(self):
        return self.__depart_date

    @depart_date.setter
    def depart_date(self, depart_date):
        self.__depart_date = depart_date

    @property
    def return_date(self):
        return self.__return_date

    @return_date.setter
    def return_date(self, return_date):
        self.__return_date = return_date

    def search_flights(self):
        """Run an ITA Matrix search for the configured trip.

        :return: whatever ``ItaMatrixDriver.build_solutions`` returns.
        """
        ita_driver = ItaMatrixDriver(
            origin=self.__origin,
            destination=self.__destination,
            depart_date=self.__depart_date,
            return_date=self.__return_date)
        return ita_driver.build_solutions()

    def solutions(self):
        """
        Returns a Solution object from MongoDB
        """
        return Solution.objects(
            depart_date=self.__depart_date,
            return_date=self.__return_date,
            origin=self.__origin,
            destination=self.__destination)

    def itineraries(self, flights_to_match):
        """Return stored itineraries that share a flight with the given ones.

        :param flights_to_match: iterable of flights to look for.
        :return: list of itineraries (across all stored solutions) that
            contain at least one of ``flights_to_match``.
        """
        results = list()
        # The lookup method is solutions(); there is no get_solutions().
        for sol in self.solutions():
            for itinerary in sol.itineraries:
                matched = set(itinerary.flights).intersection(flights_to_match)
                if matched:
                    results.append(itinerary)

        return results

    def __get_seats(self, date):
        """Query stored SeatQuery records for the configured route on ``date``."""
        seat_query = SeatQuery.objects(
            flights__dep_city=self.__origin,
            flights__arr_city=self.__destination,
            flights__dep_time=date)
        return seat_query

    def departure_seats(self):
        """Return seat queries for the departure date."""
        return self.__get_seats(self.__depart_date)

    def return_seats(self):
        """Return seat queries for the return date."""
        return self.__get_seats(self.__return_date)
72+
73+
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)