Skip to content

Commit 77e3c0f

Browse files
author
toby petty
authored
Merge pull request #15 from csfyrakis/master
Added an extra column showing when the property was added to the market or last updated
2 parents d2dc7f1 + 184b9b4 commit 77e3c0f

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

rightmove_webscraper.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,25 +91,29 @@ def get_page(self, request_content):
9191
xp_addresses = """//address[@class="propertyCard-address"]//span/text()"""
9292
xp_weblinks = """//div[@class="propertyCard-details"]\
9393
//a[@class="propertyCard-link"]/@href"""
94+
9495
xp_agent_urls = """//div[@class="propertyCard-contactsItem"]\
9596
//div[@class="propertyCard-branchLogo"]\
9697
//a[@class="propertyCard-branchLogo-link"]/@href"""
98+
xp_time_on_market = """//span[@class="propertyCard-contactsAddedOrReduced"]/text()"""
99+
97100

98101
# Create data lists from xpaths:
99102
price_pcm = tree.xpath(xp_prices)
100103
titles = tree.xpath(xp_titles)
101104
addresses = tree.xpath(xp_addresses)
105+
time_in_market = tree.xpath(xp_time_on_market)
102106
base = "http://www.rightmove.co.uk"
103107
weblinks = ["{}{}".format(base, tree.xpath(xp_weblinks)[w]) \
104108
for w in range(len(tree.xpath(xp_weblinks)))]
105109
agent_urls = ["{}{}".format(base, tree.xpath(xp_agent_urls)[a]) \
106110
for a in range(len(tree.xpath(xp_agent_urls)))]
107111

108112
# Store the data in a Pandas DataFrame:
109-
data = [price_pcm, titles, addresses, weblinks, agent_urls]
113+
data = [price_pcm, titles, addresses, weblinks, agent_urls,time_in_market]
110114
temp_df = pd.DataFrame(data)
111115
temp_df = temp_df.transpose()
112-
temp_df.columns = ["price", "type", "address", "url", "agent_url"]
116+
temp_df.columns = ["price", "type", "address", "url", "agent_url","time_in_market"]
113117

114118
# Drop empty rows which come from placeholders in the html:
115119
temp_df = temp_df[temp_df["address"].notnull()]

0 commit comments

Comments
 (0)