@@ -91,25 +91,29 @@ def get_page(self, request_content):
9191 xp_addresses = """//address[@class="propertyCard-address"]//span/text()"""
9292 xp_weblinks = """//div[@class="propertyCard-details"]\
9393 //a[@class="propertyCard-link"]/@href"""
94+
9495 xp_agent_urls = """//div[@class="propertyCard-contactsItem"]\
9596 //div[@class="propertyCard-branchLogo"]\
9697 //a[@class="propertyCard-branchLogo-link"]/@href"""
98+ xp_time_on_market = """//span[@class="propertyCard-contactsAddedOrReduced"]/text()"""
99+
97100
98101 # Create data lists from xpaths:
99102 price_pcm = tree .xpath (xp_prices )
100103 titles = tree .xpath (xp_titles )
101104 addresses = tree .xpath (xp_addresses )
105+ time_in_market = tree .xpath (xp_time_on_market )
102106 base = "http://www.rightmove.co.uk"
103107 weblinks = ["{}{}" .format (base , tree .xpath (xp_weblinks )[w ]) \
104108 for w in range (len (tree .xpath (xp_weblinks )))]
105109 agent_urls = ["{}{}" .format (base , tree .xpath (xp_agent_urls )[a ]) \
106110 for a in range (len (tree .xpath (xp_agent_urls )))]
107111
108112 # Store the data in a Pandas DataFrame:
109- data = [price_pcm , titles , addresses , weblinks , agent_urls ]
113+ data = [price_pcm , titles , addresses , weblinks , agent_urls , time_in_market ]
110114 temp_df = pd .DataFrame (data )
111115 temp_df = temp_df .transpose ()
112- temp_df .columns = ["price" , "type" , "address" , "url" , "agent_url" ]
116+ temp_df .columns = ["price" , "type" , "address" , "url" , "agent_url" , "time_in_market" ]
113117
114118 # Drop empty rows which come from placeholders in the html:
115119 temp_df = temp_df [temp_df ["address" ].notnull ()]
0 commit comments