Skip to content

Commit

Permalink
what
Browse files Browse the repository at this point in the history
  • Loading branch information
harbinzhang committed Jan 17, 2017
1 parent 662d0a1 commit 592ce68
Show file tree
Hide file tree
Showing 9 changed files with 170 additions and 45 deletions.
148 changes: 105 additions & 43 deletions .idea/workspace.xml

Large diffs are not rendered by default.

File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
40 changes: 38 additions & 2 deletions scawler/ceshi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,40 @@
#!/usr/bin/env python
import cookielib
import os
import re
import string
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pymongo
from time import sleep
import random


# Smoke test: load the LinkedIn home page in PhantomJS through an HTTP proxy
# and dump whatever HTML comes back, to check that the proxy is usable.

# Prints a float in the range (1.0, 2.0].
print(1 + random.randint(1, 1000) / 1000.0)

proxy = '70.248.28.23:800'
service_args = [
    '--proxy=' + proxy,
    # PhantomJS documents only http, socks5 and none as proxy types;
    # an HTTP proxy is also what carries https:// page loads.
    '--proxy-type=http',
    # '--proxy-auth=username:password',
]

phantomjsdriver = "../lib/phantomjs"
os.environ["webdriver.phantomjs.driver"] = phantomjsdriver
driver = webdriver.PhantomJS(phantomjsdriver, service_args=service_args)
try:
    # Give up on slow proxies instead of hanging; get() raises on timeout.
    driver.set_page_load_timeout(7)
    driver.get('https://www.linkedin.com')
    html = driver.page_source
    print(html)
finally:
    # Always shut the PhantomJS process down, even when the page load
    # times out, so repeated runs do not pile up orphaned browsers.
    driver.quit()
27 changes: 27 additions & 0 deletions scawler/phantomJSsample1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
import sys

# Sample: start PhantomJS behind a proxy with a custom user agent, load an
# IP-echo page, and print the resulting URL and HTML.

singleproxy = "88.157.149.250:8080"
proxytype = "http"

user_agent = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36")

# Build the switches from the settings above. Every PhantomJS option needs the
# leading "--"; without it the token is not parsed as an option at all.
phan_args = ['--proxy=' + singleproxy, '--proxy-type=' + proxytype]
print("step 1")
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = user_agent
print("step 2")
driver = webdriver.PhantomJS(service_args=phan_args, desired_capabilities=dcap)
try:
    driver.get("https://www.whatismyip.com/")
    print("step 3")
    print(driver.current_url)

    htmlpage = driver.page_source
    # sys.stdout.encoding is None when output is piped or redirected, so fall
    # back to UTF-8 rather than passing None to encode().
    print(htmlpage.encode(sys.stdout.encoding or 'utf-8', errors='replace'))
finally:
    # Shut the PhantomJS process down even if the page load fails.
    driver.quit()

0 comments on commit 592ce68

Please sign in to comment.