In [ ]:
import os
import time
import urllib.request

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Scrape Google Images for the query "big data": open the search page, scroll
# until no more results load, then click every thumbnail and save the image
# shown in the preview pane to ./pics/test<N>.png (N = 1-based thumbnail index).

SCROLL_PAUSE_TIME = 1.5  # seconds to wait after each scroll for results to load
IMAGE_LOAD_PAUSE = 2.9   # seconds to wait after clicking a thumbnail

# urlretrieve does not create missing directories; without this every download
# would fail when ./pics does not exist yet.
os.makedirs('./pics', exist_ok=True)

driver = webdriver.Chrome()
try:
    driver.get("https://www.google.co.kr/imghp?hl=ko&ogbl")  # load the Google Images page
    # find_element(By..., ...) replaces the find_element_by_* helpers, which
    # were removed in Selenium 4.3.
    elem = driver.find_element(By.NAME, "q")
    elem.clear()
    elem.send_keys("big data")
    elem.send_keys(Keys.RETURN)

    # Get scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Wait to load page
        time.sleep(SCROLL_PAUSE_TIME)
        # Calculate new scroll height and compare with last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # The page stopped growing: try clicking the '.mye4qd' element
            # (presumably the "show more results" button -- confirm against the
            # live page). If it cannot be found or clicked, stop scrolling.
            try:
                driver.find_element(By.CSS_SELECTOR, '.mye4qd').click()
            except WebDriverException:
                break
        last_height = new_height

    images = driver.find_elements(By.CSS_SELECTOR, '.rg_i.Q4LuWd')
    cnt = 0
    # cnt numbers every thumbnail, including ones that fail, so file names
    # keep the same numbering as before (gaps mark skipped images).
    for cnt, img in enumerate(images, start=1):
        try:
            img.click()
            time.sleep(IMAGE_LOAD_PAUSE)
            img_url = driver.find_element(By.CSS_SELECTOR, '.n3VNCb').get_attribute('src')
            if not img_url:
                # No src attribute on the preview element; nothing to download.
                continue
            urllib.request.urlretrieve(img_url, './pics/test{0}.png'.format(cnt))
        except Exception as exc:
            # Best-effort: one bad thumbnail must not abort the whole run, but
            # report it instead of hiding it. Unlike a bare except, this still
            # lets KeyboardInterrupt/SystemExit stop the script.
            print('skipped image {0}: {1!r}'.format(cnt, exc))
finally:
    # quit() ends the whole WebDriver session (close() only closes the current
    # window), and the finally clause guarantees it runs even after an error.
    driver.quit()
'자바~하둡' 카테고리의 다른 글
python) 규제선형 모델 (0) | 2022.04.13 |
---|---|
python) XGBoost vs lightGBM 모델 학습과 하이퍼 파라미터 튜닝 (0) | 2022.04.12 |
python) LightGBM(Gradient Boosting Machine) 적용 - 위스콘신 유방암 예측 (0) | 2022.04.12 |
python) Ada Boosting Parameter (Human_activity_analysis) (0) | 2022.04.12 |
python) 로지스틱 회귀,의사결정나무, 랜덤포레스트_피마 인디언 당뇨병 분석 (0) | 2022.04.07 |