# Source: https://github.com/gyunseul9/coronagathering
import os
import re
import requests
import urllib.request
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
from google.colab import drive
# Mount Google Drive so the output CSV persists across Colab sessions.
# NOTE(review): drive.mount returns None, so `c` is never used — confirm intent.
c=drive.mount('/content/drive')
# Directory (relative to the Colab working dir) where coronastatus.csv is kept.
CSV_URI = 'drive/My Drive/development/test/'
def overlap_param(csv_udate, udate):
    """Return 1 when the stored update stamp equals the freshly scraped one, else 0.

    A return of 1 signals the caller to skip appending a duplicate record.
    """
    is_duplicate = csv_udate == udate
    print('Overlap Record' if is_duplicate else 'Add Record')
    return 1 if is_duplicate else 0
def make_date(string):
    """Build a 'YYYYMMDDHH'-style stamp from a scraped Korean date string.

    The first three numbers found in *string* are taken as month, day and
    hour, and the current year is prefixed (the page itself omits the year).

    Raises:
        ValueError: if *string* contains fewer than three numbers.  (The
        caller catches ValueError to substitute a placeholder; the previous
        implementation raised IndexError instead, which went uncaught.)
    """
    year = datetime.today().strftime("%Y")
    # Raw string for the regex; '\d+' without r'' triggers a DeprecationWarning.
    numbers = re.findall(r'\d+', string)
    if len(numbers) < 3:
        raise ValueError('expected at least month, day and hour in %r' % string)
    month, day, hour = numbers[:3]
    return year + month + day + hour
def remove_keyword(string):
    """Strip literal parenthesis characters from *string* and return it."""
    for token in ('(', ')'):
        string = string.replace(token, '')
    return string
def write_csv(df):
    """Append the scraped DataFrame to the status CSV, skipping duplicates.

    If the CSV already exists, the update stamp of its last row is compared
    against the stamp of the new data (column 'udate'); the rows are appended
    only when the stamps differ.  The CSV is written without header or index,
    in utf-8-sig so Excel renders the Korean region names correctly.
    """
    savename = CSV_URI + 'coronastatus.csv'
    # os.path.basename replaces the previous manual split('/') dance;
    # the dead `tmp = []` initialisation is gone.
    basename = os.path.basename(savename)
    if os.path.exists(savename):
        print('Exist CSV', basename)
        df_read = pd.read_csv(savename, header=None)
        last_row = df_read.tail(1)
        csv_udate = last_row.iloc[:, 0]
        result = overlap_param(int(csv_udate.values[0]), int(df['udate'].values[0]))
    else:
        print('Does not exist CSV', basename)
        result = 0
    if result == 0:
        df.to_csv(savename, header=False, index=False, mode='a', encoding='utf-8-sig')
def scrappy(soup):
    """Scrape per-region COVID-19 figures from the MOHW live board.

    Builds a DataFrame with one row per region (up to 16) holding the update
    stamp, region name, case count and day-over-day delta, then hands it to
    write_csv().  Missing elements fall back to placeholder strings.
    """
    udate, area, num, before = [], [], [], []
    # Hoist the container lookup: the original re-ran this select() four
    # times per loop iteration, 64 selects in total.
    contents = soup.select('div.wrap.nj div.mainlive_container div.container div div.liveboard_layout div.live_right.main_box_toggle')
    # The update stamp is identical for every region — parse it once.
    # IndexError is caught too: select(...)[0] on a missing element raises
    # IndexError, which the original `except ValueError` never matched.
    try:
        stamp = make_date(contents[0].select('h2 a span.livedate')[0].text.strip())
    except (ValueError, IndexError):
        stamp = 'udate'
    try:
        buttons = contents[0].select('div.regional_patient_status_A div.rpsa_map div.rpsam_graph div#main_maplayout button')
    except IndexError:
        buttons = []
    for i in range(0, 16):
        udate.append(stamp)
        try:
            area.append(buttons[i].select('span.name')[0].text.strip())
        except (ValueError, IndexError):
            area.append('area')
        try:
            num.append(buttons[i].select('span.num')[0].text.strip())
        except (ValueError, IndexError):
            num.append('num')
        try:
            before.append(remove_keyword(buttons[i].select('span.before')[0].text.strip()))
        except (ValueError, IndexError):
            before.append('before')
    dic_corona = {'udate': udate, 'area': area, 'num': num, 'before': before}
    df_corona = pd.DataFrame(dic_corona)
    write_csv(df_corona)
# Entry point: fetch the Ministry of Health and Welfare COVID-19 live board
# and scrape it into the CSV.
url = 'http://ncov.mohw.go.kr/'
resp = requests.get(url)
# 'lxml' parser must be installed in the runtime for BeautifulSoup.
soup = BeautifulSoup(resp.text, 'lxml')
scrappy(soup)
# --- Blog-page residue below (not code): related posts in the 'Python' category ---
# 'Python' 카테고리의 다른 글
# (구글코랩) COVID-19 실시간 상황 수치 데이터를 MySQL 연동 소스를 공유합니다. (0) | 2020.08.12 |
# (구글코랩) 뉴스기사 이미지 다운로드, CSV 텍스트마이닝, 페이스북 피드, 팀즈 웹훅 소스를 공유합니다. (0) | 2020.08.11 |
# 윈도우10 파이썬 작업 스케줄러 설정하기 (0) | 2020.06.10 |
# 파이썬 부산창조경제혁신센터,알림마당,사업공고 크롤링 소스를 공개합니다. (0) | 2018.06.09 |
# 구름IDE에 마인크래프트 서버, 파이썬 API 설치 (0) | 2018.05.28 |