# Source: https://github.com/gyunseul9/coronagathering

 

import os

import re

import requests

import urllib.request

import pandas as pd

from datetime import datetime

from bs4 import BeautifulSoup

from google.colab import drive

 

 

# Mount Google Drive into the Colab runtime so the CSV path below is writable.
# NOTE(review): drive.mount returns None in current google.colab — `c` is unused.
c=drive.mount('/content/drive')

# Directory (relative to the mounted drive root) where the scraped CSV is kept.
CSV_URI = 'drive/My Drive/development/test/'

 

def overlap_param(csv_udate, udate):
    """Return 1 when the stored update stamp equals the new one, else 0.

    Prints 'Overlap Record' for a duplicate and 'Add Record' otherwise.
    """
    duplicate = csv_udate == udate
    print('Overlap Record' if duplicate else 'Add Record')
    return 1 if duplicate else 0

 

def make_date(string):
    """Build an update stamp from the current year plus the first three
    numbers found in *string* (expected: month, day, hour, e.g. '3.15. 00시').

    Returns the concatenation year+month+day+hour as a string.
    Raises IndexError if *string* contains fewer than three numbers.
    """
    # Fix: the original had a bare `datetime.today()` whose result was
    # discarded (dead statement), and used a non-raw '\d+' pattern, which
    # warns on modern Python.
    year = datetime.today().strftime("%Y")
    numbers = re.findall(r'\d+', string)
    month = numbers[0]
    day = numbers[1]
    hour = numbers[2]
    return year + month + day + hour

 

def remove_keyword(string):
    """Return *string* with all parenthesis characters removed."""
    for unwanted in ('(', ')'):
        string = string.replace(unwanted, '')
    return string

 

def write_csv(df):
    """Append *df* to coronastatus.csv under CSV_URI, skipping the write
    when the last stored update stamp matches the incoming one.

    The duplicate check compares the first column of the CSV's last row
    against df['udate'] via overlap_param; the file is created on first run.
    """
    savename = CSV_URI + 'coronastatus.csv'
    # Fix: the original built a throwaway `tmp = []` that was immediately
    # overwritten, then indexed with tmp[len(tmp)-1]; use a direct [-1].
    basename = savename.split('/')[-1]

    if os.path.exists(savename):
        print('Exist CSV', basename)
        # File is written without a header, so read it headerless too.
        df_read = pd.read_csv(savename, header=None)
        last_row = df_read.tail(1)
        csv_udate = last_row.iloc[:, 0]
        result = overlap_param(int(csv_udate.values[0]), int(df['udate'].values[0]))
    else:
        print('Does not exist CSV', basename)
        result = 0

    # result == 0 means "not a duplicate": append the new rows.
    if result == 0:
        df.to_csv(savename, header=False, index=False, mode='a', encoding='utf-8-sig')

 

def scrappy(soup):
    """Scrape the MOHW live board in *soup* for per-region COVID-19 status.

    For each of the 16 regions collects the update stamp, region name,
    case count and day-over-day change (parentheses stripped), builds a
    DataFrame with columns udate/area/num/before and hands it to write_csv.
    Missing elements are replaced by placeholder strings.
    """
    udate, area, num, before = [], [], [], []

    # Fix: both selects below are loop-invariant but were re-run on every
    # iteration (the button query up to 3x16 times); hoist them out.
    contents = soup.select('div.wrap.nj div.mainlive_container div.container div div.liveboard_layout div.live_right.main_box_toggle')
    buttons = contents[0].select('div.regional_patient_status_A div.rpsa_map div.rpsam_graph div#main_maplayout button')

    for i in range(0, 16):
        # Fix: a missing element raises IndexError from [0]/[i], not
        # ValueError as originally caught, so the placeholder fallbacks
        # could never trigger; catch both for backward compatibility.
        try:
            tmp = contents[0].select('h2 a span.livedate')[0].text.strip()
            udate.append(make_date(tmp))
        except (IndexError, ValueError):
            udate.append('udate')

        try:
            area.append(buttons[i].select('span.name')[0].text.strip())
        except (IndexError, ValueError):
            area.append('area')

        try:
            num.append(buttons[i].select('span.num')[0].text.strip())
        except (IndexError, ValueError):
            num.append('num')

        try:
            tmp = buttons[i].select('span.before')[0].text.strip()
            before.append(remove_keyword(tmp))
        except (IndexError, ValueError):
            before.append('before')

    dic_corona = {}
    dic_corona['udate'] = udate
    dic_corona['area'] = area
    dic_corona['num'] = num
    dic_corona['before'] = before

    df_corona = pd.DataFrame(dic_corona)
    write_csv(df_corona)

 

# Entry point: fetch the Ministry of Health & Welfare COVID-19 portal,
# parse it, and run the scraper (which appends results to the CSV).
url = 'http://ncov.mohw.go.kr/'

resp = requests.get(url)

# Parse with the lxml backend; scrappy() expects the full page soup.
soup = BeautifulSoup(resp.text, 'lxml')

scrappy(soup)

# Posted by 앤비