In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
In [2]:
url = 'https://movie.naver.com/movie/running/current.naver'
resp = requests.get(url)
resp.text
soup = BeautifulSoup(resp.text)
In [7]:
lst_dsc = soup.find_all('dl', class_='lst_dsc')[0]
In [9]:
dt_tit = lst_dsc.find('dt', class_='tit')
In [10]:
dt_tit.find('a').get_text()
Out[10]:
'베르네 부인의 장미정원'
In [12]:
dl_info_star = lst_dsc.find('dl', class_='info_star')
In [13]:
dl_info_star.find_all('span', class_='num')
Out[13]:
[<span class="num">9.80</span>, <span class="num">7.00</span>]
In [14]:
net = dl_info_star.find_all('span', class_='num')[0].get_text()
gija = dl_info_star.find_all('span', class_='num')[1].get_text()
In [15]:
print(net)
print(gija)
9.80
7.00
In [16]:
lst_dsc = soup.find_all('dl', class_='lst_dsc')
df = pd.DataFrame()
for dsc in lst_dsc:
    dt_tit = dsc.find('dt', class_='tit')
    title = dt_tit.find('a').get_text()
    dl_info_star = dsc.find('dl', class_='info_star')
    net = dl_info_star.find_all('span', class_='num')[0].get_text()
    if(len(dl_info_star.find_all('span', class_='num')) == 2):
        gija = dl_info_star.find_all('span', class_='num')[1].get_text()
    movie = dict()
    movie['title'] = title
    movie['net'] = float(net)
    movie['gija'] = float(gija)
    df = df.append(movie, ignore_index=True)
In [17]:
df.head()
Out[17]:
gija net title
0 7.00 9.80 베르네 부인의 장미정원
1 6.80 6.11 브로커
2 5.00 9.35 이공삼칠
3 6.50 9.15 윤시내가 사라졌다
4 7.25 8.08 올리 마키의 가장 행복한 날
In [18]:
df.tail()
Out[18]:
gija net title
88 6.0 9.41 해바라기
89 6.0 8.52 로맨스 빠빠
90 6.0 8.68 OK 목장의 결투
91 6.0 8.81 젊은이의 양지
92 6.0 8.56 가스등
In [19]:
df[['gija', 'net']].describe()
Out[19]:
gija net
count 93.000000 93.000000
mean 6.675484 7.857527
std 0.998419 1.602344
min 4.000000 0.000000
25% 6.000000 7.300000
50% 6.560000 8.250000
75% 7.500000 8.810000
max 8.630000 9.940000
In [20]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   gija    93 non-null     float64
 1   net     93 non-null     float64
 2   title   93 non-null     object 
dtypes: float64(2), object(1)
memory usage: 2.3+ KB
In [ ]: