CS301_Proj5
Overview
Hurricanes often count among the worst natural disasters, both in terms of monetary costs and, more importantly, human life. Data Science can help us better understand these storms. For example, take a quick look at this FiveThirtyEight analysis by Maggie Koerth-Baker: Why We’re Stuck With An Inadequate Hurricane Rating System (you should all read FiveThirtyEight, btw!).
For this project, you’ll be analyzing data in the hurricanes.csv
file. We generated this data file by writing a Python program to
extract stats from this page:
https://en.wikipedia.org/wiki/List_of_United_States_hurricanes. By
the end of this semester, we’ll teach you to extract data from
websites like Wikipedia for yourself.
This project will focus on loops and strings. To start, download
`project.py`, `test.py`, and `hurricanes.csv`. You’ll do your work in
Jupyter Notebooks this week, producing a `main.ipynb` file.
You’ll test as usual by running `python test.py` to test a `main.ipynb`
file (or `python test.py other.ipynb` to test a notebook with a
different name). You may not use any extra modules that you need to
install with pip (only the standard modules that come with Python,
such as `math`).
# project: p5
# submitter: naixinzhang
# partner: none
import project
#q1 How many records are in the dataset?
project.count()
132
#q2 What is the name of the hurricane at index 10?
project.get_name(10)
'Carol'
#q3 How many deaths were caused by the hurricane at the last index?
project.get_deaths(project.count()-1)
1
#q4 Is there a hurricane named Bob?
def if_bob():
    """Report whether any hurricane in the dataset is named exactly 'Bob'.

    Returns:
        True if at least one record's name equals 'Bob' (case-sensitive),
        otherwise False -- including when the dataset has no records.
    """
    # any() short-circuits on the first match, so no explicit break or
    # last-index check is needed, and an empty dataset yields False.
    return any(project.get_name(i) == 'Bob' for i in range(project.count()))
if_bob()
True
#q5 How many hurricanes named Florence are in the dataset?
def count_hurricanes():
    """Count the records whose name is 'Florence', ignoring letter case."""
    target = 'FLORENCE'
    return sum(
        1
        for idx in range(project.count())
        if project.get_name(idx).upper() == target
    )
count_hurricanes()
3
#q6 What is the fastest MPH achieved by a hurricane in the dataset?
def fastestMPH():
    """Return the highest MPH value found across all records.

    The running maximum starts at 0, so 0 is returned when the dataset is
    empty or when no record has a value above 0.
    """
    top_speed = 0
    for idx in range(project.count()):
        # max() keeps the current value unless the new one is strictly larger.
        top_speed = max(top_speed, project.get_mph(idx))
    return top_speed
fastestMPH()
190
#q7 What is the name of that fastest hurricane?
def name_of_fatest():
    """Return the name of the first hurricane that reached the top MPH.

    Returns:
        The name of the lowest-index record whose MPH equals fastestMPH(),
        or None if no record matches (for example, an empty dataset).
    """
    # Loop-invariant: find the dataset-wide maximum once rather than
    # rescanning every record on each iteration.
    top_speed = fastestMPH()
    for i in range(project.count()):
        if project.get_mph(i) == top_speed:
            return project.get_name(i)
    return None
name_of_fatest()
'Allen'
#q8 How much damage (in dollars) was done by the hurricane Dolphin?
def count_dolphin():
    """Return the damage, in whole dollars, recorded for hurricane 'Dolphin'.

    The last character of the damage string selects the multiplier
    ('K' = thousand, 'M' = million, 'B' = billion); the rest is parsed as a
    float and the product is truncated to an int. Returns None when no
    'Dolphin' record carries one of those suffixes.
    """
    multipliers = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    for idx in range(project.count()):
        if project.get_name(idx) != 'Dolphin':
            continue
        damage = project.get_damage(idx)
        suffix = damage[-1:]
        if suffix in multipliers:
            return int(float(damage[:-1]) * multipliers[suffix])
count_dolphin()
13500000
#q9 How many total deaths are represented in the dataset?
def total_d():
    """Return the sum of the death counts of every record in the dataset."""
    return sum(project.get_deaths(idx) for idx in range(project.count()))
total_d()
18959
#q10 What was the deadliest hurricane between 2010 and 2019 (inclusive)?
def get_year(date):
    """Return the year as an int for a date given in 'mm/dd/yyyy' format."""
    # Characters 6 through 9 hold the four-digit year in this layout.
    year_digits = date[6:10]
    return int(year_digits)
def deadliest_in_range(year1, year2):
    """Find the index of the deadliest hurricane formed within a year range.

    Args:
        year1: First year of the range (inclusive).
        year2: Last year of the range (inclusive).

    Returns:
        Index of the record with the most deaths among those whose formation
        year lies in [year1, year2]; the lowest index wins ties. None if no
        record falls in the range.
    """
    worst_idx = None
    for i in range(project.count()):
        # Parse the formation year once per record.
        year = get_year(project.get_formed(i))
        if not year1 <= year <= year2:
            continue
        # Identity check for None; the strict '<' keeps the first of any tie.
        if worst_idx is None or project.get_deaths(worst_idx) < project.get_deaths(i):
            worst_idx = i
    return worst_idx
project.get_name(deadliest_in_range(2010,2019))
'Maria'
#q11 What was the deadliest hurricane of the 20th century (1901 to 2000, inclusive)?
project.get_name(deadliest_in_range(1901,2000))
'Inez'
#q12 In what year did the most deadly hurricane in the dataset form?
def get_most():
    """Return the largest death count of any single record.

    The running maximum starts at 0, so 0 is returned for an empty dataset.
    """
    highest = 0
    for idx in range(project.count()):
        highest = max(highest, project.get_deaths(idx))
    return highest
def cal_year():
    """Return the formation year of the deadliest hurricane in the dataset.

    Returns:
        The year (int) of the lowest-index record whose death count equals
        get_most(), or None if the dataset has no records.
    """
    # Loop-invariant: compute the maximum death count once instead of
    # rescanning the whole dataset on every iteration.
    most_deaths = get_most()
    for i in range(project.count()):
        if project.get_deaths(i) == most_deaths:
            return get_year(project.get_formed(i))
    return None
cal_year()
1899
#q13 How much damage (in dollars) was done by the deadliest hurricane of the 20th century?
def num_damage():
    """Return the damage, in whole dollars, of the deadliest 1901-2000 hurricane.

    The last character of the damage string selects the multiplier
    ('K' = thousand, 'M' = million, 'B' = billion); the rest is parsed as a
    float and the product is truncated to an int.

    Returns:
        The damage as an int, or None if the damage string does not end in
        one of the recognised suffixes.
    """
    # Locate the record and fetch its damage string once; the original
    # repeated the full dataset scan for every suffix test and conversion.
    damage = project.get_damage(deadliest_in_range(1901, 2000))
    multipliers = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    factor = multipliers.get(damage[-1:])
    if factor is None:
        return None
    return int(float(damage[:-1]) * factor)
num_damage()
226500000
#q14 What were the total damages across all hurricanes in the dataset, in dollars?
def total_dama():
    """Return the total damage over all records, in whole dollars.

    Each damage string is scaled by its trailing suffix ('K' = thousand,
    'M' = million, 'B' = billion) and truncated to an int before being
    added. Records without one of those suffixes contribute nothing.
    """
    multipliers = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    running_total = 0
    for idx in range(project.count()):
        damage = project.get_damage(idx)
        factor = multipliers.get(damage[-1:])
        if factor is not None:
            running_total += int(float(damage[:-1]) * factor)
    return running_total
total_dama()
864230464997
def get_month(date):
    """Return the month as an int for a date given in 'mm/dd/yyyy' format."""
    # The first two characters hold the month in this layout.
    month_digits = date[:2]
    return int(month_digits)
def hurricanes_in_month(mm):
    """Count the records whose formation date falls in month number `mm`."""
    return sum(
        1
        for idx in range(project.count())
        if get_month(project.get_formed(idx)) == mm
    )
#q15: How many hurricanes were formed in the month of July?
hurricanes_in_month(7)
17
#q16: How many hurricanes were formed in the month of December?
hurricanes_in_month(12)
3
#q17: How many hurricanes were formed in the month of January?
hurricanes_in_month(1)
3
#q18: How many hurricanes were formed in the month of May?
hurricanes_in_month(5)
2
#q19 Which month experienced the formation of the most number of hurricanes?
# Largest number of hurricanes formed in any single calendar month (1-12).
max_hurricans = max(map(hurricanes_in_month, range(1, 13)))


def getMonth():
    """Return the first month number (1-12) whose count equals max_hurricans."""
    matching_months = (
        month
        for month in range(1, 13)
        if hurricanes_in_month(month) == max_hurricans
    )
    # Yields None when no month matches, like falling off the end of a loop.
    return next(matching_months, None)
getMonth()
9
#q20 How many years experienced the formation of at least four hurricanes?
def hurricanes_in_year(yr):
    """Count the records whose formation date falls in the year `yr`."""
    return sum(
        1
        for idx in range(project.count())
        if get_year(project.get_formed(idx)) == yr
    )
def min_max_year():
    """Return the earliest and latest formation years in the dataset.

    Returns:
        A (min_year, max_year) tuple of ints. For an empty dataset the
        placeholder pair (10000, 0) is returned; both values are ints so
        that callers can pass them straight to range(), which then yields
        nothing.
    """
    years = [get_year(project.get_formed(i)) for i in range(project.count())]
    # The defaults are ints on purpose: a float placeholder such as 1e4
    # would make range(min_year, max_year + 1) raise TypeError.
    return min(years, default=10000), max(years, default=0)
def get_years():
    """Count the years in which at least four hurricanes formed.

    Every year from the earliest to the latest formation year reported by
    min_max_year() (both inclusive) is checked.
    """
    first_year, last_year = min_max_year()
    return sum(
        1
        for year in range(first_year, last_year + 1)
        if hurricanes_in_year(year) >= 4
    )
get_years()
10