CS301_Proj5

CS301_Proj5

Overview

Hurricanes often count among the worst natural disasters, both in terms of monetary costs and, more importantly, human life. Data Science can help us better understand these storms. For example, take a quick look at this FiveThirtyEight analysis by Maggie Koerth-Baker: Why We’re Stuck With An Inadequate Hurricane Rating System (you should all read FiveThirtyEight, btw!).

For this project, you’ll be analyzing data in the hurricanes.csv file. We generated this data file by writing a Python program to extract stats from this page: https://en.wikipedia.org/wiki/List_of_United_States_hurricanes. By the end of this semester, we’ll teach you to extract data from websites like Wikipedia for yourself.

This project will focus on loops and strings. To start, download project.py, test.py and hurricanes.csv. You’ll do your work in a Jupyter Notebook this week, producing a main.ipynb file. As usual, test your work by running python test.py, which checks main.ipynb (or run python test.py other.ipynb to test a notebook with a different name). You may not use any modules that must be installed with pip; only the standard modules that come with Python, such as math, are allowed.

# project: p5
# submitter: naixinzhang
# partner: none
import project
#q1 How many records are in the dataset?
project.count()
132
#q2 What is the name of the hurricane at index 10?
project.get_name(10)
'Carol'
#q3 How many deaths were caused by the hurricane at the last index?
project.get_deaths(project.count()-1)
1
#q4 Is there a hurricane named Bob?
def if_bob():
    '''Return True if any hurricane in the dataset is named 'Bob', else False.

    The comparison is an exact, case-sensitive match against the value
    returned by project.get_name.
    '''
    # any() stops at the first match and yields False for an empty dataset,
    # so no explicit "last index" check is needed.
    return any(project.get_name(i) == 'Bob' for i in range(project.count()))
if_bob()
True
#q5 How many hurricanes named Florence are in the dataset?
def count_hurricanes(name='Florence'):
    '''Count the hurricanes whose name matches *name*, ignoring case.

    name: hurricane name to look for; defaults to 'Florence'.
    Returns the number of matching records (0 if there are none).
    '''
    # Upper-case the target once rather than on every comparison.
    target = name.upper()
    return sum(
        1 for i in range(project.count())
        if project.get_name(i).upper() == target
    )
count_hurricanes()
3
#q6 What is the fastest MPH achieved by a hurricane in the dataset?
def fastestMPH():
    '''Return the highest MPH value reported by project.get_mph.

    The running maximum starts at 0, so 0 is returned when the dataset is
    empty or no record exceeds 0.
    '''
    top_speed = 0
    for idx in range(project.count()):
        top_speed = max(top_speed, project.get_mph(idx))
    return top_speed
fastestMPH()
190
#q7 What is the name of that fastest hurricane?
def name_of_fatest():
    '''Return the name of the first hurricane whose MPH equals the maximum.

    Returns None if no record matches.
    '''
    # Compute the maximum once; calling fastestMPH() inside the loop would
    # rescan the whole dataset on every iteration.
    top_speed = fastestMPH()
    for i in range(project.count()):
        if project.get_mph(i) == top_speed:
            return project.get_name(i)
    return None

name_of_fatest()
'Allen'
#q8 How much damage (in dollars) was done by the hurricane Dolphin?
def count_dolphin(name='Dolphin'):
    '''Return the damage, in dollars, done by the hurricane called *name*.

    name: exact (case-sensitive) hurricane name; defaults to 'Dolphin'.

    The damage string from project.get_damage is converted using its last
    character: 'K' multiplies by 1,000, 'M' by 1,000,000 and 'B' by
    1,000,000,000. A matching record whose damage string has none of these
    suffixes is skipped and the scan continues. Returns None if no record
    yields a value.
    '''
    multipliers = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    for i in range(project.count()):
        if project.get_name(i) != name:
            continue
        # Fetch the damage string once instead of once per suffix test.
        damage = project.get_damage(i)
        factor = multipliers.get(damage[-1:])
        if factor is not None:
            return int(float(damage[:-1]) * factor)
    return None
count_dolphin()            
13500000
#q9 How many total deaths are represented in the dataset?
def total_d():
    '''Return the sum of project.get_deaths over every record in the dataset.'''
    return sum(project.get_deaths(idx) for idx in range(project.count()))

total_d()
18959
#q10 What was the deadliest hurricane between 2010 and 2019 (inclusive)?

def get_year(date):
    '''Return the year, as an int, of a date in 'mm/dd/yyyy' format.

    The date is split on '/' rather than sliced at fixed offsets, so
    non-zero-padded dates such as '8/5/2017' are handled as well.
    '''
    return int(date.split('/')[2])

def deadliest_in_range(year1, year2):
    '''Return the index of the deadliest hurricane formed in [year1, year2].

    year1, year2: inclusive bounds on the formation year.

    When several hurricanes tie for the most deaths, the one with the
    lowest index is returned. Returns None if no hurricane formed in the
    range.
    '''
    worst_idx = None
    worst_deaths = None
    for i in range(project.count()):
        # Parse the formation year once and test both bounds together.
        if year1 <= get_year(project.get_formed(i)) <= year2:
            deaths = project.get_deaths(i)
            # Strict '<' keeps the earliest record on ties.
            if worst_idx is None or worst_deaths < deaths:
                worst_idx, worst_deaths = i, deaths
    return worst_idx

project.get_name(deadliest_in_range(2010,2019))
'Maria'
#q11 What was the deadliest hurricane of the 20th century (1901 to 2000, inclusive)?
project.get_name(deadliest_in_range(1901,2000))
'Inez'
#q12 In what year did the most deadly hurricane in the dataset form?
def get_most():
    '''Return the largest death count reported by project.get_deaths.

    The running maximum starts at 0, so 0 is returned when the dataset is
    empty or no record exceeds 0.
    '''
    highest = 0
    for idx in range(project.count()):
        highest = max(highest, project.get_deaths(idx))
    return highest
def cal_year():
    '''Return the formation year of the first hurricane with the most deaths.

    Returns None if no record matches.
    '''
    # Compute the maximum once; calling get_most() inside the loop would
    # rescan the whole dataset on every iteration.
    most_deaths = get_most()
    for i in range(project.count()):
        if project.get_deaths(i) == most_deaths:
            return get_year(project.get_formed(i))
    return None
cal_year()
1899
#q13 How much damage (in dollars) was done by the deadliest hurricane of the 20th century?
def num_damage():
    '''Return the damage, in dollars, of the deadliest 1901-2000 hurricane.

    The damage string from project.get_damage is converted using its last
    character: 'K' multiplies by 1,000, 'M' by 1,000,000 and 'B' by
    1,000,000,000. Returns None if the string has none of these suffixes.
    '''
    multipliers = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    # Look the record up once; each deadliest_in_range call scans the
    # whole dataset.
    damage = project.get_damage(deadliest_in_range(1901, 2000))
    factor = multipliers.get(damage[-1:])
    if factor is None:
        return None
    return int(float(damage[:-1]) * factor)
num_damage()
226500000
#q14 What were the total damages across all hurricanes in the dataset, in dollars?
def total_dama():
    '''Return the total damage, in dollars, summed over every record.

    Each damage string is scaled by its last character ('K' -> 1,000,
    'M' -> 1,000,000, 'B' -> 1,000,000,000). Records whose damage string
    has none of these suffixes contribute nothing to the total.
    '''
    scale_by_suffix = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    running_total = 0
    for idx in range(project.count()):
        raw = project.get_damage(idx)
        scale = scale_by_suffix.get(raw[-1:])
        if scale is not None:
            running_total += int(float(raw[:-1]) * scale)
    return running_total
total_dama()
864230464997
def get_month(date):
    '''Return the month, as an int, of a date in 'mm/dd/yyyy' format.

    The date is split on '/' rather than sliced at fixed offsets, so
    non-zero-padded dates such as '8/5/2017' are handled as well.
    '''
    return int(date.split('/')[0])
def hurricanes_in_month(mm):
    '''Return how many hurricanes have a formation month equal to mm.

    mm: month number compared against get_month of each formation date.
    '''
    return sum(
        1 for idx in range(project.count())
        if get_month(project.get_formed(idx)) == mm
    )
#q15: How many hurricanes were formed in the month of July?
hurricanes_in_month(7)
17
#q16: How many hurricanes were formed in the month of December?
hurricanes_in_month(12)
3
#q17: How many hurricanes were formed in the month of January?
hurricanes_in_month(1)
3
#q18: How many hurricanes were formed in the month of May?
hurricanes_in_month(5)
2
#q19 Which month experienced the formation of the most number of hurricanes?
# Largest per-month hurricane count over months 1-12, computed when this
# statement runs.
max_hurricans = max(map(hurricanes_in_month, range(1, 13)))
def getMonth():
    '''Return the first month (1-12) whose hurricane count equals max_hurricans.

    Returns None if no month matches.
    '''
    matching_months = (
        month for month in range(1, 13)
        if hurricanes_in_month(month) == max_hurricans
    )
    return next(matching_months, None)
getMonth()
9
#q20 How many years experienced the formation of at least four hurricanes?

def hurricanes_in_year(yr):
    '''Return how many hurricanes have a formation year equal to yr.

    yr: year compared against get_year of each formation date.
    '''
    return sum(
        1 for idx in range(project.count())
        if get_year(project.get_formed(idx)) == yr
    )

def min_max_year():
    '''Return (earliest, latest) formation year across the dataset.

    The sentinels 10000 and 0 are returned unchanged when the dataset is
    empty. Both are ints so the result can always be passed to range().
    '''
    # Integer sentinel: a float such as 1e4 would leak to callers on an
    # empty dataset and break range().
    res_min_year = 10000
    res_max_year = 0
    for i in range(project.count()):
        # Parse each formation year once and reuse it for both bounds.
        year = get_year(project.get_formed(i))
        res_min_year = min(res_min_year, year)
        res_max_year = max(res_max_year, year)
    return res_min_year, res_max_year

def get_years():
    '''Return the number of distinct years in which at least four hurricanes formed.

    Returns 0 for an empty dataset.
    '''
    # Local import keeps this block self-contained.
    from collections import Counter

    # Tally formation years in a single pass instead of rescanning the
    # whole dataset once for every year between the earliest and latest.
    per_year = Counter(
        get_year(project.get_formed(i)) for i in range(project.count())
    )
    return sum(1 for total in per_year.values() if total >= 4)
get_years()
10

  TOC