CS301_Proj7

CS301_Proj7

Intro

Let’s Play Fifa19, Python style! In this project, you will get more practice with lists and start using dictionaries. Start by downloading test.py and Fifa19.csv (which was adapted from this dataset). This dataset is too large to preview on GitHub (>17K rows), but you can view the raw version or open it in a program such as Excel. You can also preview an example with 100 rows here. For this project, you’ll create a new main.ipynb and answer questions in the usual format.

The Data

Try to familiarize yourself with the data before starting the analysis. We have players belonging to a wide range of nationalities and clubs in Fifa19. As you can see, the data includes their weekly wages in Euros (yes, wages are per week!), the net worth of the player (in Euros), and the performance rating (a score out of 100). For instance, the player named “Neymar” is associated with Brazil, is signed by the club “Paris Saint-Germain”, and is paid a weekly wage of ‘€290K’ (290000 Euros).

#project: p7
#submitter: naixinzhang
#partner: none
import csv

# Read the whole CSV into memory once. The context manager closes the file
# even if parsing raises, and newline='' lets the csv module handle line
# endings (including newlines embedded in quoted fields) itself.
with open('Fifa19.csv', encoding='utf-8', newline='') as fifa_file:
    file_reader = csv.reader(fifa_file)
    player_data = list(file_reader)
# The first row holds the column names; every following row is one player.
header = player_data[0]
player_data = player_data[1:]
# Columns 2 and 4 are converted from text to float in place so later code can
# compare and sum them numerically (the echoed outputs further down show these
# as the Age and Overall columns).
for row in player_data:
    for idx in (2, 4):
        row[idx] = float(row[idx])
# q1: what is the name of the oldest player?
def find_oldest():
    """Return the name of the player with the greatest age.

    Reads the module-level ``header`` and ``player_data``. If several
    players share the greatest age, the one that appears first in
    ``player_data`` wins. Returns ``None`` when ``player_data`` is empty.
    """
    name_idx = header.index('Name')
    age_idx = header.index('Age')
    # max() keeps the first maximal row, which matches a strict ``>`` scan.
    oldest_row = max(player_data, key=lambda row: row[age_idx], default=None)
    if oldest_row is None:
        return None
    return oldest_row[name_idx]
find_oldest()
'O. Pérez'
def str_to_num(str):
    """Convert a money string such as ``'€290K'`` into a float.

    A trailing ``K`` multiplies the number by 1000 and a trailing ``M`` by
    1000000; any other text is parsed as a plain number. The ``€`` prefix is
    optional, and whitespace around the amount is ignored.

    Args:
        str: The money text, e.g. ``'€110.5M'``, ``'€405K'`` or ``'€0'``.
            (The name shadows the builtin; it is kept so existing keyword
            callers continue to work.)

    Returns:
        The amount as a float.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    pieces = str.split('€')
    # With a euro sign the amount is the text right after it; without one,
    # fall back to the whole string instead of failing with IndexError.
    amount = (pieces[1] if len(pieces) > 1 else pieces[0]).strip()
    multipliers = {'K': 1000, 'M': 1000000}
    # Slicing (rather than indexing) tolerates an empty amount, which then
    # reaches float() and raises ValueError.
    suffix = amount[-1:]
    if suffix in multipliers:
        return float(amount[:-1]) * multipliers[suffix]
    return float(amount)
# q2: what is the name of the highest-paid player?
def find_highest_paid():
    """Return the name of the player with the largest wage.

    Reads the module-level ``header`` and ``player_data`` and converts each
    Wage cell with ``str_to_num`` before comparing. If several players share
    the largest wage, the one that appears first in ``player_data`` wins.
    Returns ``None`` when ``player_data`` is empty.
    """
    name_idx = header.index('Name')
    wage_idx = header.index('Wage')
    # max() keeps the first maximal row, which matches a strict ``>`` scan.
    top_row = max(
        player_data,
        key=lambda row: str_to_num(row[wage_idx]),
        default=None,
    )
    if top_row is None:
        return None
    return top_row[name_idx]
find_highest_paid()
'L. Messi'
# q3: what is the name of the highest value player?
def find_highest_value():
    """Return the name of the player with the largest value.

    Reads the module-level ``header`` and ``player_data`` and converts each
    Value cell with ``str_to_num`` before comparing. If several players share
    the largest value, the one that appears first in ``player_data`` wins.
    Returns ``None`` when ``player_data`` is empty.
    """
    name_idx = header.index('Name')
    value_idx = header.index('Value')
    # max() keeps the first maximal row, which matches a strict ``>`` scan.
    top_row = max(
        player_data,
        key=lambda row: str_to_num(row[value_idx]),
        default=None,
    )
    if top_row is None:
        return None
    return top_row[name_idx]
find_highest_value()
'Neymar Jr'
# q4: what club is that player(in above q3) in?
def find_club(player):
    """Return the club of the first row whose Name equals *player*.

    Reads the module-level ``header`` and ``player_data``. Returns ``None``
    when no row has that name.
    """
    name_col = header.index('Name')
    club_col = header.index('Club')
    # Lazily yield the club of each matching row; only the first is consumed.
    clubs = (entry[club_col] for entry in player_data
             if entry[name_col] == player)
    return next(clubs, None)
find_club(find_highest_value())
'Paris Saint-Germain'
def get_column(col_name):
    """Return a new list with the *col_name* cell of every row.

    The column position is looked up in the module-level ``header``; the
    values are taken from ``player_data`` in row order.
    """
    position = header.index(col_name)
    return [entry[position] for entry in player_data]
# q5: what are the first five nationalities listed in the dataset?
def get_first_five_nationalities():
    """Return the Nationality values of the first five rows, in row order."""
    nationalities = get_column('Nationality')
    return nationalities[0:5]
get_first_five_nationalities()
['Argentina', 'Portugal', 'Brazil', 'Spain', 'Belgium']
# q6: which five names are alphabetically first in the dataset?
def find_five_names():
    """Return the five names that sort first among all Name values."""
    ordered_names = sorted(get_column('Name'))
    return ordered_names[0:5]
find_five_names()
['A. Abang', 'A. Abdellaoui', 'A. Abdennour', 'A. Abdi', 'A. Abdu Jaber']
# q7: what is the average value?
def average_value():
    """Return the mean of the Value column.

    Each Value cell is converted with ``str_to_num`` before averaging.
    Raises ``ZeroDivisionError`` when there are no rows.
    """
    amounts = [str_to_num(raw) for raw in get_column('Value')]
    return sum(amounts) / len(amounts)
average_value()
2410695.8861976163
# q8: what is the average age?
def average_age():
    """Return the mean of the Age column.

    Each Age cell is passed through ``float`` before averaging.
    Raises ``ZeroDivisionError`` when there are no rows.
    """
    ages = [float(raw) for raw in get_column('Age')]
    return sum(ages) / len(ages)
average_age()
25.122205745043114
def player_count(country):
    """Return how many rows have *country* in the Nationality column."""
    nat_col = header.index('Nationality')
    return sum(1 for entry in player_data if entry[nat_col] == country)
# q9: how many players have Portugal as their nationality?
player_count('Portugal')
322
# q10: how many players have Brazil as their nationality?
player_count('Brazil')
827
# q11: which country has the most players participating in FIFA19?
def country_number_players():
    """Return a dict mapping each Nationality value to its number of rows.

    Keys appear in the order the nationalities are first seen in
    ``player_data``.
    """
    nat_col = header.index('Nationality')
    tally = {}
    for entry in player_data:
        nation = entry[nat_col]
        # A nationality not seen before starts from zero.
        tally[nation] = tally.get(nation, 0) + 1
    return tally
country_num = country_number_players()
max(country_num, key=country_num.get)
'England'
def player_to_dict(player_id):
    """Return the stats of the player whose Id equals *player_id*.

    Args:
        player_id: The player's Id as an ``int``.

    Returns:
        A dict mapping each column name in the module-level ``header`` to
        that player's cell, as stored in ``player_data``. The scan stops at
        the first matching row. Returns an empty dict when no row matches.

    Raises:
        ValueError: If an Id cell reached during the scan is not an integer.
    """
    id_idx = header.index('Id')
    for row in player_data:
        # Id cells are kept as text, so convert before comparing to the int.
        if int(row[id_idx]) == player_id:
            return dict(zip(header, row))
    return {}
# q12: what are the stats for the player with Id equal to 20801?
player_to_dict(20801)
{'Id': '20801',
 'Name': 'Cristiano Ronaldo',
 'Age': 33.0,
 'Nationality': 'Portugal',
 'Overall': 94.0,
 'Club': 'Juventus',
 'Value': '€77M',
 'Wage': '€405K',
 'Preferred Foot': 'Right',
 'Jersey Number': '7',
 'Height': "6'2",
 'Weight': '183lbs'}
# q13: what are the stats for the player with Id equal to 190871?
player_to_dict(190871)
{'Id': '190871',
 'Name': 'Neymar Jr',
 'Age': 26.0,
 'Nationality': 'Brazil',
 'Overall': 92.0,
 'Club': 'Paris Saint-Germain',
 'Value': '€118.5M',
 'Wage': '€290K',
 'Preferred Foot': 'Right',
 'Jersey Number': '10',
 'Height': "5'9",
 'Weight': '150lbs'}
# q14: what are the stats for the player with Id equal to 158023?
player_to_dict(158023)
{'Id': '158023',
 'Name': 'L. Messi',
 'Age': 31.0,
 'Nationality': 'Argentina',
 'Overall': 94.0,
 'Club': 'FC Barcelona',
 'Value': '€110.5M',
 'Wage': '€565K',
 'Preferred Foot': 'Left',
 'Jersey Number': '10',
 'Height': "5'7",
 'Weight': '159lbs'}
# q15: what are the stats for the player with Id equal to 192985?
player_to_dict(192985)
{'Id': '192985',
 'Name': 'K. De Bruyne',
 'Age': 27.0,
 'Nationality': 'Belgium',
 'Overall': 91.0,
 'Club': 'Manchester City',
 'Value': '€102M',
 'Wage': '€355K',
 'Preferred Foot': 'Right',
 'Jersey Number': '7',
 'Height': "5'11",
 'Weight': '154lbs'}
# q16: how many players are there per nationality?
country_number_players()
{'Argentina': 937,
 'Portugal': 322,
 'Brazil': 827,
 'Spain': 1072,
 'Belgium': 260,
 'Croatia': 126,
 'Uruguay': 149,
 'Slovenia': 55,
 'Poland': 350,
 'Germany': 1198,
 'France': 914,
 'England': 1662,
 'Italy': 702,
 'Egypt': 31,
 'Colombia': 618,
 'Denmark': 336,
 'Gabon': 15,
 'Wales': 129,
 'Senegal': 130,
 'Costa Rica': 30,
 'Slovakia': 54,
 'Netherlands': 453,
 'Bosnia Herzegovina': 61,
 'Morocco': 85,
 'Serbia': 126,
 'Algeria': 60,
 'Austria': 298,
 'Greece': 102,
 'Chile': 391,
 'Sweden': 397,
 'Korea Republic': 335,
 'Finland': 67,
 'Guinea': 31,
 'Montenegro': 23,
 'Armenia': 10,
 'Switzerland': 220,
 'Norway': 341,
 'Czech Republic': 100,
 'Scotland': 286,
 'Ghana': 114,
 'Central African Rep.': 3,
 'DR Congo': 52,
 'Ivory Coast': 100,
 'Russia': 79,
 'Ukraine': 73,
 'Iceland': 47,
 'Mexico': 366,
 'Jamaica': 32,
 'Albania': 40,
 'Venezuela': 67,
 'Japan': 478,
 'Turkey': 303,
 'Ecuador': 43,
 'Paraguay': 85,
 'Mali': 43,
 'Nigeria': 121,
 'Cameroon': 90,
 'Dominican Republic': 2,
 'Israel': 14,
 'Kenya': 10,
 'Hungary': 38,
 'Republic of Ireland': 368,
 'Romania': 54,
 'United States': 353,
 'Cape Verde': 19,
 'Australia': 236,
 'Peru': 37,
 'Togo': 12,
 'Syria': 9,
 'Zimbabwe': 13,
 'Angola': 15,
 'Burkina Faso': 16,
 'Iran': 17,
 'Estonia': 13,
 'Tunisia': 32,
 'Equatorial Guinea': 5,
 'New Zealand': 44,
 'FYR Macedonia': 20,
 'United Arab Emirates': 1,
 'China PR': 392,
 'Guinea Bissau': 15,
 'Bulgaria': 32,
 'Kosovo': 33,
 'South Africa': 71,
 'Madagascar': 12,
 'Georgia': 26,
 'Tanzania': 3,
 'Gambia': 15,
 'Cuba': 4,
 'Belarus': 4,
 'Uzbekistan': 2,
 'Benin': 15,
 'Congo': 25,
 'Mozambique': 4,
 'Honduras': 16,
 'Canada': 64,
 'Northern Ireland': 80,
 'Cyprus': 8,
 'Saudi Arabia': 340,
 'Curacao': 14,
 'Moldova': 5,
 'Bolivia': 30,
 'Trinidad & Tobago': 4,
 'Sierra Leone': 6,
 'Zambia': 9,
 'Chad': 2,
 'Philippines': 2,
 'Haiti': 10,
 'Comoros': 6,
 'Libya': 4,
 'Panama': 15,
 'São Tomé & Príncipe': 1,
 'Eritrea': 2,
 'Oman': 1,
 'Iraq': 7,
 'Burundi': 3,
 'Fiji': 1,
 'New Caledonia': 1,
 'Lithuania': 8,
 'Luxembourg': 8,
 'Korea DPR': 4,
 'Liechtenstein': 3,
 'St Kitts Nevis': 3,
 'Latvia': 6,
 'Suriname': 4,
 'Uganda': 6,
 'El Salvador': 5,
 'Bermuda': 2,
 'Kuwait': 1,
 'Antigua & Barbuda': 4,
 'Thailand': 5,
 'Mauritius': 1,
 'Guatemala': 3,
 'Liberia': 1,
 'Kazakhstan': 4,
 'Niger': 3,
 'Mauritania': 4,
 'Montserrat': 4,
 'Namibia': 3,
 'Azerbaijan': 5,
 'Guam': 1,
 'Faroe Islands': 6,
 'India': 30,
 'Nicaragua': 2,
 'Barbados': 3,
 'Lebanon': 1,
 'Palestine': 1,
 'Guyana': 3,
 'Sudan': 3,
 'St Lucia': 1,
 'Ethiopia': 1,
 'Puerto Rico': 1,
 'Grenada': 1,
 'Jordan': 1,
 'Rwanda': 1,
 'Qatar': 1,
 'Afghanistan': 4,
 'Hong Kong': 2,
 'Andorra': 1,
 'Malta': 1,
 'Belize': 1,
 'South Sudan': 1,
 'Indonesia': 1,
 'Botswana': 1}
# q17: how many players for each Jersey Number?
def jersey_number_players():
    """Return a dict mapping each Jersey Number value to its number of rows.

    Keys are the raw cell values (an empty cell is counted under ``''``) and
    appear in the order they are first seen in ``player_data``.
    """
    jersey_col = header.index('Jersey Number')
    tally = {}
    for entry in player_data:
        number = entry[jersey_col]
        # A jersey number not seen before starts from zero.
        tally[number] = tally.get(number, 0) + 1
    return tally
jersey_number_players()
{'10': 593,
 '7': 604,
 '1': 566,
 '9': 577,
 '15': 501,
 '8': 612,
 '21': 536,
 '13': 419,
 '22': 531,
 '5': 579,
 '3': 547,
 '14': 542,
 '12': 390,
 '11': 590,
 '2': 519,
 '23': 546,
 '26': 390,
 '6': 586,
 '17': 554,
 '18': 545,
 '4': 573,
 '19': 545,
 '31': 280,
 '25': 409,
 '37': 135,
 '30': 371,
 '44': 74,
 '29': 358,
 '24': 425,
 '20': 568,
 '16': 517,
 '33': 287,
 '28': 357,
 '27': 423,
 '77': 77,
 '47': 28,
 '38': 102,
 '40': 114,
 '92': 10,
 '36': 145,
 '87': 14,
 '34': 207,
 '32': 250,
 '83': 6,
 '70': 29,
 '35': 183,
 '89': 15,
 '56': 17,
 '99': 70,
 '57': 8,
 '91': 21,
 '86': 4,
 '45': 47,
 '63': 4,
 '39': 107,
 '43': 41,
 '42': 54,
 '93': 14,
 '72': 7,
 '71': 10,
 '88': 44,
 '55': 35,
 '80': 16,
 '50': 42,
 '66': 27,
 '60': 10,
 '73': 6,
 '67': 5,
 '74': 2,
 '69': 6,
 '76': 4,
 '41': 53,
 '90': 26,
 '46': 29,
 '75': 6,
 '79': 2,
 '62': 6,
 '81': 5,
 '61': 7,
 '49': 19,
 '95': 12,
 '53': 11,
 '96': 13,
 '97': 16,
 '68': 4,
 '98': 21,
 '94': 11,
 '58': 5,
 '78': 5,
 '': 60,
 '48': 17,
 '52': 10,
 '54': 11,
 '84': 4,
 '82': 5,
 '65': 4,
 '64': 2,
 '51': 7,
 '59': 5,
 '85': 1}
# q18: what is the average player score (represented by the overall column) per Jersey Number?
def jersey_total_score():
    """Return a dict mapping each Jersey Number value to its summed Overall.

    Keys appear in the order the jersey numbers are first seen in
    ``player_data``.
    """
    jersey_col = header.index('Jersey Number')
    overall_col = header.index('Overall')
    totals = {}
    for entry in player_data:
        number = entry[jersey_col]
        score = entry[overall_col]
        if number in totals:
            totals[number] += score
        else:
            # The first score seen for this jersey number seeds the total.
            totals[number] = score
    return totals
def jersey_average_score():
    """Return a dict mapping each Jersey Number value to its mean Overall.

    The mean is the total from ``jersey_total_score`` divided by the count
    from ``jersey_number_players`` for the same key.
    """
    counts = jersey_number_players()
    totals = jersey_total_score()
    return {number: totals[number] / counts[number] for number in counts}
jersey_average_score()
{'10': 70.38617200674537,
 '7': 68.87251655629139,
 '1': 68.35689045936395,
 '9': 69.28769497400347,
 '15': 66.53493013972056,
 '8': 68.83006535947712,
 '21': 66.2723880597015,
 '13': 66.90214797136038,
 '22': 66.31638418079096,
 '5': 68.49740932642487,
 '3': 67.38939670932358,
 '14': 66.9870848708487,
 '12': 65.43076923076923,
 '11': 68.3406779661017,
 '2': 67.58766859344894,
 '23': 66.46886446886447,
 '26': 64.04358974358975,
 '6': 68.19283276450511,
 '17': 66.9115523465704,
 '18': 66.45688073394496,
 '4': 67.76614310645724,
 '19': 66.63669724770642,
 '31': 63.06785714285714,
 '25': 64.59413202933985,
 '37': 61.785185185185185,
 '30': 63.1644204851752,
 '44': 64.70270270270271,
 '29': 63.92178770949721,
 '24': 64.65411764705883,
 '20': 66.7306338028169,
 '16': 66.08510638297872,
 '33': 63.29965156794425,
 '28': 63.943977591036415,
 '27': 64.36170212765957,
 '77': 66.1038961038961,
 '47': 63.57142857142857,
 '38': 61.77450980392157,
 '40': 61.51754385964912,
 '92': 68.9,
 '36': 60.5448275862069,
 '87': 68.35714285714286,
 '34': 62.072463768115945,
 '32': 62.152,
 '83': 67.66666666666667,
 '70': 64.72413793103448,
 '35': 60.92896174863388,
 '89': 65.4,
 '56': 65.11764705882354,
 '99': 64.35714285714286,
 '57': 68.125,
 '91': 66.76190476190476,
 '86': 64.0,
 '45': 62.0,
 '63': 69.0,
 '39': 62.52336448598131,
 '43': 60.048780487804876,
 '42': 62.5,
 '93': 67.28571428571429,
 '72': 65.0,
 '71': 64.5,
 '88': 66.5,
 '55': 63.0,
 '80': 63.4375,
 '50': 62.30952380952381,
 '66': 63.77777777777778,
 '60': 64.1,
 '73': 64.33333333333333,
 '67': 64.6,
 '74': 67.5,
 '69': 68.66666666666667,
 '76': 68.0,
 '41': 61.37735849056604,
 '90': 65.61538461538461,
 '46': 60.206896551724135,
 '75': 66.5,
 '79': 71.5,
 '62': 62.0,
 '81': 65.0,
 '61': 61.57142857142857,
 '49': 59.68421052631579,
 '95': 65.25,
 '53': 62.45454545454545,
 '96': 64.53846153846153,
 '97': 62.4375,
 '68': 67.0,
 '98': 60.904761904761905,
 '94': 66.72727272727273,
 '58': 62.8,
 '78': 65.6,
 '': 61.63333333333333,
 '48': 61.470588235294116,
 '52': 61.8,
 '54': 61.0,
 '84': 63.75,
 '82': 59.8,
 '65': 58.5,
 '64': 62.5,
 '51': 58.0,
 '59': 57.2,
 '85': 57.0}
#q19: which Jersey Number has highest average overall?
jersey_ave_score = jersey_average_score()
max(jersey_ave_score, key=jersey_ave_score.get)
'79'
#q20: which club has highest average wage?
def club_number_players():
    """Return a dict mapping each Club value to its number of rows.

    Keys appear in the order the clubs are first seen in ``player_data``.
    """
    club_col = header.index('Club')
    tally = {}
    for entry in player_data:
        team = entry[club_col]
        # A club not seen before starts from zero.
        tally[team] = tally.get(team, 0) + 1
    return tally
def club_total_wage():
    """Return a dict mapping each Club value to the sum of its wages.

    Each Wage cell is converted with ``str_to_num`` before being added.
    Keys appear in the order the clubs are first seen in ``player_data``.
    """
    club_col = header.index('Club')
    wage_col = header.index('Wage')
    totals = {}
    for entry in player_data:
        team = entry[club_col]
        pay = str_to_num(entry[wage_col])
        if team in totals:
            totals[team] += pay
        else:
            # The first wage seen for this club seeds the total.
            totals[team] = pay
    return totals
def club_average_wage():
    """Return a dict mapping each Club value to its mean wage.

    The mean is the total from ``club_total_wage`` divided by the count from
    ``club_number_players`` for the same key.
    """
    counts = club_number_players()
    totals = club_total_wage()
    return {team: totals[team] / counts[team] for team in counts}
club_ave_wage = club_average_wage()
max(club_ave_wage, key=club_ave_wage.get)
'Real Madrid'

  TOC