53  Exercise 3: Double Dash!!(boards)

You’re now going to create a dashboard in Streamlit.

Here is a link to the dataset. Link

You can use the starter code from https://github.com/hsma-programme/h6_7b_web_apps_1/blob/main/exercises/exercise_3/starter_code.py

import pandas as pd
import plotly.express as px
import geopandas
import matplotlib.pyplot as plt

###########################################################
# Display a plot of the medals won by a country over time
###########################################################

# ========================== #
chosen_country = "UK"
# ========================== #

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

# Turn the dataframe into a 'long' format for plotting - don't worry about what that means!
# What we'll end up with is a dataframe with columns for Year, Country, Medal Type (called 'variable')
# and number of medals (called 'value')
medals_per_country_per_year_long = medals_per_country_per_year.melt(id_vars=["Year", "Country", "NOC"])

# Create a line plot using the plotly express library
fig = px.line(
    # Filter our dataframe to just our 'chosen_country'
    medals_per_country_per_year_long[medals_per_country_per_year_long["Country"] == chosen_country],
    # The value to plot on the vertical axis - this corresponds to the number of medals
    y="value",
    # Plot the year on the x axis
    x="Year",
    # Plot the number of medals on the y axis
    color="variable",
    # Colour the medals as orange for bronze, silver for silver, gold for gold, and blue for total
    color_discrete_sequence=["orange", "silver", "gold", "blue"],
    # Add a title, using an f-string to include the name of the chosen country
    title=f"Medals Won over Time - {chosen_country}"
    )

# Display the plotly plot in an interactive window
fig.show()

##################################################################
# Display a table of just the medal winners in Athletics in 2012
##################################################################

# ========================== #
year = 2012
# ========================== #

# ========================== #
sport = "Athletics"
# ========================== #

country_medals_by_event = pd.read_csv("country_medals_by_event.csv")

country_medals_by_event_filtered = (
    country_medals_by_event[
        (country_medals_by_event["Sport"] == sport) &
        (country_medals_by_event["Year"] == year)
    ])

print(country_medals_by_event_filtered[['Event', 'Bronze', 'Silver', 'Gold']])

############################################################################
# Print out the country that won the most gold medals in the selected year
############################################################################
# Hint - this might make a nice metric card!

# ========================== #
year = 2012
# ========================== #

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

# Filter the dataframe to just the rows for the selected year
medals_per_country_per_year_filtered = (
    medals_per_country_per_year[medals_per_country_per_year["Year"] == year]
    )

# Sort by number of gold medals won in descending order (so the most medals is at the top of the table)
# Then just keep the first row (i.e. the one for the country with the most gold medals in the
# selected year)
top_gold_medal_winner_row = (
    medals_per_country_per_year
    .sort_values(["Gold"], ascending=False)
    .head(1)
    )

# Using an f string, populate the text with the number of gold medals this country won
print(f"The country that won the most medals in 2012 was" +
      f" {top_gold_medal_winner_row['Country'].values[0]}" +
      f" with {top_gold_medal_winner_row['Gold'].values[0]:.0f} gold medals.")


###########################################################################
# Print out the number of gold medals won by the selected country in 2012
###########################################################################
# Hint - this might make a nice metric card!

# ========================== #
year = 2012
# ========================== #

# ========================== #
chosen_country = "UK"
# ========================== #

# Filter the medals table down to the selected year and country
medals_chosen_country = (
    medals_per_country_per_year[
        (medals_per_country_per_year["Year"] == year) &
        (medals_per_country_per_year["Country"] == chosen_country)
        ]
    )

# Print out how many medals the country won
print(f"The {chosen_country} won {medals_chosen_country['Gold'].values[0]:.0f}" +
      f" gold medals in {year}.")

# Calculate the difference between this country and the top scorer
# Hint - this might be an interesting value to display as part of a metric card...
difference = medals_chosen_country['Gold'].values[0] - top_gold_medal_winner_row['Gold'].values[0]

# Print out a message with the difference
if difference < 0:
    print(f"This was {abs(difference):.0f} fewer" +
          f" than {top_gold_medal_winner_row['Country'].values[0]}")
# Unless the difference = 0, in which case the selected country is also the most successful country
else:
    print("They were the top country for gold medals in that year.")


#########################################
# Print a map of medal winners in 2012
#########################################

# ========================== #
year = 2012
# ========================== #

# ========================== #
selected_medal_type="Gold"
# ========================== #

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

country_outlines = geopandas.read_file("countries_outlines.geojson")

medals_per_country_per_year_gdf = pd.concat([
    pd.merge(country_outlines, medals_per_country_per_year, left_on="id", right_on="NOC", how="inner"),
    pd.merge(country_outlines, medals_per_country_per_year, left_on="name", right_on="Country", how="inner")
]).drop_duplicates()

fig, ax = plt.subplots(figsize=(15, 8))

ax = medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == year].plot(
    selected_medal_type, legend=True, ax=ax
    )

ax.axis('off')

plt.title(f"Number of {selected_medal_type} Medals in {year}")

fig.show()


###################################################
# Print out possible countries to select from for
# the next run of the script
###################################################

print("The countries you could choose from are: " +
      f"{medals_per_country_per_year['Country'].unique().tolist()}")

print(f"Choose a year between {medals_per_country_per_year['Year'].min()}" +
      f" and {medals_per_country_per_year['Year'].max()}")

print("The sports you could choose from are: " +
      f"{country_medals_by_event['Sport'].unique().tolist()}")
                                            Event          Bronze  \
4612                 Athletics Men's 1,500 metres         Morocco   
4613                Athletics Men's 10,000 metres        Ethiopia   
4614                   Athletics Men's 100 metres             USA   
4615           Athletics Men's 110 metres Hurdles         Jamaica   
4616           Athletics Men's 20 kilometres Walk           China   
4617                   Athletics Men's 200 metres         Jamaica   
4618    Athletics Men's 3,000 metres Steeplechase           Kenya   
4619         Athletics Men's 4 x 100 metres Relay          France   
4620         Athletics Men's 4 x 400 metres Relay        Trinidad   
4621                   Athletics Men's 400 metres        Trinidad   
4622           Athletics Men's 400 metres Hurdles     Puerto Rico   
4623                 Athletics Men's 5,000 metres           Kenya   
4624           Athletics Men's 50 kilometres Walk           China   
4625                   Athletics Men's 800 metres           Kenya   
4626                    Athletics Men's Decathlon            Cuba   
4627                 Athletics Men's Discus Throw         Estonia   
4628                 Athletics Men's Hammer Throw           Japan   
4629                    Athletics Men's High Jump          Canada   
4630                Athletics Men's Javelin Throw         Finland   
4631                    Athletics Men's Long Jump             USA   
4632                     Athletics Men's Marathon           Kenya   
4633                   Athletics Men's Pole Vault         Germany   
4634                     Athletics Men's Shot Put             USA   
4635                  Athletics Men's Triple Jump           Italy   
4636               Athletics Women's 1,500 metres         Bahrain   
4637              Athletics Women's 10,000 metres           Kenya   
4638                 Athletics Women's 100 metres         Jamaica   
4639         Athletics Women's 100 metres Hurdles             USA   
4640         Athletics Women's 20 kilometres Walk           China   
4641                 Athletics Women's 200 metres             USA   
4642  Athletics Women's 3,000 metres Steeplechase        Ethiopia   
4643       Athletics Women's 4 x 100 metres Relay         Ukraine   
4644       Athletics Women's 4 x 400 metres Relay         Jamaica   
4645                 Athletics Women's 400 metres             USA   
4646         Athletics Women's 400 metres Hurdles  Czech Republic   
4647               Athletics Women's 5,000 metres        Ethiopia   
4648                 Athletics Women's 800 metres          Russia   
4649               Athletics Women's Discus Throw           China   
4650               Athletics Women's Hammer Throw         Germany   
4651                 Athletics Women's Heptathlon          Russia   
4652                  Athletics Women's High Jump          Russia   
4653              Athletics Women's Javelin Throw         Germany   
4654                  Athletics Women's Long Jump             USA   
4655                   Athletics Women's Marathon          Russia   
4656                 Athletics Women's Pole Vault          Russia   
4657                   Athletics Women's Shot Put           China   
4658                Athletics Women's Triple Jump         Ukraine   

                  Silver                Gold  
4612                 USA             Algeria  
4613                 USA                  UK  
4614             Jamaica             Jamaica  
4615                 USA                 USA  
4616           Guatemala               China  
4617             Jamaica             Jamaica  
4618              France               Kenya  
4619            Trinidad             Jamaica  
4620                 USA             Bahamas  
4621  Dominican Republic             Grenada  
4622                 USA  Dominican Republic  
4623            Ethiopia                  UK  
4624           Australia              Russia  
4625            Botswana               Kenya  
4626                 USA                 USA  
4627                Iran             Germany  
4628            Slovenia             Hungary  
4629                 USA              Russia  
4630             Ukraine            Trinidad  
4631           Australia                  UK  
4632               Kenya              Uganda  
4633             Germany              France  
4634             Germany              Poland  
4635                 USA                 USA  
4636              Turkey              Turkey  
4637               Kenya            Ethiopia  
4638                 USA             Jamaica  
4639                 USA           Australia  
4640              Russia              Russia  
4641             Jamaica                 USA  
4642             Tunisia              Russia  
4643             Jamaica                 USA  
4644              Russia                 USA  
4645                  UK                 USA  
4646                 USA              Russia  
4647               Kenya            Ethiopia  
4648        South Africa              Russia  
4649              Russia             Croatia  
4650              Poland              Russia  
4651             Germany                  UK  
4652                 USA              Russia  
4653             Germany      Czech Republic  
4654              Russia                 USA  
4655               Kenya            Ethiopia  
4656                Cuba                 USA  
4657              Russia         New Zealand  
4658            Colombia          Kazakhstan  
The country that won the most medals in 2012 was USA with 82 gold medals.
The UK won 29 gold medals in 2012.
This was 53 fewer than USA
The countries you could choose from are: ['Australia', 'Austria', 'Denmark', 'France', 'Germany', 'Greece', 'Hungary', 'Switzerland', 'UK', 'USA', 'Belgium', 'Canada', 'Colombia', 'Cuba', 'Czech Republic', 'Haiti', 'India', 'Italy', 'Luxembourg', 'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Spain', 'Sweden', 'Finland', 'Russia', 'South Africa', 'Brazil', 'Estonia', 'Japan', 'Argentina', 'Ireland', 'Monaco', 'Poland', 'Portugal', 'Romania', 'Serbia', 'Uruguay', 'Chile', 'Egypt', 'Philippines', 'Latvia', 'Turkey', 'Iran', 'Jamaica', 'Panama', 'Peru', 'Puerto Rico', 'South Korea', 'Sri Lanka', 'Trinidad', 'Bulgaria', 'Lebanon', 'Venezuela', 'Bahamas', 'Iceland', 'Pakistan', 'Ethiopia', 'Ghana', 'Iraq', 'Morocco', 'Syria', 'Taiwan', 'Kenya', 'Nigeria', 'Tunisia', 'Cameroon', 'Mongolia', 'Uganda', 'Niger', 'North Korea', 'Bermuda', 'Thailand', 'Guyana', 'Tanzania', 'Zimbabwe', 'Algeria', 'China', 'Dominican Republic', 'Ivory Coast', 'Zambia', 'Costa Rica', 'Curacao', 'Djibouti', 'Indonesia', 'Senegal', 'Suriname', 'Virgin Islands, US', 'Croatia', 'Individual Olympic Athletes', 'Israel', 'Lithuania', 'Malaysia', 'Namibia', 'Qatar', 'Slovenia', 'Armenia', 'Azerbaijan', 'Belarus', 'Burundi', 'Ecuador', 'Georgia', 'Kazakhstan', 'Moldova', 'Mozambique', 'Slovakia', 'Tonga', 'Ukraine', 'Uzbekistan', 'Barbados', 'Kuwait', 'Kyrgyzstan', 'Macedonia', 'Saudi Arabia', 'Vietnam', 'Eritrea', 'Paraguay', 'United Arab Emirates', 'Afghanistan', 'Mauritius', 'Sudan', 'Tajikistan', 'Togo', 'Bahrain', 'Botswana', 'Cyprus', 'Gabon', 'Grenada', 'Guatemala', 'Montenegro', 'Fiji', 'Jordan', 'Kosovo']
Choose a year between 1896 and 2016
The sports you could choose from are: ['Athletics', 'Cycling', 'Fencing', 'Gymnastics', 'Shooting', 'Swimming', 'Tennis', 'Weightlifting', 'Wrestling', 'Archery', 'Basque Pelota', 'Cricket', 'Croquet', 'Equestrianism', 'Football', 'Golf', 'Polo', 'Rowing', 'Rugby', 'Sailing', 'Tug-Of-War', 'Water Polo', 'Boxing', 'Diving', 'Lacrosse', 'Roque', 'Figure Skating', 'Hockey', 'Jeu De Paume', 'Motorboating', 'Racquets', 'Art Competitions', 'Modern Pentathlon', 'Ice Hockey', 'Alpinism', 'Aeronautics', 'Basketball', 'Canoeing', 'Handball', 'Judo', 'Volleyball', 'Rhythmic Gymnastics', 'Synchronized Swimming', 'Table Tennis', 'Badminton', 'Baseball', 'Beach Volleyball', 'Softball', 'Taekwondo', 'Trampolining', 'Triathlon', 'Rugby Sevens']
C:\Users\Sammi\AppData\Local\Temp\ipykernel_48536\2287783435.py:162: UserWarning:

FigureCanvasAgg is non-interactive, and thus cannot be shown

Try to include at least

If you have time, try adding in

We’ll work on this until 4:05.

Don’t forget to refer to the Streamlit book - all of the above are covered in there.

I’ll be asking each group to share their code afterwards - we will upload your code to a repository and I’ll publish each app on Streamlit Community Cloud so you can see each other’s dashboards easily.

53.1 Dataset Overview

Several dataframes are provided, as well as a geojson file of country outlines for mapping purposes.

import pandas as pd

athlete_details_eventwise = pd.read_csv("athlete_details_eventwise.csv")

athlete_details_eventwise.head(5)
Name Sex Age Height Weight Country NOC Year Sport Event Medal
0 Ahmad Shah Abouwi M NaN NaN NaN Afghanistan AFG 1956 Hockey Hockey Men's Hockey NaN
1 Jammal-ud-Din Affendi M 28.0 NaN NaN Afghanistan AFG 1936 Hockey Hockey Men's Hockey NaN
2 Mohammad Anwar Afzal M NaN NaN NaN Afghanistan AFG 1948 Football Football Men's Football NaN
3 Mohammad Aktar M 17.0 156.0 48.0 Afghanistan AFG 1980 Wrestling Wrestling Men's Light-Flyweight, Freestyle NaN
4 Mohammad Daoud Anwary M 22.0 NaN NaN Afghanistan AFG 1964 Wrestling Wrestling Men's Bantamweight, Freestyle NaN
import pandas as pd

country_medals_by_event = pd.read_csv("country_medals_by_event.csv")

country_medals_by_event.head()
Year Sport Event Bronze Silver Gold
0 1896 Athletics Athletics Men's 1,500 metres France USA Australia
1 1896 Athletics Athletics Men's 100 metres Hungary Germany USA
2 1896 Athletics Athletics Men's 110 metres Hurdles Data Missing UK USA
3 1896 Athletics Athletics Men's 400 metres UK USA USA
4 1896 Athletics Athletics Men's 800 metres Greece Hungary Australia
import pandas as pd

games_cities = pd.read_csv("games_cities.csv")

games_cities.head()
Year City
0 1896 Athina
1 1900 Paris
2 1904 St. Louis
3 1906 Athina
4 1908 London
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year.head()
Year Country NOC Bronze Silver Gold Total
0 1896 Australia AUS 1.0 0.0 2.0 3.0
1 1896 Austria AUT 2.0 1.0 2.0 5.0
2 1896 Denmark DEN 3.0 2.0 1.0 6.0
3 1896 France FRA 2.0 4.0 5.0 11.0
4 1896 Germany GER 2.0 5.0 7.0 14.0
import geopandas

country_outlines = geopandas.read_file("countries_outlines.geojson")

country_outlines.plot()

53.2 Code Snippets

So you don’t have to spend too much time focussing on data manipulation and visualisation, here are a range of premade dataframes, graphs and maps you could choose to display in your app.

53.2.1 Dataframes

import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year.sort_values("Total", ascending=False).head(15)
Year Country NOC Bronze Silver Gold Total
45 1904 USA USA 75.0 79.0 76.0 230.0
611 1980 Russia URS 46.0 69.0 80.0 195.0
665 1984 USA USA 30.0 61.0 82.0 173.0
81 1908 UK GBR 37.0 50.0 56.0 143.0
707 1988 Russia URS 46.0 31.0 54.0 131.0
596 1980 Germany GDR 42.0 37.0 47.0 126.0
574 1976 Russia URS 35.0 41.0 49.0 125.0
1265 2016 USA USA 38.0 37.0 46.0 121.0
771 1992 Russia EUN 29.0 38.0 45.0 112.0
1096 2008 USA USA 35.0 39.0 36.0 110.0
215 1932 USA USA 30.0 36.0 44.0 110.0
783 1992 USA USA 37.0 34.0 37.0 108.0
493 1968 USA USA 34.0 28.0 45.0 107.0
1181 2012 USA USA 29.0 28.0 46.0 103.0
398 1960 Russia URS 31.0 29.0 43.0 103.0
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year.sort_values(["Gold", "Total"], ascending=True).head(15)
Year Country NOC Bronze Silver Gold Total
14 1900 Colombia COL 0.0 1.0 0.0 1.0
25 1900 Mexico MEX 1.0 0.0 0.0 1.0
65 1908 Austria AUT 1.0 0.0 0.0 1.0
111 1920 Greece GRE 0.0 1.0 0.0 1.0
114 1920 Luxembourg LUX 0.0 1.0 0.0 1.0
116 1920 New Zealand NZL 1.0 0.0 0.0 1.0
135 1924 Haiti HAI 1.0 0.0 0.0 1.0
139 1924 Japan JPN 1.0 0.0 0.0 1.0
141 1924 Monaco MON 1.0 0.0 0.0 1.0
143 1924 New Zealand NZL 1.0 0.0 0.0 1.0
146 1924 Portugal POR 1.0 0.0 0.0 1.0
147 1924 Romania ROU 1.0 0.0 0.0 1.0
160 1928 Chile CHI 0.0 1.0 0.0 1.0
168 1928 Haiti HAI 0.0 1.0 0.0 1.0
178 1928 Philippines PHI 1.0 0.0 0.0 1.0
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

total_medals_all_time.head(15)
Country Bronze Silver Gold Total
123 USA 707.0 802.0 1035.0 2544.0
98 Russia 487.0 498.0 592.0 1577.0
41 Germany 490.0 457.0 442.0 1389.0
122 UK 298.0 316.0 278.0 892.0
38 France 282.0 255.0 233.0 770.0
57 Italy 198.0 191.0 219.0 608.0
20 China 154.0 163.0 228.0 545.0
4 Australia 197.0 171.0 150.0 518.0
110 Sweden 188.0 175.0 150.0 513.0
48 Hungary 172.0 154.0 178.0 504.0
60 Japan 161.0 134.0 142.0 437.0
37 Finland 120.0 86.0 104.0 310.0
18 Canada 137.0 104.0 64.0 305.0
97 Romania 120.0 95.0 88.0 303.0
82 Netherlands 114.0 95.0 88.0 297.0
import pandas as pd

medals_by_event = pd.read_csv("country_medals_by_event.csv")
medals_by_event
Year Sport Event Bronze Silver Gold
0 1896 Athletics Athletics Men's 1,500 metres France USA Australia
1 1896 Athletics Athletics Men's 100 metres Hungary Germany USA
2 1896 Athletics Athletics Men's 110 metres Hurdles Data Missing UK USA
3 1896 Athletics Athletics Men's 400 metres UK USA USA
4 1896 Athletics Athletics Men's 800 metres Greece Hungary Australia
... ... ... ... ... ... ...
5211 2016 Wrestling Wrestling Women's Flyweight, Freestyle Bulgaria Azerbaijan Japan
5212 2016 Wrestling Wrestling Women's Heavyweight, Freestyle China Kazakhstan Canada
5213 2016 Wrestling Wrestling Women's Light-Heavyweight, Freestyle Kazakhstan Russia Japan
5214 2016 Wrestling Wrestling Women's Lightweight, Freestyle India Russia Japan
5215 2016 Wrestling Wrestling Women's Middleweight, Freestyle Kazakhstan Belarus Japan

5216 rows × 6 columns

import pandas as pd

selected_year = 2012
selected_sport = "Judo"

medals_by_event = pd.read_csv("country_medals_by_event.csv")
medals_by_event
Year Sport Event Bronze Silver Gold
0 1896 Athletics Athletics Men's 1,500 metres France USA Australia
1 1896 Athletics Athletics Men's 100 metres Hungary Germany USA
2 1896 Athletics Athletics Men's 110 metres Hurdles Data Missing UK USA
3 1896 Athletics Athletics Men's 400 metres UK USA USA
4 1896 Athletics Athletics Men's 800 metres Greece Hungary Australia
... ... ... ... ... ... ...
5211 2016 Wrestling Wrestling Women's Flyweight, Freestyle Bulgaria Azerbaijan Japan
5212 2016 Wrestling Wrestling Women's Heavyweight, Freestyle China Kazakhstan Canada
5213 2016 Wrestling Wrestling Women's Light-Heavyweight, Freestyle Kazakhstan Russia Japan
5214 2016 Wrestling Wrestling Women's Lightweight, Freestyle India Russia Japan
5215 2016 Wrestling Wrestling Women's Middleweight, Freestyle Kazakhstan Belarus Japan

5216 rows × 6 columns

import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

most_events_per_year_by_athlete = athlete_statistics.value_counts(['Name', 'Country', 'Year']).reset_index()

most_events_per_year_by_athlete.head(15)
Name Country Year count
0 Robert Tait McKenzie Canada 1932 44
1 Alfrd (Arnold-) Hajs (Guttmann-) Hungary 1928 28
2 Miltiades Manno Hungary 1932 27
3 Alfred James Munnings UK 1948 25
4 Wilhelm (William) Hunt Diederich USA 1932 19
5 Acee Blue Eagle USA 1932 18
6 Jean Lucien Nicolas Jacoby Luxembourg 1936 17
7 Stanisaw Noakowski Poland 1928 17
8 Mahonri Mackintosh Young USA 1932 16
9 ngel Zrraga Argelles Mexico 1928 16
10 Georges-mile Fauvelle France 1928 16
11 Pierre De Soete Belgium 1928 16
12 Willis Augustus Lee, Jr. USA 1920 15
13 Charles Zeller Klauder USA 1928 14
14 Georg Vilhelm Lagerstedt Sweden 1936 13
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

most_events_per_year_by_athlete = athlete_statistics.value_counts(['Name', 'Country', 'Year']).reset_index()

athlete_name_most_events = most_events_per_year_by_athlete.head(1)['Name'].values[0]
athlete_name_most_events_year = most_events_per_year_by_athlete.head(1)['Year'].values[0]

athlete_statistics[(athlete_statistics["Name"] == athlete_name_most_events) &
                    (athlete_statistics["Year"] == athlete_name_most_events_year)].head(15)
Name Sex Age Height Weight Country NOC Year Sport Event Medal
32262 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Medals And... Bronze
32263 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32264 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32265 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32266 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32267 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32268 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32269 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32270 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32271 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32272 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32273 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32274 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32275 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32276 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics.value_counts(['Name', 'Sex', 'Country']).reset_index()
Name Sex Country count
0 Robert Tait McKenzie M Canada 58
1 Heikki Ilmari Savolainen M Finland 39
2 Joseph "Josy" Stoffel M Luxembourg 38
3 Ioannis Theofilakis M Greece 36
4 Takashi Ono M Japan 33
... ... ... ... ...
116737 Jan Wijnants M Belgium 1
116738 Jan Wallisch M Czech Republic 1
116739 Jan Wagenaar M Netherlands 1
116740 Jan Wadas M Poland 1
116741 Karen Rayr van der Veen (-Wilkinson) F South Africa 1

116742 rows × 4 columns

import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

chosen_athlete = "Simone Arianne Biles"

athlete_statistics[athlete_statistics["Name"] == chosen_athlete]
Name Sex Age Height Weight Country NOC Year Sport Event Medal
203896 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Individual All-Around Gold
203897 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Team All-Around Gold
203898 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Floor Exercise Gold
203899 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Horse Vault Gold
203900 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Uneven Bars NaN
203901 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Balance Beam Bronze
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

search_string = "Ben"

athlete_statistics[athlete_statistics["Name"].str.contains(search_string)]
Name Sex Age Height Weight Country NOC Year Sport Event Medal
271 Benaissa Abed M 23.0 170.0 51.0 Algeria ALG 1988 Boxing Boxing Men's Flyweight NaN
327 Ben Ali Beghouach M 21.0 186.0 89.0 Algeria ALG 1988 Handball Handball Men's Handball NaN
328 Ben Ali Beghouach M 29.0 186.0 89.0 Algeria ALG 1996 Handball Handball Men's Handball NaN
348 Djillali Ben Brahim M 27.0 178.0 71.0 Algeria ALG 1980 Judo Judo Men's Lightweight NaN
349 Reda Ben Chehima M 26.0 175.0 75.0 Algeria ALG 2004 Fencing Fencing Men's Sabre, Individual NaN
... ... ... ... ... ... ... ... ... ... ... ...
219770 Lidija Benedeti-Lapajne F 25.0 175.0 64.0 Serbia YUG 1984 Athletics Athletics Women's High Jump NaN
219771 Marijan Bene M 25.0 170.0 68.0 Serbia YUG 1976 Boxing Boxing Men's Welterweight NaN
221821 Benson Mulomba M 22.0 177.0 54.0 Zambia ZAM 1972 Athletics Athletics Men's 800 metres NaN
221843 Benny Muziyo M 23.0 NaN NaN Zambia ZAM 2016 Boxing Boxing Men's Middleweight NaN
222158 Gavin Ben Sutherland M 37.0 186.0 78.0 Zimbabwe ZIM 2016 Archery Archery Men's Individual NaN

1617 rows × 11 columns

import pandas as pd

selected_year = 2012

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics[athlete_statistics["Year"] == selected_year][['Sport', 'Event']].drop_duplicates().reset_index(drop=True).head(15)
Sport Event
0 Athletics Athletics Men's 100 metres
1 Taekwondo Taekwondo Men's Welterweight
2 Boxing Boxing Men's Flyweight
3 Judo Judo Men's Half-Lightweight
4 Athletics Athletics Women's 100 metres
5 Taekwondo Taekwondo Men's Featherweight
6 Weightlifting Weightlifting Women's Lightweight
7 Swimming Swimming Women's 100 metres Butterfly
8 Weightlifting Weightlifting Men's Lightweight
9 Athletics Athletics Men's Shot Put
10 Swimming Swimming Men's 100 metres Freestyle
11 Weightlifting Weightlifting Men's Middle-Heavyweight
12 Judo Judo Women's Half-Lightweight
13 Shooting Shooting Men's Air Pistol, 10 metres
14 Shooting Shooting Men's Free Pistol, 50 metres
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics[athlete_statistics["Year"] == selected_year][['Sport', 'Event']].drop_duplicates().value_counts('Sport').reset_index().head(15)
Sport count
0 Athletics 47
1 Swimming 34
2 Wrestling 18
3 Cycling 18
4 Canoeing 16
5 Shooting 15
6 Weightlifting 15
7 Gymnastics 14
8 Judo 14
9 Rowing 14
10 Boxing 13
11 Fencing 10
12 Sailing 10
13 Taekwondo 8
14 Diving 8

53.2.2 Useful Lists and Info From Datasets

import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

all_sports = athlete_statistics["Sport"].drop_duplicates().tolist()

print(all_sports)
['Hockey', 'Football', 'Wrestling', 'Athletics', 'Taekwondo', 'Judo', 'Boxing', 'Weightlifting', 'Shooting', 'Fencing', 'Swimming', 'Sailing', 'Synchronized Swimming', 'Triathlon', 'Equestrianism', 'Cycling', 'Volleyball', 'Handball', 'Rowing', 'Table Tennis', 'Tennis', 'Gymnastics', 'Trampolining', 'Badminton', 'Canoeing', 'Beach Volleyball', 'Basketball', 'Diving', 'Rugby', 'Rugby Sevens', 'Polo', 'Modern Pentathlon', 'Archery', 'Water Polo', 'Art Competitions', 'Golf', 'Figure Skating', 'Softball', 'Baseball', 'Rhythmic Gymnastics', 'Tug-Of-War', 'Ice Hockey', 'Lacrosse', 'Basque Pelota', 'Cricket', 'Croquet', 'Motorboating', 'Racquets', 'Jeu De Paume', 'Alpinism', 'Aeronautics', 'Roque']
import pandas as pd

selected_year = 2012

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics_year = athlete_statistics[athlete_statistics["Year"] == selected_year]

all_events = athlete_statistics_year["Event"].drop_duplicates().tolist()

print(all_events)
["Athletics Men's 100 metres", "Taekwondo Men's Welterweight", "Boxing Men's Flyweight", "Judo Men's Half-Lightweight", "Athletics Women's 100 metres", "Taekwondo Men's Featherweight", "Weightlifting Women's Lightweight", "Swimming Women's 100 metres Butterfly", "Weightlifting Men's Lightweight", "Athletics Men's Shot Put", "Swimming Men's 100 metres Freestyle", "Weightlifting Men's Middle-Heavyweight", "Judo Women's Half-Lightweight", "Shooting Men's Air Pistol, 10 metres", "Shooting Men's Free Pistol, 50 metres", "Boxing Men's Welterweight", "Volleyball Women's Volleyball", "Athletics Men's 5,000 metres", "Athletics Women's Marathon", "Judo Women's Heavyweight", "Athletics Men's 3,000 metres Steeplechase", "Wrestling Men's Lightweight, Greco-Roman", "Boxing Men's Light-Heavyweight", "Weightlifting Men's Heavyweight", "Boxing Men's Heavyweight", "Boxing Men's Lightweight", "Taekwondo Men's Flyweight", "Athletics Men's Marathon", "Boxing Men's Light-Flyweight", "Fencing Women's Foil, Individual", "Cycling Men's Road Race, Individual", "Wrestling Men's Light-Heavyweight, Freestyle", "Athletics Men's 800 metres", "Athletics Men's 1,500 metres", "Fencing Women's Sabre, Individual", "Athletics Men's Triple Jump", "Boxing Men's Bantamweight", "Boxing Men's Middleweight", "Rowing Women's Single Sculls", "Wrestling Men's Welterweight, Greco-Roman", "Swimming Men's 200 metres Butterfly", "Swimming Women's 100 metres Backstroke", "Shooting Men's Trap", "Handball Women's Handball", "Basketball Women's Basketball", "Athletics Women's 800 metres", "Swimming Women's 50 metres Freestyle", "Canoeing Men's Canadian Singles, 200 metres", "Canoeing Men's Canadian Doubles, 1,000 metres", "Judo Women's Middleweight", "Canoeing Men's Canadian Singles, 1,000 metres", "Swimming Men's 400 metres Individual Medley", "Athletics Women's 400 metres", "Athletics Men's 200 metres", "Rowing Women's Coxless Pairs", "Sailing Men's One Person Dinghy", "Shooting Men's Small-Bore Rifle, Prone, 50 metres", "Volleyball Men's Volleyball", "Hockey Women's Hockey", "Swimming Women's 400 metres Individual Medley", "Hockey Men's Hockey", "Tennis Men's Singles", "Wrestling Women's Flyweight, Freestyle", "Swimming Women's 800 metres Freestyle", "Swimming Women's 10 kilometres Open Water", "Sailing Men's Two Person Dinghy", "Basketball Men's Basketball", "Judo Men's Middleweight", "Handball Men's Handball", "Athletics Men's 20 kilometres Walk", "Sailing Women's One Person Dinghy", "Rowing Men's Lightweight Double Sculls", "Athletics Men's Hammer Throw", "Athletics Women's Discus Throw", "Canoeing Men's Kayak Doubles, 200 metres", "Athletics Women's Hammer Throw", 'Tennis Mixed Doubles', "Tennis Women's Doubles", "Rowing Men's Single Sculls", "Beach Volleyball Women's Beach Volleyball", "Swimming Men's 50 metres Freestyle", "Swimming Men's 100 metres Backstroke", "Rowing Women's Lightweight Double Sculls", 'Equestrianism Mixed Jumping, Individual', "Athletics Men's Discus Throw", "Table Tennis Men's Singles", "Taekwondo Women's Flyweight", "Sailing Women's Windsurfer", "Judo Men's Half-Middleweight", "Gymnastics Men's Floor Exercise", "Gymnastics Men's Rings", "Sailing Women's Two Person Dinghy", "Tennis Men's Doubles", "Judo Women's Extra-Lightweight", "Swimming Men's 400 metres Freestyle", "Swimming Men's 1,500 metres Freestyle", "Gymnastics Women's Individual All-Around", "Gymnastics Women's Floor Exercise", "Gymnastics Women's Uneven Bars", "Gymnastics Women's Balance Beam", "Cycling Men's Omnium", "Cycling Men's BMX", "Sailing Men's Windsurfer", "Canoeing Men's Canadian Singles, Slalom", "Rowing Men's Double Sculls", "Synchronized Swimming Women's Duet", "Judo Men's Half-Heavyweight", "Cycling Men's Mountainbike, Cross-Country", "Triathlon Men's Olympic Distance", "Athletics Men's Javelin Throw", "Wrestling Men's Heavyweight, Greco-Roman", "Weightlifting Women's Light-Heavyweight", "Gymnastics Men's Individual All-Around", "Gymnastics Men's Horse Vault", "Gymnastics Men's Parallel Bars", "Gymnastics Men's Horizontal Bar", "Gymnastics Men's Pommelled Horse", "Judo Men's Extra-Lightweight", "Athletics Women's Javelin Throw", "Wrestling Men's Featherweight, Freestyle", "Wrestling Men's Middleweight, Greco-Roman", "Weightlifting Men's Light-Heavyweight", "Weightlifting Women's Super-Heavyweight", "Athletics Men's Long Jump", "Wrestling Men's Super-Heavyweight, Greco-Roman", "Wrestling Men's Welterweight, Freestyle", "Weightlifting Men's Super-Heavyweight", "Swimming Women's 100 metres Freestyle", "Athletics Men's 50 kilometres Walk", "Athletics Men's 4 x 100 metres Relay", "Synchronized Swimming Women's Team", "Judo Men's Heavyweight", "Shooting Women's Air Pistol, 10 metres", "Swimming Women's 4 x 200 metres Freestyle Relay", "Shooting Women's Trap", "Archery Women's Individual", "Swimming Women's 200 metres Freestyle", "Swimming Women's 400 metres Freestyle", "Shooting Men's Skeet", "Water Polo Women's Water Polo", "Water Polo Men's Water Polo", "Rowing Men's Lightweight Coxless Fours", "Cycling Men's Team Pursuit, 4,000 metres", "Athletics Women's 400 metres Hurdles", "Gymnastics Women's Team All-Around", "Rowing Men's Coxed Eights", "Athletics Women's Pole Vault", "Canoeing Women's Kayak Fours, 500 metres", "Diving Women's Platform", "Cycling Women's BMX", "Rowing Men's Coxless Pairs", "Athletics Women's 1,500 metres", "Diving Women's Synchronized Platform", 'Equestrianism Mixed Three-Day Event, Individual', 'Equestrianism Mixed Three-Day Event, Team', "Swimming Women's 4 x 100 metres Freestyle Relay", "Sailing Men's One Person Heavyweight Dinghy", "Shooting Men's Rapid-Fire Pistol, 25 metres", "Shooting Women's Sporting Pistol, 25 metres", "Rowing Men's Coxless Fours", "Rowing Women's Coxed Eights", "Badminton Women's Doubles", "Canoeing Men's Kayak Fours, 1,000 metres", "Athletics Men's 400 metres Hurdles", "Athletics Men's 4 x 400 metres Relay", "Diving Men's Platform", "Swimming Women's 200 metres Individual Medley", "Swimming Women's 4 x 100 metres Medley Relay", "Rowing Women's Double Sculls", "Sailing Women's Three Person Keelboat", "Triathlon Women's Olympic Distance", "Swimming Men's 4 x 100 metres Freestyle Relay", "Swimming Men's 4 x 100 metres Medley Relay", "Canoeing Men's Kayak Singles, Slalom", "Cycling Women's Team Pursuit", "Cycling Women's Omnium", "Modern Pentathlon Women's Individual", "Rowing Women's Quadruple Sculls", "Modern Pentathlon Men's Individual", "Boxing Women's Middleweight", "Canoeing Women's Kayak Doubles, 500 metres", "Rowing Men's Quadruple Sculls", "Swimming Women's 200 metres Breaststroke", "Canoeing Women's Kayak Singles, Slalom", "Table Tennis Men's Team", "Swimming Men's 200 metres Freestyle", "Swimming Men's 4 x 200 metres Freestyle Relay", "Trampolining Men's Individual", "Cycling Women's Road Race, Individual", "Cycling Women's Individual Time Trial", "Cycling Men's Team Sprint", "Shooting Men's Air Rifle, 10 metres", "Swimming Men's 100 metres Butterfly", "Swimming Men's 200 metres Individual Medley", "Swimming Women's 200 metres Butterfly", 'Equestrianism Mixed Dressage, Individual', 'Equestrianism Mixed Dressage, Team', 'Equestrianism Mixed Jumping, Team', "Cycling Women's Mountainbike, Cross-Country", "Swimming Women's 200 metres Backstroke", "Athletics Men's Pole Vault", "Boxing Men's Light-Welterweight", "Swimming Men's 10 kilometres Open Water", "Canoeing Men's Canadian Doubles, Slalom", "Sailing Men's Skiff", "Swimming Women's 100 metres Breaststroke", "Athletics Women's 3,000 metres Steeplechase", "Athletics Women's 20 kilometres Walk", "Swimming Men's 200 metres Backstroke", "Table Tennis Women's Singles", "Table Tennis Women's Team", "Weightlifting Women's Middleweight", "Boxing Men's Super-Heavyweight", "Shooting Women's Skeet", "Shooting Men's Double Trap", "Taekwondo Women's Welterweight", "Cycling Women's Team Sprint", "Athletics Women's 100 metres Hurdles", "Cycling Women's Sprint", "Cycling Women's Keirin", "Rhythmic Gymnastics Women's Individual", "Badminton Women's Singles", "Canoeing Women's Kayak Singles, 200 metres", "Canoeing Women's Kayak Singles, 500 metres", "Diving Women's Springboard", "Cycling Men's Sprint", "Cycling Men's Keirin", "Judo Women's Lightweight", "Swimming Men's 100 metres Breaststroke", "Swimming Men's 200 metres Breaststroke", "Cycling Men's Individual Time Trial", "Shooting Men's Small-Bore Rifle, Three Positions, 50 metres", "Shooting Women's Air Rifle, 10 metres", "Diving Women's Synchronized Springboard", "Canoeing Men's Kayak Doubles, 1,000 metres", "Badminton Men's Doubles", "Athletics Men's 400 metres", "Athletics Men's 10,000 metres", "Canoeing Men's Kayak Singles, 200 metres", "Canoeing Men's Kayak Singles, 1,000 metres", "Tennis Women's Singles", "Wrestling Men's Lightweight, Freestyle", "Shooting Women's Small-Bore Rifle, Three Positions, 50 metres", "Diving Men's Springboard", "Athletics Women's 5,000 metres", "Athletics Women's 10,000 metres", "Archery Men's Individual", "Athletics Women's Heptathlon", "Beach Volleyball Men's Beach Volleyball", "Judo Women's Half-Middleweight", "Wrestling Men's Light-Heavyweight, Greco-Roman", "Badminton Men's Singles", "Fencing Men's Foil, Individual", "Wrestling Men's Featherweight, Greco-Roman", "Wrestling Men's Middleweight, Freestyle", "Wrestling Men's Super-Heavyweight, Freestyle", "Judo Men's Lightweight", "Wrestling Men's Heavyweight, Freestyle", "Wrestling Women's Lightweight, Freestyle", "Weightlifting Men's Bantamweight", "Athletics Women's 4 x 100 metres Relay", "Athletics Men's High Jump", "Athletics Women's 200 metres", "Athletics Men's 110 metres Hurdles", "Athletics Women's Long Jump", "Judo Women's Half-Heavyweight", "Athletics Women's Triple Jump", "Athletics Women's High Jump", "Athletics Men's Decathlon", "Football Men's Football", "Fencing Men's Sabre, Individual", "Fencing Men's Sabre, Team", "Rhythmic Gymnastics Women's Group", "Athletics Women's 4 x 400 metres Relay", "Athletics Women's Shot Put", "Weightlifting Women's Heavyweight", "Gymnastics Women's Horse Vault", "Wrestling Women's Heavyweight, Freestyle", "Trampolining Women's Individual", "Football Women's Football", "Boxing Women's Lightweight", "Taekwondo Women's Heavyweight", "Boxing Women's Flyweight", "Sailing Men's Two Person Keelboat", "Fencing Men's epee, Individual", "Wrestling Women's Middleweight, Freestyle", "Taekwondo Men's Heavyweight", "Diving Men's Synchronized Springboard", 'Badminton Mixed Doubles', "Fencing Women's epee, Individual", "Taekwondo Women's Featherweight", "Diving Men's Synchronized Platform", "Gymnastics Men's Team All-Around", "Archery Women's Team", "Archery Men's Team", "Fencing Men's Foil, Team", "Fencing Women's epee, Team", "Weightlifting Men's Middleweight", "Weightlifting Women's Flyweight", "Weightlifting Men's Featherweight", "Weightlifting Women's Featherweight", "Fencing Women's Foil, Team"]
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

earliest_year = medals_per_country_per_year["Year"].min()

latest_year = medals_per_country_per_year["Year"].max()

print(f"The earliest year in the dataset is {earliest_year}")

print(f"The latest year in the dataset is {latest_year}")
The earliest year in the dataset is 1896
The latest year in the dataset is 2016
import pandas as pd

selected_year = 2012
selected_sport = "Gymnastics"

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics_filtered = athlete_statistics[(athlete_statistics["Year"] == selected_year) & (athlete_statistics["Sport"] == selected_sport)]

youngest_athlete = athlete_statistics_filtered["Age"].min()

oldest_athlete = athlete_statistics_filtered["Age"].max()

print(f"The youngest athlete who competed in {selected_sport} in {selected_year} was {youngest_athlete:.0f} years old.")

print(f"The oldest athlete who competed in {selected_sport} in {selected_year} was{oldest_athlete:.0f} years old.")
The youngest athlete who competed in Gymnastics in 2012 was 15 years old.
The oldest athlete who competed in Gymnastics in 2012 was39 years old.

53.2.3 Plots

import pandas as pd
import plotly.express as px

selected_countries = ["UK", "USA", "Canada", "Australia", "China", "Russia", "France", "Germany", "Italy"]

selected_medal_type = "Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

px.line(medals_per_country_per_year[medals_per_country_per_year["Country"].isin(selected_countries)],
        x="Year", y=selected_medal_type, color="Country",
        title=f"{selected_medal_type} medals over time - subset of countries")
import pandas as pd
import plotly.express as px

chosen_country = "UK"
selected_medal_type = "Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

px.line(medals_per_country_per_year[medals_per_country_per_year["Country"] == chosen_country],
        y=selected_medal_type, x="Year",
        title=f"{selected_medal_type} medals for {chosen_country} over time")
import pandas as pd
import plotly.express as px

chosen_country = "UK"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year_long = medals_per_country_per_year.melt(id_vars=["Year", "Country", "NOC"])

px.line(medals_per_country_per_year_long[medals_per_country_per_year_long["Country"] == chosen_country],
        y="value", x="Year", color="variable",
        color_discrete_sequence=["orange", "silver", "gold", "blue"],
        title=f"Medals Won over Time - {chosen_country}")
import pandas as pd
import plotly.express as px

rows_to_display = 10

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

px.bar(total_medals_all_time.head(rows_to_display), x='Country', y='Total',
       title=f"Total Number of Medals since 1896 - Top {rows_to_display} Countries")
import pandas as pd
import plotly.express as px

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

total_medals_all_time_long = total_medals_all_time.drop(columns='Total').melt(id_vars="Country")

px.bar(total_medals_all_time_long,
        x="Country", y="value", color="variable",
        color_discrete_sequence=["orange", "silver", "gold"]
        )
import pandas as pd
import plotly.express as px

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

total_medals_all_time_long = total_medals_all_time.drop(columns='Total').melt(id_vars="Country")

total_medals_all_time_long['Total'] = total_medals_all_time_long['value'].groupby(total_medals_all_time_long['Country']).transform('sum')

total_medals_all_time_long['Percentage of Total'] = total_medals_all_time_long['value'] / total_medals_all_time_long['Total']

px.bar(total_medals_all_time_long[total_medals_all_time_long['Country'].isin(total_medals_all_time_long.head(rows_to_display)['Country'].values)],
       x="Country", y="Percentage of Total", color="variable",
       color_discrete_sequence=["orange", "silver", "gold"],
       title=f"Breakdown of Medals by type - Top {rows_to_display} countries by total all-time medal count")
import pandas as pd
import plotly.express as px

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

distinct_athletes_per_year = athlete_statistics[['Name', 'Year','Country','Sex', 'Age', 'Height', 'Weight']].drop_duplicates()

athlete_counts_by_sex_by_year = distinct_athletes_per_year.value_counts(['Sex', 'Year']).reset_index()

athlete_counts_by_sex_by_year['total_athletes_both_sex_in_year'] = athlete_counts_by_sex_by_year['count'].groupby(athlete_counts_by_sex_by_year['Year']).transform('sum')

athlete_counts_by_sex_by_year['Percentage of Athletes'] = athlete_counts_by_sex_by_year['count'] / athlete_counts_by_sex_by_year['total_athletes_both_sex_in_year']

px.bar(athlete_counts_by_sex_by_year,
       x="Year", y="Percentage of Athletes", color="Sex",
       title=f"Change in Gender Split Over Time")
import pandas as pd
import plotly.express as px

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

selected_sport = "Gymnastics"

athlete_statistics_sport = athlete_statistics[athlete_statistics["Sport"] == selected_sport]

events_per_year_by_sport = (
    athlete_statistics_sport[['Year', 'Event']]
    .drop_duplicates()
    .value_counts(['Year'])
    .reset_index()
)

px.line(events_per_year_by_sport.sort_values("Year"), x="Year", y="count",
            markers=True, title=f"{selected_sport} Events Per Olympics",
            range_x=[athlete_statistics.Year.min(), athlete_statistics.Year.max()])

53.2.4 Maps

The provided maps are imperfect due to the complexity of matching up country names across the olympic dataset and an unexplained bug affecting the colouring of certain countries in the interactive map.

For the purpose of the exercise, don’t worry about the slight issues with the map - the focus is just on displaying a map successfully within your dashboard.

import pandas as pd
import geopandas
import matplotlib.pyplot as plt

selected_year = 2012
selected_medal_type="Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

country_outlines = geopandas.read_file("countries_outlines.geojson")

medals_per_country_per_year_gdf = pd.concat([
    pd.merge(country_outlines, medals_per_country_per_year, left_on="id", right_on="NOC", how="inner"),
    pd.merge(country_outlines, medals_per_country_per_year, left_on="name", right_on="Country", how="inner")
]).drop_duplicates()

fig, ax = plt.subplots(figsize=(15, 8))
ax = medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == selected_year].plot(
    "Gold", legend=True, ax=ax
    )
ax.axis('off')
plt.title(f"Number of {selected_medal_type} Medals in {selected_year}")
Text(0.5, 1.0, 'Number of Bronze Medals in 2012')

import pandas as pd
import geopandas
import folium

selected_year = 2012
selected_medal_type="Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

country_outlines = geopandas.read_file("countries_outlines.geojson")

medals_per_country_per_year_gdf = pd.concat([
    pd.merge(country_outlines, medals_per_country_per_year, left_on="id", right_on="NOC", how="inner"),
    pd.merge(country_outlines, medals_per_country_per_year, left_on="name", right_on="Country", how="inner")
]).drop_duplicates()

#create base map
world_map_medals = folium.Map(
    location=[50.71671, -3.50668],
    zoom_start=2,
    tiles='cartodbpositron'
    )

# create and add choropleth map
choropleth = folium.Choropleth(
    geo_data=medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == selected_year], # dataframe with geometry in it
    data=medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == selected_year], # dataframe with data in - may be the same dataframe or a different one
    columns=['name', 'Gold'], # [key (field for geometry), field to plot]
    key_on='feature.properties.Country'
    )

choropleth = choropleth.add_to(world_map_medals)

choropleth = choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(
        ['name', 'Gold'],
        labels=True
        )
)

world_map_medals
Make this Notebook Trusted to load map: File -> Trust Notebook