12  Example Outputs

In this page, we’re going to make a series of tables and plots out of the olympic dataset.

12.1 Dataframes

import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year.sort_values("Total", ascending=False).head(15)
Year Country NOC Bronze Silver Gold Total
45 1904 USA USA 75.0 79.0 76.0 230.0
611 1980 Russia URS 46.0 69.0 80.0 195.0
665 1984 USA USA 30.0 61.0 82.0 173.0
81 1908 UK GBR 37.0 50.0 56.0 143.0
707 1988 Russia URS 46.0 31.0 54.0 131.0
596 1980 Germany GDR 42.0 37.0 47.0 126.0
574 1976 Russia URS 35.0 41.0 49.0 125.0
1265 2016 USA USA 38.0 37.0 46.0 121.0
771 1992 Russia EUN 29.0 38.0 45.0 112.0
215 1932 USA USA 30.0 36.0 44.0 110.0
1096 2008 USA USA 35.0 39.0 36.0 110.0
783 1992 USA USA 37.0 34.0 37.0 108.0
493 1968 USA USA 34.0 28.0 45.0 107.0
1181 2012 USA USA 29.0 28.0 46.0 103.0
398 1960 Russia URS 31.0 29.0 43.0 103.0
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year.sort_values(["Gold", "Total"], ascending=True).head(15)
Year Country NOC Bronze Silver Gold Total
14 1900 Colombia COL 0.0 1.0 0.0 1.0
25 1900 Mexico MEX 1.0 0.0 0.0 1.0
65 1908 Austria AUT 1.0 0.0 0.0 1.0
111 1920 Greece GRE 0.0 1.0 0.0 1.0
114 1920 Luxembourg LUX 0.0 1.0 0.0 1.0
116 1920 New Zealand NZL 1.0 0.0 0.0 1.0
135 1924 Haiti HAI 1.0 0.0 0.0 1.0
139 1924 Japan JPN 1.0 0.0 0.0 1.0
141 1924 Monaco MON 1.0 0.0 0.0 1.0
143 1924 New Zealand NZL 1.0 0.0 0.0 1.0
146 1924 Portugal POR 1.0 0.0 0.0 1.0
147 1924 Romania ROU 1.0 0.0 0.0 1.0
160 1928 Chile CHI 0.0 1.0 0.0 1.0
168 1928 Haiti HAI 0.0 1.0 0.0 1.0
178 1928 Philippines PHI 1.0 0.0 0.0 1.0
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

total_medals_all_time.head(15)
Country Bronze Silver Gold Total
123 USA 707.0 802.0 1035.0 2544.0
98 Russia 487.0 498.0 592.0 1577.0
41 Germany 490.0 457.0 442.0 1389.0
122 UK 298.0 316.0 278.0 892.0
38 France 282.0 255.0 233.0 770.0
57 Italy 198.0 191.0 219.0 608.0
20 China 154.0 163.0 228.0 545.0
4 Australia 197.0 171.0 150.0 518.0
110 Sweden 188.0 175.0 150.0 513.0
48 Hungary 172.0 154.0 178.0 504.0
60 Japan 161.0 134.0 142.0 437.0
37 Finland 120.0 86.0 104.0 310.0
18 Canada 137.0 104.0 64.0 305.0
97 Romania 120.0 95.0 88.0 303.0
82 Netherlands 114.0 95.0 88.0 297.0
import pandas as pd

medals_by_event = pd.read_csv("country_medals_by_event.csv")
medals_by_event
Year Sport Event Bronze Silver Gold
0 1896 Athletics Athletics Men's 1,500 metres France USA Australia
1 1896 Athletics Athletics Men's 100 metres Hungary Germany USA
2 1896 Athletics Athletics Men's 110 metres Hurdles Data Missing UK USA
3 1896 Athletics Athletics Men's 400 metres UK USA USA
4 1896 Athletics Athletics Men's 800 metres Greece Hungary Australia
... ... ... ... ... ... ...
5211 2016 Wrestling Wrestling Women's Flyweight, Freestyle Bulgaria Azerbaijan Japan
5212 2016 Wrestling Wrestling Women's Heavyweight, Freestyle China Kazakhstan Canada
5213 2016 Wrestling Wrestling Women's Light-Heavyweight, Freestyle Kazakhstan Russia Japan
5214 2016 Wrestling Wrestling Women's Lightweight, Freestyle India Russia Japan
5215 2016 Wrestling Wrestling Women's Middleweight, Freestyle Kazakhstan Belarus Japan

5216 rows × 6 columns

import pandas as pd

selected_year = 2012
selected_sport = "Judo"

medals_by_event = pd.read_csv("country_medals_by_event.csv")
medals_by_event
Year Sport Event Bronze Silver Gold
0 1896 Athletics Athletics Men's 1,500 metres France USA Australia
1 1896 Athletics Athletics Men's 100 metres Hungary Germany USA
2 1896 Athletics Athletics Men's 110 metres Hurdles Data Missing UK USA
3 1896 Athletics Athletics Men's 400 metres UK USA USA
4 1896 Athletics Athletics Men's 800 metres Greece Hungary Australia
... ... ... ... ... ... ...
5211 2016 Wrestling Wrestling Women's Flyweight, Freestyle Bulgaria Azerbaijan Japan
5212 2016 Wrestling Wrestling Women's Heavyweight, Freestyle China Kazakhstan Canada
5213 2016 Wrestling Wrestling Women's Light-Heavyweight, Freestyle Kazakhstan Russia Japan
5214 2016 Wrestling Wrestling Women's Lightweight, Freestyle India Russia Japan
5215 2016 Wrestling Wrestling Women's Middleweight, Freestyle Kazakhstan Belarus Japan

5216 rows × 6 columns

import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

most_events_per_year_by_athlete = athlete_statistics.value_counts(['Name', 'Country', 'Year']).reset_index()

most_events_per_year_by_athlete.head(15)
Name Country Year count
0 Robert Tait McKenzie Canada 1932 44
1 Alfrd (Arnold-) Hajs (Guttmann-) Hungary 1928 28
2 Miltiades Manno Hungary 1932 27
3 Alfred James Munnings UK 1948 25
4 Wilhelm (William) Hunt Diederich USA 1932 19
5 Acee Blue Eagle USA 1932 18
6 Jean Lucien Nicolas Jacoby Luxembourg 1936 17
7 Stanisaw Noakowski Poland 1928 17
8 Mahonri Mackintosh Young USA 1932 16
9 Pierre De Soete Belgium 1928 16
10 Georges-mile Fauvelle France 1928 16
11 ngel Zrraga Argelles Mexico 1928 16
12 Willis Augustus Lee, Jr. USA 1920 15
13 Charles Zeller Klauder USA 1928 14
14 Georg Vilhelm Lagerstedt Sweden 1936 13
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

most_events_per_year_by_athlete = athlete_statistics.value_counts(['Name', 'Country', 'Year']).reset_index()

athlete_name_most_events = most_events_per_year_by_athlete.head(1)['Name'].values[0]
athlete_name_most_events_year = most_events_per_year_by_athlete.head(1)['Year'].values[0]

athlete_statistics[(athlete_statistics["Name"] == athlete_name_most_events) &
                    (athlete_statistics["Year"] == athlete_name_most_events_year)].head(15)
Name Sex Age Height Weight Country NOC Year Sport Event Medal
32262 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Medals And... Bronze
32263 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32264 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32265 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32266 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32267 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32268 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32269 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32270 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32271 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32272 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32273 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32274 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32275 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
32276 Robert Tait McKenzie M 65.0 NaN NaN Canada CAN 1932 Art Competitions Art Competitions Mixed Sculpturing, Unknown Event NaN
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics.value_counts(['Name', 'Sex', 'Country']).reset_index()
Name Sex Country count
0 Robert Tait McKenzie M Canada 58
1 Heikki Ilmari Savolainen M Finland 39
2 Joseph "Josy" Stoffel M Luxembourg 38
3 Ioannis Theofilakis M Greece 36
4 Takashi Ono M Japan 33
... ... ... ... ...
116737 A. Brun F France 1
116738 A. Buydens M Belgium 1
116739 A. Charles Six M France 1
116740 A. Christory M France 1
116741 A. Darnis M France 1

116742 rows × 4 columns

import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

chosen_athlete = "Simone Arianne Biles"

athlete_statistics[athlete_statistics["Name"] == chosen_athlete]
Name Sex Age Height Weight Country NOC Year Sport Event Medal
203896 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Individual All-Around Gold
203897 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Team All-Around Gold
203898 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Floor Exercise Gold
203899 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Horse Vault Gold
203900 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Uneven Bars NaN
203901 Simone Arianne Biles F 19.0 143.0 47.0 USA USA 2016 Gymnastics Gymnastics Women's Balance Beam Bronze
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

search_string = "Ben"

athlete_statistics[athlete_statistics["Name"].str.contains(search_string)]
Name Sex Age Height Weight Country NOC Year Sport Event Medal
271 Benaissa Abed M 23.0 170.0 51.0 Algeria ALG 1988 Boxing Boxing Men's Flyweight NaN
327 Ben Ali Beghouach M 21.0 186.0 89.0 Algeria ALG 1988 Handball Handball Men's Handball NaN
328 Ben Ali Beghouach M 29.0 186.0 89.0 Algeria ALG 1996 Handball Handball Men's Handball NaN
348 Djillali Ben Brahim M 27.0 178.0 71.0 Algeria ALG 1980 Judo Judo Men's Lightweight NaN
349 Reda Ben Chehima M 26.0 175.0 75.0 Algeria ALG 2004 Fencing Fencing Men's Sabre, Individual NaN
... ... ... ... ... ... ... ... ... ... ... ...
219770 Lidija Benedeti-Lapajne F 25.0 175.0 64.0 Serbia YUG 1984 Athletics Athletics Women's High Jump NaN
219771 Marijan Bene M 25.0 170.0 68.0 Serbia YUG 1976 Boxing Boxing Men's Welterweight NaN
221821 Benson Mulomba M 22.0 177.0 54.0 Zambia ZAM 1972 Athletics Athletics Men's 800 metres NaN
221843 Benny Muziyo M 23.0 NaN NaN Zambia ZAM 2016 Boxing Boxing Men's Middleweight NaN
222158 Gavin Ben Sutherland M 37.0 186.0 78.0 Zimbabwe ZIM 2016 Archery Archery Men's Individual NaN

1617 rows × 11 columns

import pandas as pd

selected_year = 2012

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics[athlete_statistics["Year"] == selected_year][['Sport', 'Event']].drop_duplicates().reset_index(drop=True).head(15)
Sport Event
0 Athletics Athletics Men's 100 metres
1 Taekwondo Taekwondo Men's Welterweight
2 Boxing Boxing Men's Flyweight
3 Judo Judo Men's Half-Lightweight
4 Athletics Athletics Women's 100 metres
5 Taekwondo Taekwondo Men's Featherweight
6 Weightlifting Weightlifting Women's Lightweight
7 Swimming Swimming Women's 100 metres Butterfly
8 Weightlifting Weightlifting Men's Lightweight
9 Athletics Athletics Men's Shot Put
10 Swimming Swimming Men's 100 metres Freestyle
11 Weightlifting Weightlifting Men's Middle-Heavyweight
12 Judo Judo Women's Half-Lightweight
13 Shooting Shooting Men's Air Pistol, 10 metres
14 Shooting Shooting Men's Free Pistol, 50 metres
import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics[athlete_statistics["Year"] == selected_year][['Sport', 'Event']].drop_duplicates().value_counts('Sport').reset_index().head(15)
Sport count
0 Athletics 47
1 Swimming 34
2 Wrestling 18
3 Cycling 18
4 Canoeing 16
5 Weightlifting 15
6 Shooting 15
7 Judo 14
8 Gymnastics 14
9 Rowing 14
10 Boxing 13
11 Sailing 10
12 Fencing 10
13 Taekwondo 8
14 Diving 8

12.2 Useful Lists and Info From Datasets

import pandas as pd

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

all_sports = athlete_statistics["Sport"].drop_duplicates().tolist()

print(all_sports)
['Hockey', 'Football', 'Wrestling', 'Athletics', 'Taekwondo', 'Judo', 'Boxing', 'Weightlifting', 'Shooting', 'Fencing', 'Swimming', 'Sailing', 'Synchronized Swimming', 'Triathlon', 'Equestrianism', 'Cycling', 'Volleyball', 'Handball', 'Rowing', 'Table Tennis', 'Tennis', 'Gymnastics', 'Trampolining', 'Badminton', 'Canoeing', 'Beach Volleyball', 'Basketball', 'Diving', 'Rugby', 'Rugby Sevens', 'Polo', 'Modern Pentathlon', 'Archery', 'Water Polo', 'Art Competitions', 'Golf', 'Figure Skating', 'Softball', 'Baseball', 'Rhythmic Gymnastics', 'Tug-Of-War', 'Ice Hockey', 'Lacrosse', 'Basque Pelota', 'Cricket', 'Croquet', 'Motorboating', 'Racquets', 'Jeu De Paume', 'Alpinism', 'Aeronautics', 'Roque']
import pandas as pd

selected_year = 2012

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics_year = athlete_statistics[athlete_statistics["Year"] == selected_year]

all_events = athlete_statistics_year["Event"].drop_duplicates().tolist()

print(all_events)
["Athletics Men's 100 metres", "Taekwondo Men's Welterweight", "Boxing Men's Flyweight", "Judo Men's Half-Lightweight", "Athletics Women's 100 metres", "Taekwondo Men's Featherweight", "Weightlifting Women's Lightweight", "Swimming Women's 100 metres Butterfly", "Weightlifting Men's Lightweight", "Athletics Men's Shot Put", "Swimming Men's 100 metres Freestyle", "Weightlifting Men's Middle-Heavyweight", "Judo Women's Half-Lightweight", "Shooting Men's Air Pistol, 10 metres", "Shooting Men's Free Pistol, 50 metres", "Boxing Men's Welterweight", "Volleyball Women's Volleyball", "Athletics Men's 5,000 metres", "Athletics Women's Marathon", "Judo Women's Heavyweight", "Athletics Men's 3,000 metres Steeplechase", "Wrestling Men's Lightweight, Greco-Roman", "Boxing Men's Light-Heavyweight", "Weightlifting Men's Heavyweight", "Boxing Men's Heavyweight", "Boxing Men's Lightweight", "Taekwondo Men's Flyweight", "Athletics Men's Marathon", "Boxing Men's Light-Flyweight", "Fencing Women's Foil, Individual", "Cycling Men's Road Race, Individual", "Wrestling Men's Light-Heavyweight, Freestyle", "Athletics Men's 800 metres", "Athletics Men's 1,500 metres", "Fencing Women's Sabre, Individual", "Athletics Men's Triple Jump", "Boxing Men's Bantamweight", "Boxing Men's Middleweight", "Rowing Women's Single Sculls", "Wrestling Men's Welterweight, Greco-Roman", "Swimming Men's 200 metres Butterfly", "Swimming Women's 100 metres Backstroke", "Shooting Men's Trap", "Handball Women's Handball", "Basketball Women's Basketball", "Athletics Women's 800 metres", "Swimming Women's 50 metres Freestyle", "Canoeing Men's Canadian Singles, 200 metres", "Canoeing Men's Canadian Doubles, 1,000 metres", "Judo Women's Middleweight", "Canoeing Men's Canadian Singles, 1,000 metres", "Swimming Men's 400 metres Individual Medley", "Athletics Women's 400 metres", "Athletics Men's 200 metres", "Rowing Women's Coxless Pairs", "Sailing Men's One Person Dinghy", "Shooting Men's Small-Bore Rifle, Prone, 50 metres", "Volleyball Men's Volleyball", "Hockey Women's Hockey", "Swimming Women's 400 metres Individual Medley", "Hockey Men's Hockey", "Tennis Men's Singles", "Wrestling Women's Flyweight, Freestyle", "Swimming Women's 800 metres Freestyle", "Swimming Women's 10 kilometres Open Water", "Sailing Men's Two Person Dinghy", "Basketball Men's Basketball", "Judo Men's Middleweight", "Handball Men's Handball", "Athletics Men's 20 kilometres Walk", "Sailing Women's One Person Dinghy", "Rowing Men's Lightweight Double Sculls", "Athletics Men's Hammer Throw", "Athletics Women's Discus Throw", "Canoeing Men's Kayak Doubles, 200 metres", "Athletics Women's Hammer Throw", 'Tennis Mixed Doubles', "Tennis Women's Doubles", "Rowing Men's Single Sculls", "Beach Volleyball Women's Beach Volleyball", "Swimming Men's 50 metres Freestyle", "Swimming Men's 100 metres Backstroke", "Rowing Women's Lightweight Double Sculls", 'Equestrianism Mixed Jumping, Individual', "Athletics Men's Discus Throw", "Table Tennis Men's Singles", "Taekwondo Women's Flyweight", "Sailing Women's Windsurfer", "Judo Men's Half-Middleweight", "Gymnastics Men's Floor Exercise", "Gymnastics Men's Rings", "Sailing Women's Two Person Dinghy", "Tennis Men's Doubles", "Judo Women's Extra-Lightweight", "Swimming Men's 400 metres Freestyle", "Swimming Men's 1,500 metres Freestyle", "Gymnastics Women's Individual All-Around", "Gymnastics Women's Floor Exercise", "Gymnastics Women's Uneven Bars", "Gymnastics Women's Balance Beam", "Cycling Men's Omnium", "Cycling Men's BMX", "Sailing Men's Windsurfer", "Canoeing Men's Canadian Singles, Slalom", "Rowing Men's Double Sculls", "Synchronized Swimming Women's Duet", "Judo Men's Half-Heavyweight", "Cycling Men's Mountainbike, Cross-Country", "Triathlon Men's Olympic Distance", "Athletics Men's Javelin Throw", "Wrestling Men's Heavyweight, Greco-Roman", "Weightlifting Women's Light-Heavyweight", "Gymnastics Men's Individual All-Around", "Gymnastics Men's Horse Vault", "Gymnastics Men's Parallel Bars", "Gymnastics Men's Horizontal Bar", "Gymnastics Men's Pommelled Horse", "Judo Men's Extra-Lightweight", "Athletics Women's Javelin Throw", "Wrestling Men's Featherweight, Freestyle", "Wrestling Men's Middleweight, Greco-Roman", "Weightlifting Men's Light-Heavyweight", "Weightlifting Women's Super-Heavyweight", "Athletics Men's Long Jump", "Wrestling Men's Super-Heavyweight, Greco-Roman", "Wrestling Men's Welterweight, Freestyle", "Weightlifting Men's Super-Heavyweight", "Swimming Women's 100 metres Freestyle", "Athletics Men's 50 kilometres Walk", "Athletics Men's 4 x 100 metres Relay", "Synchronized Swimming Women's Team", "Judo Men's Heavyweight", "Shooting Women's Air Pistol, 10 metres", "Swimming Women's 4 x 200 metres Freestyle Relay", "Shooting Women's Trap", "Archery Women's Individual", "Swimming Women's 200 metres Freestyle", "Swimming Women's 400 metres Freestyle", "Shooting Men's Skeet", "Water Polo Women's Water Polo", "Water Polo Men's Water Polo", "Rowing Men's Lightweight Coxless Fours", "Cycling Men's Team Pursuit, 4,000 metres", "Athletics Women's 400 metres Hurdles", "Gymnastics Women's Team All-Around", "Rowing Men's Coxed Eights", "Athletics Women's Pole Vault", "Canoeing Women's Kayak Fours, 500 metres", "Diving Women's Platform", "Cycling Women's BMX", "Rowing Men's Coxless Pairs", "Athletics Women's 1,500 metres", "Diving Women's Synchronized Platform", 'Equestrianism Mixed Three-Day Event, Individual', 'Equestrianism Mixed Three-Day Event, Team', "Swimming Women's 4 x 100 metres Freestyle Relay", "Sailing Men's One Person Heavyweight Dinghy", "Shooting Men's Rapid-Fire Pistol, 25 metres", "Shooting Women's Sporting Pistol, 25 metres", "Rowing Men's Coxless Fours", "Rowing Women's Coxed Eights", "Badminton Women's Doubles", "Canoeing Men's Kayak Fours, 1,000 metres", "Athletics Men's 400 metres Hurdles", "Athletics Men's 4 x 400 metres Relay", "Diving Men's Platform", "Swimming Women's 200 metres Individual Medley", "Swimming Women's 4 x 100 metres Medley Relay", "Rowing Women's Double Sculls", "Sailing Women's Three Person Keelboat", "Triathlon Women's Olympic Distance", "Swimming Men's 4 x 100 metres Freestyle Relay", "Swimming Men's 4 x 100 metres Medley Relay", "Canoeing Men's Kayak Singles, Slalom", "Cycling Women's Team Pursuit", "Cycling Women's Omnium", "Modern Pentathlon Women's Individual", "Rowing Women's Quadruple Sculls", "Modern Pentathlon Men's Individual", "Boxing Women's Middleweight", "Canoeing Women's Kayak Doubles, 500 metres", "Rowing Men's Quadruple Sculls", "Swimming Women's 200 metres Breaststroke", "Canoeing Women's Kayak Singles, Slalom", "Table Tennis Men's Team", "Swimming Men's 200 metres Freestyle", "Swimming Men's 4 x 200 metres Freestyle Relay", "Trampolining Men's Individual", "Cycling Women's Road Race, Individual", "Cycling Women's Individual Time Trial", "Cycling Men's Team Sprint", "Shooting Men's Air Rifle, 10 metres", "Swimming Men's 100 metres Butterfly", "Swimming Men's 200 metres Individual Medley", "Swimming Women's 200 metres Butterfly", 'Equestrianism Mixed Dressage, Individual', 'Equestrianism Mixed Dressage, Team', 'Equestrianism Mixed Jumping, Team', "Cycling Women's Mountainbike, Cross-Country", "Swimming Women's 200 metres Backstroke", "Athletics Men's Pole Vault", "Boxing Men's Light-Welterweight", "Swimming Men's 10 kilometres Open Water", "Canoeing Men's Canadian Doubles, Slalom", "Sailing Men's Skiff", "Swimming Women's 100 metres Breaststroke", "Athletics Women's 3,000 metres Steeplechase", "Athletics Women's 20 kilometres Walk", "Swimming Men's 200 metres Backstroke", "Table Tennis Women's Singles", "Table Tennis Women's Team", "Weightlifting Women's Middleweight", "Boxing Men's Super-Heavyweight", "Shooting Women's Skeet", "Shooting Men's Double Trap", "Taekwondo Women's Welterweight", "Cycling Women's Team Sprint", "Athletics Women's 100 metres Hurdles", "Cycling Women's Sprint", "Cycling Women's Keirin", "Rhythmic Gymnastics Women's Individual", "Badminton Women's Singles", "Canoeing Women's Kayak Singles, 200 metres", "Canoeing Women's Kayak Singles, 500 metres", "Diving Women's Springboard", "Cycling Men's Sprint", "Cycling Men's Keirin", "Judo Women's Lightweight", "Swimming Men's 100 metres Breaststroke", "Swimming Men's 200 metres Breaststroke", "Cycling Men's Individual Time Trial", "Shooting Men's Small-Bore Rifle, Three Positions, 50 metres", "Shooting Women's Air Rifle, 10 metres", "Diving Women's Synchronized Springboard", "Canoeing Men's Kayak Doubles, 1,000 metres", "Badminton Men's Doubles", "Athletics Men's 400 metres", "Athletics Men's 10,000 metres", "Canoeing Men's Kayak Singles, 200 metres", "Canoeing Men's Kayak Singles, 1,000 metres", "Tennis Women's Singles", "Wrestling Men's Lightweight, Freestyle", "Shooting Women's Small-Bore Rifle, Three Positions, 50 metres", "Diving Men's Springboard", "Athletics Women's 5,000 metres", "Athletics Women's 10,000 metres", "Archery Men's Individual", "Athletics Women's Heptathlon", "Beach Volleyball Men's Beach Volleyball", "Judo Women's Half-Middleweight", "Wrestling Men's Light-Heavyweight, Greco-Roman", "Badminton Men's Singles", "Fencing Men's Foil, Individual", "Wrestling Men's Featherweight, Greco-Roman", "Wrestling Men's Middleweight, Freestyle", "Wrestling Men's Super-Heavyweight, Freestyle", "Judo Men's Lightweight", "Wrestling Men's Heavyweight, Freestyle", "Wrestling Women's Lightweight, Freestyle", "Weightlifting Men's Bantamweight", "Athletics Women's 4 x 100 metres Relay", "Athletics Men's High Jump", "Athletics Women's 200 metres", "Athletics Men's 110 metres Hurdles", "Athletics Women's Long Jump", "Judo Women's Half-Heavyweight", "Athletics Women's Triple Jump", "Athletics Women's High Jump", "Athletics Men's Decathlon", "Football Men's Football", "Fencing Men's Sabre, Individual", "Fencing Men's Sabre, Team", "Rhythmic Gymnastics Women's Group", "Athletics Women's 4 x 400 metres Relay", "Athletics Women's Shot Put", "Weightlifting Women's Heavyweight", "Gymnastics Women's Horse Vault", "Wrestling Women's Heavyweight, Freestyle", "Trampolining Women's Individual", "Football Women's Football", "Boxing Women's Lightweight", "Taekwondo Women's Heavyweight", "Boxing Women's Flyweight", "Sailing Men's Two Person Keelboat", "Fencing Men's epee, Individual", "Wrestling Women's Middleweight, Freestyle", "Taekwondo Men's Heavyweight", "Diving Men's Synchronized Springboard", 'Badminton Mixed Doubles', "Fencing Women's epee, Individual", "Taekwondo Women's Featherweight", "Diving Men's Synchronized Platform", "Gymnastics Men's Team All-Around", "Archery Women's Team", "Archery Men's Team", "Fencing Men's Foil, Team", "Fencing Women's epee, Team", "Weightlifting Men's Middleweight", "Weightlifting Women's Flyweight", "Weightlifting Men's Featherweight", "Weightlifting Women's Featherweight", "Fencing Women's Foil, Team"]
import pandas as pd

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

earliest_year = medals_per_country_per_year["Year"].min()

latest_year = medals_per_country_per_year["Year"].max()

print(f"The earliest year in the dataset is {earliest_year}")

print(f"The latest year in the dataset is {latest_year}")
The earliest year in the dataset is 1896
The latest year in the dataset is 2016
import pandas as pd

selected_year = 2012
selected_sport = "Gymnastics"

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

athlete_statistics_filtered = athlete_statistics[(athlete_statistics["Year"] == selected_year) & (athlete_statistics["Sport"] == selected_sport)]

youngest_athlete = athlete_statistics_filtered["Age"].min()

oldest_athlete = athlete_statistics_filtered["Age"].max()

print(f"The youngest athlete who competed in {selected_sport} in {selected_year} was {youngest_athlete:.0f} years old.")

print(f"The oldest athlete who competed in {selected_sport} in {selected_year} was{oldest_athlete:.0f} years old.")
The youngest athlete who competed in Gymnastics in 2012 was 15 years old.
The oldest athlete who competed in Gymnastics in 2012 was39 years old.

12.3 Plots

import pandas as pd
import plotly.express as px

selected_countries = ["UK", "USA", "Canada", "Australia", "China", "Russia", "France", "Germany", "Italy"]

selected_medal_type = "Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

px.line(medals_per_country_per_year[medals_per_country_per_year["Country"].isin(selected_countries)],
        x="Year", y=selected_medal_type, color="Country",
        title=f"{selected_medal_type} medals over time - subset of countries")
import pandas as pd
import plotly.express as px

chosen_country = "UK"
selected_medal_type = "Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

px.line(medals_per_country_per_year[medals_per_country_per_year["Country"] == chosen_country],
        y=selected_medal_type, x="Year",
        title=f"{selected_medal_type} medals for {chosen_country} over time")
import pandas as pd
import plotly.express as px

chosen_country = "UK"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

medals_per_country_per_year_long = medals_per_country_per_year.melt(id_vars=["Year", "Country", "NOC"])

px.line(medals_per_country_per_year_long[medals_per_country_per_year_long["Country"] == chosen_country],
        y="value", x="Year", color="variable",
        color_discrete_sequence=["orange", "silver", "gold", "blue"],
        title=f"Medals Won over Time - {chosen_country}")
import pandas as pd
import plotly.express as px

rows_to_display = 10

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

px.bar(total_medals_all_time.head(rows_to_display), x='Country', y='Total',
       title=f"Total Number of Medals since 1896 - Top {rows_to_display} Countries")
import pandas as pd
import plotly.express as px

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

total_medals_all_time_long = total_medals_all_time.drop(columns='Total').melt(id_vars="Country")

px.bar(total_medals_all_time_long,
        x="Country", y="value", color="variable",
        color_discrete_sequence=["orange", "silver", "gold"]
        )
import pandas as pd
import plotly.express as px

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

total_medals_all_time = (
    medals_per_country_per_year.drop(columns=["Year", "NOC"])
    .groupby('Country').sum()
    .reset_index()
    .sort_values('Total', ascending=False)
    )

total_medals_all_time_long = total_medals_all_time.drop(columns='Total').melt(id_vars="Country")

total_medals_all_time_long['Total'] = total_medals_all_time_long['value'].groupby(total_medals_all_time_long['Country']).transform('sum')

total_medals_all_time_long['Percentage of Total'] = total_medals_all_time_long['value'] / total_medals_all_time_long['Total']

px.bar(total_medals_all_time_long[total_medals_all_time_long['Country'].isin(total_medals_all_time_long.head(rows_to_display)['Country'].values)],
       x="Country", y="Percentage of Total", color="variable",
       color_discrete_sequence=["orange", "silver", "gold"],
       title=f"Breakdown of Medals by type - Top {rows_to_display} countries by total all-time medal count")
import pandas as pd
import plotly.express as px

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

distinct_athletes_per_year = athlete_statistics[['Name', 'Year','Country','Sex', 'Age', 'Height', 'Weight']].drop_duplicates()

athlete_counts_by_sex_by_year = distinct_athletes_per_year.value_counts(['Sex', 'Year']).reset_index()

athlete_counts_by_sex_by_year['total_athletes_both_sex_in_year'] = athlete_counts_by_sex_by_year['count'].groupby(athlete_counts_by_sex_by_year['Year']).transform('sum')

athlete_counts_by_sex_by_year['Percentage of Athletes'] = athlete_counts_by_sex_by_year['count'] / athlete_counts_by_sex_by_year['total_athletes_both_sex_in_year']

px.bar(athlete_counts_by_sex_by_year,
       x="Year", y="Percentage of Athletes", color="Sex",
       title=f"Change in Gender Split Over Time")
import pandas as pd
import plotly.express as px

athlete_statistics = pd.read_csv("athlete_details_eventwise.csv")

selected_sport = "Gymnastics"

athlete_statistics_sport = athlete_statistics[athlete_statistics["Sport"] == selected_sport]

events_per_year_by_sport = (
    athlete_statistics_sport[['Year', 'Event']]
    .drop_duplicates()
    .value_counts(['Year'])
    .reset_index()
)

px.line(events_per_year_by_sport.sort_values("Year"), x="Year", y="count",
            markers=True, title=f"{selected_sport} Events Per Olympics",
            range_x=[athlete_statistics.Year.min(), athlete_statistics.Year.max()])

12.4 Maps

import pandas as pd
import geopandas
import matplotlib.pyplot as plt

selected_year = 2012
selected_medal_type="Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

country_outlines = geopandas.read_file("countries_outlines.geojson")

medals_per_country_per_year_gdf = pd.concat([
    pd.merge(country_outlines, medals_per_country_per_year, left_on="id", right_on="NOC", how="inner"),
    pd.merge(country_outlines, medals_per_country_per_year, left_on="name", right_on="Country", how="inner")
]).drop_duplicates()

fig, ax = plt.subplots(figsize=(15, 8))
ax = medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == selected_year].plot(
    "Gold", legend=True, ax=ax
    )
ax.axis('off')
plt.title(f"Number of {selected_medal_type} Medals in {selected_year}")
Text(0.5, 1.0, 'Number of Bronze Medals in 2012')

import pandas as pd
import geopandas
import folium

selected_year = 2012
selected_medal_type="Bronze"

medals_per_country_per_year = pd.read_csv("medals_per_country_per_year.csv")

country_outlines = geopandas.read_file("countries_outlines.geojson")

medals_per_country_per_year_gdf = pd.concat([
    pd.merge(country_outlines, medals_per_country_per_year, left_on="id", right_on="NOC", how="inner"),
    pd.merge(country_outlines, medals_per_country_per_year, left_on="name", right_on="Country", how="inner")
]).drop_duplicates()

#create base map
world_map_medals = folium.Map(
    location=[50.71671, -3.50668],
    zoom_start=2,
    tiles='cartodbpositron'
    )

# create and add choropleth map
choropleth = folium.Choropleth(
    geo_data=medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == selected_year], # dataframe with geometry in it
    data=medals_per_country_per_year_gdf[medals_per_country_per_year_gdf["Year"] == selected_year], # dataframe with data in - may be the same dataframe or a different one
    columns=['name', 'Gold'], # [key (field for geometry), field to plot]
    key_on='feature.properties.Country'
    )

choropleth = choropleth.add_to(world_map_medals)

choropleth = choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(
        ['name', 'Gold'],
        labels=True
        )
)

world_map_medals
Make this Notebook Trusted to load map: File -> Trust Notebook