In this lesson you will implement linked selections in your visualization, allowing a selection made on one plot to be reflected on others. To see how this works, the next visualization will contain two scatter plots: one that shows the 76ers’ two-point versus three-point field goal percentage and the other showing the 76ers’ team points versus opponent points on a game-by-game basis.
The goal is to be able to select data points on the left-side scatter plot and quickly recognize whether the corresponding data point on the right-side scatter plot is a win or a loss.
You will first edit the file read_nba_data.py
to create a DataFrame very similar to the one from the last example.
Additional details on linking plots can be found at Linking Plots in the Bokeh User Guide.
File: read_nba_data.py
import pandas as pd
# Load the three season CSV exports; each file's date column is parsed
# into datetime values while reading.
player_stats = pd.read_csv(
    'data/2017-18_playerBoxScore.csv',
    parse_dates=['gmDate'],
)
team_stats = pd.read_csv(
    'data/2017-18_teamBoxScore.csv',
    parse_dates=['gmDate'],
)
standings = pd.read_csv(
    'data/2017-18_standings.csv',
    parse_dates=['stDate'],
)
# Create west_top_2: the standings rows for HOU and GS only, reduced to
# the date, team and games-won columns and ordered by team, then by date.
is_top_2 = standings['teamAbbr'].isin(['HOU', 'GS'])
west_top_2 = standings.loc[is_top_2, ['stDate', 'teamAbbr', 'gameWon']]
west_top_2 = west_top_2.sort_values(['teamAbbr', 'stDate'])
# Find players who took at least 1 three-point shot during the season.
# .copy() yields an independent DataFrame, so adding the 'name' column
# below does not raise pandas' SettingWithCopyWarning.
three_takers = player_stats[player_stats['play3PA'] > 0].copy()

# Clean up the player names, placing them in a single column.
# astype(str) mirrors f-string formatting, so the resulting text is the
# same as formatting each row individually, without a Python-level loop.
three_takers['name'] = (three_takers['playFNm'].astype(str) + ' ' +
                        three_takers['playLNm'].astype(str))

# Aggregate the total three-point attempts and makes for each player.
# Only the two numeric columns are selected before summing: summing every
# column would also try to add up text and datetime columns (e.g. the
# parsed 'gmDate'), which is wasted work and can raise a TypeError on
# newer pandas releases.
three_takers = (three_takers.groupby('name')[['play3PA', 'play3PM']]
                .sum()
                .sort_values('play3PA', ascending=False))

# Filter out anyone who didn't take at least 100 three-point shots;
# reset_index() turns the 'name' group key back into a regular column.
three_takers = three_takers[three_takers['play3PA'] >= 100].reset_index()

# Add a column with a calculated three-point percentage (made/attempted)
three_takers['pct3PM'] = three_takers['play3PM'] / three_takers['play3PA']
# Philadelphia 76ers data isolated: regular-season rows only, reduced to
# the columns of interest and ordered by game date.
phi_gm_stats = (team_stats[(team_stats['teamAbbr'] == 'PHI') &
                           (team_stats['seasTyp'] == 'Regular')]
                .loc[:, ['gmDate',
                         'teamPTS',
                         'teamTRB',
                         'teamAST',
                         'teamTO',
                         'opptPTS']]
                .sort_values('gmDate'))

# Add game number (1-based, following the date ordering above)
phi_gm_stats['game_num'] = range(1, len(phi_gm_stats) + 1)

# Derive a win_loss column: if the 76ers score more points, it's a win
# ('W'); anything else is recorded as a loss ('L'). The comparison is done
# on whole columns at once instead of looping over rows with iterrows().
phi_won = phi_gm_stats['teamPTS'] > phi_gm_stats['opptPTS']
win_loss = phi_won.map({True: 'W', False: 'L'}).tolist()

# Add the win_loss data to the DataFrame
phi_gm_stats['winLoss'] = win_loss
# Isolate relevant data for 76er Scatter Plots: regular-season rows only,
# keeping the shooting percentages and both point totals, ordered by date.
phi_gm_stats_2 = (team_stats[(team_stats['teamAbbr'] == 'PHI') &
                             (team_stats['seasTyp'] == 'Regular')]
                  .loc[:, ['gmDate',
                           'team2P%',
                           'team3P%',
                           'teamPTS',
                           'opptPTS']]
                  .sort_values('gmDate'))

# Add game number (1-based, following the date ordering above)
phi_gm_stats_2['game_num'] = range(1, len(phi_gm_stats_2) + 1)

# Derive a win_loss column: if the 76ers score more points, it's a win
# ('W'); anything else is recorded as a loss ('L'). The comparison is done
# on whole columns at once instead of looping over rows with iterrows().
phi_won = phi_gm_stats_2['teamPTS'] > phi_gm_stats_2['opptPTS']
win_loss = phi_won.map({True: 'W', False: 'L'}).tolist()

# Add the win_loss data to the DataFrame
phi_gm_stats_2['winLoss'] = win_loss
File: LinkSelection.py
# Bokeh Libraries
from bokeh.plotting import figure, show
from bokeh.io import output_file
from bokeh.models import ColumnDataSource, CategoricalColorMapper, NumeralTickFormatter
from bokeh.layouts import gridplot
# Load in Data
from read_nba_data import phi_gm_stats_2
# Render the output to a static HTML file
output_file(
    'phi_gm_linked_selections.html',
    title='76ers Percentages vs. Win-Loss',
)

# Wrap the game data in a single ColumnDataSource; both scatter plots in
# this script draw from this same object.
gm_stats_cds = ColumnDataSource(phi_gm_stats_2)

# Colour mapping for the 'W' / 'L' categories: wins green, losses red
win_loss_mapper = CategoricalColorMapper(
    factors=['W', 'L'],
    palette=['green', 'red'],
)

# Tools offered on each figure's toolbar
toolList = [
    'lasso_select',
    'tap',
    'reset',
    'save',
]
# Figure comparing two-point and three-point field goal percentages
pctFig = figure(
    title='2PT FG % vs 3PT FG %, 2017-18 Regular Season',
    plot_height=400,
    plot_width=400,
    tools=toolList,
    x_axis_label='2PT FG%',
    y_axis_label='3PT FG%',
)

# One black circle marker per game
pctFig.circle(
    x='team2P%',
    y='team3P%',
    source=gm_stats_cds,
    size=12,
    color='black',
)

# Show the tick labels on both axes as percentages
pctFig.xaxis[0].formatter = NumeralTickFormatter(format='00.0%')
pctFig.yaxis[0].formatter = NumeralTickFormatter(format='00.0%')
# Figure comparing the team's points with the opponent's points
totFig = figure(
    title='Team Points vs Opponent Points, 2017-18 Regular Season',
    plot_height=400,
    plot_width=400,
    tools=toolList,
    x_axis_label='Team Points',
    y_axis_label='Opponent Points',
)

# One square marker per game, coloured through the win/loss mapper
totFig.square(
    x='teamPTS',
    y='opptPTS',
    source=gm_stats_cds,
    size=10,
    color=dict(field='winLoss', transform=win_loss_mapper),
)
# Lay the two figures out side by side in a single row
grid = gridplot([
    [pctFig, totFig],
])

# Render the layout
show(grid)