Capstone - Complete Analytics System
Pressing has become one of the most important tactical concepts in modern football. High-intensity pressing disrupts opponent build-up, creates turnovers in dangerous areas, and sets the tempo for possession-dominant styles. This chapter explores how to measure, analyze, and optimize pressing performance.
Learning Objectives
- Quantify pressing intensity and effectiveness
- Measure PPDA, pressure success rate, and counterpressing
- Analyze pressing triggers and team coordination
- Evaluate individual pressing contributions
- Build pressing style profiles for teams
Pressing Fundamentals
Pressing is the act of applying pressure to opponents when they have the ball, with the goal of winning possession or forcing errors. We can measure pressing through several key metrics.
PPDA
Passes Per Defensive Action
Opponent passes allowed per defensive action (tackle, interception, foul). Lower = more aggressive press.
League avg: 10-12
Pressure Events
Raw pressure count
Total number of pressing actions per match. Includes successful and unsuccessful pressures.
High press teams: 180+
High Press %
Final third pressures
Percentage of pressures occurring in opponent's defensive third. Higher = more aggressive positioning.
Aggressive: >40%
import pandas as pd
import numpy as np
from statsbombpy import sb
def calculate_ppda(events, team_name):
    """
    Calculate Passes Per Defensive Action (PPDA) for one team.

    PPDA = (opponent passes with x < 60) / (team defensive actions with
    x > 60), where defensive actions are Pressure, Duel, Interception and
    Foul Committed events. Lower PPDA = more aggressive pressing.

    Parameters
    ----------
    events : pandas.DataFrame
        Event data with 'team', 'type' and 'location' columns. When a
        'match_id' column is present, only matches in which `team_name`
        appears are used, so a multi-match frame does not count passes
        from fixtures the team never played in.
    team_name : str
        Team whose pressing is measured.

    Returns
    -------
    float
        The PPDA value, or NaN when the team has no qualifying
        defensive action.
    """
    # Restrict a multi-match frame to this team's own fixtures
    if 'match_id' in events.columns:
        team_matches = events.loc[events['team'] == team_name, 'match_id'].unique()
        events = events[events['match_id'].isin(team_matches)]

    # Events without a usable location become NaN so they fall in neither zone
    x = events['location'].apply(
        lambda loc: loc[0] if isinstance(loc, list) else np.nan
    ).astype(float)

    is_team = events['team'] == team_name

    # Opponent passes with x < 60
    opponent_passes_count = int(
        (~is_team & (events['type'] == 'Pass') & (x < 60)).sum()
    )

    # Team defensive actions with x > 60
    defensive_types = ['Pressure', 'Duel', 'Interception', 'Foul Committed']
    defensive_actions_count = int(
        (is_team & events['type'].isin(defensive_types) & (x > 60)).sum()
    )

    if defensive_actions_count == 0:
        return np.nan
    return opponent_passes_count / defensive_actions_count
# Load match data (event feed for a single match via statsbombpy)
events = sb.events(match_id=3773585)
# Calculate PPDA
# NOTE(review): confirm that 'Liverpool' actually plays in match 3773585;
# if it does not, calculate_ppda returns NaN and this prints "nan".
liverpool_ppda = calculate_ppda(events, 'Liverpool')
print(f"Liverpool PPDA: {liverpool_ppda:.2f}")
def calculate_pressing_metrics(events, team_name):
    """
    Calculate comprehensive pressing metrics for a team.

    Parameters
    ----------
    events : pandas.DataFrame
        Event data with 'team', 'type', 'location' and 'minute' columns.
        A 'counterpress' column is optional.
    team_name : str
        Team whose 'Pressure' events are summarised.

    Returns
    -------
    dict or None
        Counts and percentages of pressures by zone (x > 80 high,
        40 <= x <= 80 mid, x < 40 low), pressures per 90 minutes and the
        number of pressures flagged as counterpress. None when the team
        has no pressure events.
    """
    team_events = events[events['team'] == team_name]
    # Get all pressures
    pressures = team_events[team_events['type'] == 'Pressure'].copy()
    if len(pressures) == 0:
        return None
    # Extract locations; pressures without a location are placed at x = 60
    pressures['x'] = pressures['location'].apply(
        lambda loc: loc[0] if isinstance(loc, list) else 60
    )
    # Match length is approximated by the last recorded minute
    match_minutes = events['minute'].max()

    in_high = pressures['x'] > 80
    in_mid = (pressures['x'] >= 40) & (pressures['x'] <= 80)
    in_low = pressures['x'] < 40

    # The flag column only exists when at least one event carries it;
    # treat a missing column as "no flagged pressures" instead of failing.
    if 'counterpress' in pressures.columns:
        flagged = int(pressures['counterpress'].eq(True).sum())
    else:
        flagged = 0

    metrics = {
        'total_pressures': len(pressures),
        'pressures_per_90': len(pressures) / (match_minutes / 90),
        # Pressure zones
        'high_pressures': int(in_high.sum()),
        'high_press_pct': in_high.mean() * 100,
        'mid_pressures': int(in_mid.sum()),
        'mid_press_pct': in_mid.mean() * 100,
        'low_pressures': int(in_low.sum()),
        'low_press_pct': in_low.mean() * 100,
        # Pressures carrying the counterpress flag
        'pressure_regains': flagged
    }
    return metrics
liverpool_metrics = calculate_pressing_metrics(events, 'Liverpool')
print(pd.Series(liverpool_metrics))
library(tidyverse)
library(StatsBombR)
# Calculate PPDA (Passes Per Defensive Action)
#
# PPDA = opponent passes with location.x < 60 divided by the team's
# defensive actions (Pressure, Duel, Interception, Foul Committed) with
# location.x > 60. Lower values mean more aggressive pressing.
#
# Args:
#   events:    Event data frame with team.name, type.name and location.x.
#              If a match_id column is present, only matches in which
#              `team_name` appears are used.
#   team_name: Name of the pressing team.
# Returns:
#   A single numeric PPDA value, or NA_real_ when the team has no
#   qualifying defensive action.
calculate_ppda <- function(events, team_name) {
  # With several matches supplied, keep only the fixtures this team played
  if ("match_id" %in% names(events)) {
    team_matches <- unique(events$match_id[events$team.name %in% team_name])
    events <- events %>%
      filter(match_id %in% team_matches)
  }

  # Opponent passes with location.x < 60
  opponent_passes <- events %>%
    filter(
      team.name != team_name,
      type.name == "Pass",
      location.x < 60
    ) %>%
    nrow()

  # Defensive actions by the pressing team with location.x > 60
  defensive_actions <- events %>%
    filter(
      team.name == team_name,
      type.name %in% c("Pressure", "Duel", "Interception", "Foul Committed"),
      location.x > 60
    ) %>%
    nrow()

  # Scalar condition: plain if/else keeps the result numeric
  if (defensive_actions > 0) {
    opponent_passes / defensive_actions
  } else {
    NA_real_
  }
}
# Load match data
# NOTE(review): `match_id` is not defined anywhere in the visible code, and
# get.matchFree() presumably expects a row of the matches table rather than a
# bare id -- confirm against the StatsBombR documentation before running.
match_events <- get.matchFree(match_id)
# Calculate PPDA for both teams
# NOTE(review): assumes the loaded match is Liverpool v Manchester City;
# a team that is not in the data yields NA.
home_ppda <- calculate_ppda(match_events, "Liverpool")
away_ppda <- calculate_ppda(match_events, "Manchester City")
cat("Liverpool PPDA:", round(home_ppda, 2), "\n")
cat("Manchester City PPDA:", round(away_ppda, 2), "\n")
# Calculate pressing metrics for a season
#
# Summarises one team's Pressure events across every match in `all_events`.
#
# Args:
#   all_events: Event data frame with team.name, type.name, location.x,
#               minute, index, possession_team.name and counterpress.
#               match_id is optional; without it the data is treated as a
#               single match.
#   team_name:  Team whose pressing is summarised.
# Returns:
#   A one-row tibble with total pressures, pressures per 90 minutes, counts
#   and percentages by zone (x > 80 high, 40-80 mid, < 40 low) and a
#   pressure success rate. A pressure counts as successful when it carries
#   the counterpress flag or the next event in the same match belongs to a
#   possession of `team_name`.
calculate_season_pressing <- function(all_events, team_name) {
  if (!"match_id" %in% names(all_events)) {
    all_events$match_id <- rep(1L, nrow(all_events))
  }

  # Tag every event with the possession team of the event that follows it
  # in the same match, so each pressure can be checked row by row.
  ordered_events <- all_events %>%
    group_by(match_id) %>%
    arrange(index, .by_group = TRUE) %>%
    mutate(next_possession_team = lead(possession_team.name)) %>%
    ungroup()

  team_events <- ordered_events %>%
    filter(team.name == team_name)

  # Total pressures
  pressures <- team_events %>%
    filter(type.name == "Pressure")

  # Minutes played, approximated by the last recorded minute of each match
  minutes_played <- team_events %>%
    group_by(match_id) %>%
    summarise(match_minutes = max(minute, na.rm = TRUE), .groups = "drop") %>%
    pull(match_minutes) %>%
    sum()

  # NA flags mean "not flagged", not "unknown"
  pressure_won <- coalesce(pressures$counterpress, FALSE) |
    coalesce(pressures$next_possession_team == team_name, FALSE)

  in_high <- pressures$location.x > 80
  in_mid <- pressures$location.x >= 40 & pressures$location.x <= 80
  in_low <- pressures$location.x < 40

  metrics <- list(
    total_pressures = nrow(pressures),
    pressures_per_90 = if (minutes_played > 0) {
      nrow(pressures) / minutes_played * 90
    } else {
      NA_real_
    },
    # High press (final third)
    high_pressures = sum(in_high, na.rm = TRUE),
    high_press_pct = mean(in_high, na.rm = TRUE) * 100,
    # Middle press
    mid_pressures = sum(in_mid, na.rm = TRUE),
    mid_press_pct = mean(in_mid, na.rm = TRUE) * 100,
    # Low press
    low_pressures = sum(in_low, na.rm = TRUE),
    low_press_pct = mean(in_low, na.rm = TRUE) * 100,
    # Pressure success rate
    pressure_success_rate = mean(pressure_won) * 100
  )

  as_tibble(metrics)
}
# Calculate for team
liverpool_pressing <- calculate_season_pressing(season_events, "Liverpool")
print(liverpool_pressing)
Counterpressing (Gegenpressing)
Counterpressing refers to the immediate attempt to win the ball back after losing possession. It's a critical component of modern pressing systems, particularly associated with Jürgen Klopp and Pep Guardiola.
def analyze_counterpressing(events, team_name):
    """
    Analyze counterpressing effectiveness.

    Possession losses are passes with a non-null 'pass_outcome', dribbles
    whose 'dribble_outcome' is not 'Complete', and shots whose
    'shot_outcome' is not 'Goal'. Counterpresses are 'Pressure' events
    with 'counterpress' == True.

    Parameters
    ----------
    events : pandas.DataFrame
        Event data with 'team', 'type' and 'location' columns. The outcome
        columns, 'counterpress' and 'counterpress_successful' are optional.
    team_name : str
        Team to analyse.

    Returns
    -------
    dict or None
        Loss and counterpress counts, counterpress rate (%), zone
        breakdown and success rate (%). The success rate is NaN when the
        data has no 'counterpress_successful' column. None when the team
        has no possession losses.
    """
    team_events = events[events['team'] == team_name].copy()

    def column(name):
        # Optional columns may be absent from the frame entirely;
        # an all-missing stand-in keeps the masks below well defined.
        if name in team_events.columns:
            return team_events[name]
        return pd.Series(np.nan, index=team_events.index, dtype=object)

    event_type = team_events['type']
    # Identify possession losses
    failed_pass = (event_type == 'Pass') & column('pass_outcome').notna()
    failed_dribble = (event_type == 'Dribble') & (column('dribble_outcome') != 'Complete')
    unscored_shot = (event_type == 'Shot') & ~column('shot_outcome').isin(['Goal'])
    possession_losses = team_events[failed_pass | failed_dribble | unscored_shot]
    if len(possession_losses) == 0:
        return None

    # Get pressures marked as counterpresses
    counterpresses = team_events[
        (event_type == 'Pressure') & column('counterpress').eq(True)
    ].copy()
    # Extract locations; a missing location is placed at x = 60
    counterpresses['x'] = counterpresses['location'].apply(
        lambda loc: loc[0] if isinstance(loc, list) else 60
    )

    # Report "unknown" rather than inventing a success rate when the data
    # carries no outcome information for counterpresses.
    if 'counterpress_successful' in counterpresses.columns:
        success_rate = counterpresses['counterpress_successful'].mean() * 100
    else:
        success_rate = np.nan

    metrics = {
        'total_possession_losses': len(possession_losses),
        'counterpresses': len(counterpresses),
        'counterpress_rate': len(counterpresses) / len(possession_losses) * 100,
        # Zone breakdown
        'high_zone_counterpresses': int((counterpresses['x'] > 80).sum()),
        'mid_zone_counterpresses': int(
            ((counterpresses['x'] >= 40) & (counterpresses['x'] <= 80)).sum()
        ),
        'counterpress_success_rate': success_rate
    }
    return metrics
counterpress_stats = analyze_counterpressing(events, 'Liverpool')
print(pd.Series(counterpress_stats))
def visualize_counterpresses(events, team_name):
    """
    Create heatmap of counterpress locations.

    Draws a StatsBomb pitch with a kernel-density overlay of every event by
    `team_name` whose 'counterpress' flag is True. Events without a
    location are left out. Nothing is drawn when fewer than two located
    events remain, because a density cannot be estimated from them.

    Parameters
    ----------
    events : pandas.DataFrame
        Event data with 'team', 'counterpress' and 'location' columns.
    team_name : str
        Team to plot.
    """
    # Imported locally so the function works in a snippet that has not
    # imported matplotlib at module level.
    import matplotlib.pyplot as plt
    from mplsoccer import Pitch

    counterpresses = events[
        (events['team'] == team_name) &
        (events['counterpress'] == True)
    ].copy()
    # Keep only events that carry real coordinates
    located = counterpresses['location'].apply(lambda loc: isinstance(loc, list))
    counterpresses = counterpresses[located]
    if len(counterpresses) < 2:
        print(f'Not enough located counterpresses to plot for {team_name}')
        return
    counterpresses['x'] = counterpresses['location'].apply(lambda loc: loc[0])
    counterpresses['y'] = counterpresses['location'].apply(lambda loc: loc[1])

    pitch = Pitch(pitch_type='statsbomb', line_color='white', pitch_color='#1a1a1a')
    fig, ax = pitch.draw(figsize=(12, 8))
    pitch.kdeplot(counterpresses['x'], counterpresses['y'],
                  ax=ax, cmap='Reds', fill=True, levels=50, alpha=0.7)
    ax.set_title(f'{team_name} Counterpress Locations', fontsize=14, color='white')
    plt.tight_layout()
    plt.show()
visualize_counterpresses(events, 'Liverpool')
# Analyze counterpressing effectiveness
# Measure how often, how quickly and where a team presses after losing
# the ball.
#
# A possession loss is a pass with a recorded outcome, a dribble whose
# outcome is not "Complete", or a shot that was Saved, Blocked or Off T.
# A counterpress is the first Pressure by the same team within 5 seconds
# of a loss in the same match (and period, when that column exists).
#
# Args:
#   events:    Event data frame with match_id, index, timestamp, team.name,
#              type.name, location.x/y, the three outcome columns and
#              counterpress.
#   team_name: Team to analyse.
# Returns:
#   A named list of counts, rates (in %), mean reaction time (seconds) and
#   a zone breakdown. Rates are NA_real_ when their denominator is zero.
analyze_counterpressing <- function(events, team_name) {
  # Assumes `timestamp` is an "HH:MM:SS.mmm" string measured from the start
  # of the period -- TODO confirm against the data source. Non-character
  # input is taken to be numeric seconds already.
  to_seconds <- function(ts) {
    if (!is.character(ts)) {
      return(as.numeric(ts))
    }
    parts <- strsplit(ts, ":", fixed = TRUE)
    vapply(
      parts,
      function(p) sum(as.numeric(p) * c(3600, 60, 1)),
      numeric(1)
    )
  }

  # Clock values are only comparable inside one match (and period)
  join_keys <- intersect(c("match_id", "period"), names(events))

  # Identify possession losses
  possession_losses <- events %>%
    filter(team.name == team_name) %>%
    filter(
      (type.name == "Pass" & !is.na(pass.outcome.name)) |
        (type.name == "Dribble" & !is.na(dribble.outcome.name) &
          dribble.outcome.name != "Complete") |
        (type.name == "Shot" &
          shot.outcome.name %in% c("Saved", "Blocked", "Off T"))
    )

  loss_windows <- possession_losses %>%
    mutate(loss_secs = to_seconds(timestamp)) %>%
    select(all_of(join_keys), loss_index = index, loss_secs,
           loss_x = location.x, loss_y = location.y)

  # Pair every pressure with every loss of the same match/period, then keep
  # the earliest pressure inside each 5-second window. Recent dplyr may warn
  # about the many-to-many join; that fan-out is intended here.
  counterpresses <- events %>%
    filter(team.name == team_name, type.name == "Pressure") %>%
    mutate(press_secs = to_seconds(timestamp)) %>%
    inner_join(loss_windows, by = join_keys) %>%
    mutate(time_diff = press_secs - loss_secs) %>%
    filter(time_diff > 0, time_diff <= 5) %>%
    group_by(match_id, loss_index) %>%
    slice_min(time_diff, n = 1, with_ties = FALSE) %>%
    ungroup()

  n_losses <- nrow(possession_losses)
  n_counterpresses <- nrow(counterpresses)
  # A missing flag means "not flagged"
  flagged <- coalesce(counterpresses$counterpress, FALSE)

  list(
    total_losses = n_losses,
    counterpresses_attempted = n_counterpresses,
    counterpress_rate = if (n_losses > 0) {
      n_counterpresses / n_losses * 100
    } else {
      NA_real_
    },
    # Regain rate
    successful_regains = sum(flagged),
    regain_rate = if (n_counterpresses > 0) {
      mean(flagged) * 100
    } else {
      NA_real_
    },
    # Average time to counterpress
    avg_reaction_time = if (n_counterpresses > 0) {
      mean(counterpresses$time_diff)
    } else {
      NA_real_
    },
    # Zone analysis
    high_zone_counterpresses = sum(counterpresses$location.x > 80,
                                   na.rm = TRUE),
    mid_zone_counterpresses = sum(counterpresses$location.x >= 40 &
                                    counterpresses$location.x <= 80,
                                  na.rm = TRUE)
  )
}
# Calculate counterpressing metrics
counterpress_stats <- analyze_counterpressing(match_events, "Liverpool")
# Visualize counterpress locations
counterpress_map <- match_events %>%
  filter(team.name == "Liverpool", counterpress == TRUE)
# The pitch helpers live in ggsoccer, which this script never attaches,
# so they are called through the namespace.
ggplot(counterpress_map) +
  ggsoccer::annotate_pitch(dimensions = ggsoccer::pitch_statsbomb) +
  geom_point(aes(x = location.x, y = location.y),
             color = "red", size = 3, alpha = 0.6) +
  labs(title = "Liverpool Counterpress Locations") +
  ggsoccer::theme_pitch() +
  coord_flip()
Pressing Triggers
Elite pressing teams don't press randomly - they identify specific "triggers" that signal good opportunities to win the ball. Common triggers include backward passes, poor first touches, and balls played to isolated players.
def identify_pressing_triggers(events, team_name):
    """
    Identify what triggers pressing actions.

    Each 'Pressure' event by `team_name` is labelled according to the event
    immediately before it in the same match (ordered by 'index').

    Parameters
    ----------
    events : pandas.DataFrame
        Event data with 'match_id', 'index', 'team', 'type' and 'location'
        columns. 'pass_type' and 'counterpress' are optional.
    team_name : str
        Team whose pressures are analysed.

    Returns
    -------
    pandas.DataFrame
        One row per trigger with columns 'trigger', 'count',
        'success_rate' (% of pressures carrying the counterpress flag) and
        'pct' (% of all pressures), sorted by 'count' descending.
    """
    ordered = events.sort_values(['match_id', 'index']).reset_index(drop=True)

    # Previous event of each row; only valid where the match did not change
    prev_type = ordered['type'].shift(1)
    prev_location = ordered['location'].shift(1)
    if 'pass_type' in ordered.columns:
        prev_pass_type = ordered['pass_type'].shift(1)
    else:
        prev_pass_type = pd.Series(None, index=ordered.index, dtype=object)
    same_match = ordered['match_id'].eq(ordered['match_id'].shift(1))

    # A missing flag means "not flagged"; without this every rate is 100%
    if 'counterpress' in ordered.columns:
        flagged = ordered['counterpress'].eq(True)
    else:
        flagged = pd.Series(False, index=ordered.index)

    is_pressure = (ordered['team'] == team_name) & (ordered['type'] == 'Pressure')

    def classify(p_type, p_loc, p_pass_type, press_loc):
        if p_type == 'Pass':
            # Checked first: a goal kick would otherwise be absorbed by the
            # positional rules below.
            if p_pass_type == 'Goal Kick':
                return 'Goal Kick'
            prev_x = p_loc[0] if isinstance(p_loc, list) else 60
            prev_y = p_loc[1] if isinstance(p_loc, list) else 40
            press_x = press_loc[0] if isinstance(press_loc, list) else 60
            # NOTE(review): compares the pass origin with the pressure
            # location; confirm both share one coordinate frame.
            if (120 - prev_x) > (120 - press_x):
                return 'Backward Pass'
            if prev_y < 20 or prev_y > 60:
                return 'Ball to Wide Player'
            return 'Standard Pass'
        # The receipt event type is spelled with a trailing asterisk
        if p_type == 'Ball Receipt*':
            return 'First Touch'
        if p_type == 'Dribble':
            return 'Dribble Attempt'
        return 'Other'

    labels = []
    for p_type, p_loc, p_pass_type, press_loc, has_prev in zip(
        prev_type[is_pressure],
        prev_location[is_pressure],
        prev_pass_type[is_pressure],
        ordered.loc[is_pressure, 'location'],
        same_match[is_pressure],
    ):
        if not has_prev:
            labels.append('Game Start')
        else:
            labels.append(classify(p_type, p_loc, p_pass_type, press_loc))

    triggers_df = pd.DataFrame({
        'trigger': pd.Series(labels, dtype=object),
        'success': flagged[is_pressure].astype(bool).to_numpy(),
    })
    # Aggregate
    trigger_summary = (
        triggers_df
        .groupby('trigger')
        .agg(count=('success', 'size'), success_rate=('success', 'mean'))
        .reset_index()
    )
    trigger_summary['success_rate'] = trigger_summary['success_rate'] * 100
    trigger_summary['pct'] = trigger_summary['count'] / trigger_summary['count'].sum() * 100
    return trigger_summary.sort_values('count', ascending=False)
triggers = identify_pressing_triggers(events, 'Liverpool')
print(triggers)
def plot_trigger_analysis(triggers):
    """
    Visualize pressing triggers and their effectiveness.

    Draws a horizontal bar per trigger: bar length is the 'count', bar
    colour maps 'success_rate' (0-100) onto the RdYlGn colormap, and the
    rate is printed next to each bar.

    Parameters
    ----------
    triggers : pandas.DataFrame
        Frame with 'trigger', 'count' and 'success_rate' columns.
    """
    # Imported locally so the function works in a snippet that has not
    # imported matplotlib at module level.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(10, 6))
    bar_colors = plt.cm.RdYlGn(triggers['success_rate'].to_numpy() / 100)
    bars = ax.barh(triggers['trigger'], triggers['count'], color=bar_colors)
    # Add success rate labels
    for bar, rate in zip(bars, triggers['success_rate']):
        ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
                f'{rate:.0f}%', va='center', fontsize=9)
    ax.set_xlabel('Count')
    ax.set_title('Pressing Triggers Analysis\n(Color indicates success rate)')
    plt.tight_layout()
    plt.show()
plot_trigger_analysis(triggers)
# Identify pressing triggers
# Label each pressure of a team by the event that directly preceded it.
#
# Args:
#   events:    Event data frame with index, team.name, type.name,
#              location.x/y, pass.type.name, pass.body_part.name and
#              counterpress. match_id is used as an extra join key when
#              present so events of different matches are never paired.
#   team_name: Team whose pressures are analysed.
# Returns:
#   A tibble with one row per trigger: count, pct (% of all pressures) and
#   success_rate (% of pressures carrying the counterpress flag), sorted by
#   count descending.
identify_pressing_triggers <- function(events, team_name) {
  # Get all pressures with context
  pressures <- events %>%
    filter(team.name == team_name, type.name == "Pressure")

  # Candidate "previous" events, keyed by their own index
  previous_events <- events %>%
    select(any_of("match_id"),
           prev_index = index, prev_type = type.name, prev_team = team.name,
           prev_x = location.x, prev_y = location.y,
           pass_type = pass.type.name,
           pass_body_part = pass.body_part.name)
  join_keys <- intersect(c("match_id", "prev_index"), names(previous_events))

  # Get the event that triggered the press (previous event)
  pressure_context <- pressures %>%
    mutate(prev_index = index - 1) %>%
    left_join(previous_events, by = join_keys)

  # Categorize triggers; case_when() takes the first matching rule, so the
  # specific pass rules must come before the generic "Standard Pass".
  pressure_context %>%
    mutate(
      trigger = case_when(
        # Goalkeeper distribution
        prev_type == "Pass" &
          pass_type == "Goal Kick" ~ "Goal Kick",
        # Backward pass
        # NOTE(review): compares the pass origin with the pressure location;
        # confirm both share one coordinate frame.
        prev_type == "Pass" &
          (120 - prev_x) > (120 - location.x) ~ "Backward Pass",
        # Ball to wide areas
        prev_type == "Pass" &
          (prev_y < 20 | prev_y > 60) ~ "Ball to Wide Player",
        # Any other pass
        prev_type == "Pass" ~ "Standard Pass",
        # First touch
        prev_type == "Ball Receipt*" ~ "First Touch",
        # Dribble attempt
        prev_type == "Dribble" ~ "Dribble Attempt",
        TRUE ~ "Other"
      )
    ) %>%
    group_by(trigger) %>%
    summarise(
      count = n(),
      pct = n() / nrow(pressures) * 100,
      # A missing flag means "not flagged"
      success_rate = mean(coalesce(counterpress, FALSE)) * 100,
      .groups = "drop"
    ) %>%
    arrange(desc(count))
}
triggers <- identify_pressing_triggers(match_events, "Liverpool")
print(triggers)
# Horizontal bars of trigger frequency; fill colour and the text label
# both carry the success rate.
trigger_plot <- ggplot(
  triggers,
  aes(x = reorder(trigger, count), y = count)
) +
  geom_col(aes(fill = success_rate)) +
  geom_text(
    aes(label = paste0(round(success_rate, 0), "%")),
    hjust = -0.1,
    size = 3
  ) +
  scale_fill_gradient(
    name = "Success Rate",
    low = "coral",
    high = "forestgreen"
  ) +
  coord_flip() +
  labs(
    x = "Trigger Type",
    y = "Count",
    title = "Pressing Triggers Analysis",
    subtitle = "Frequency and success rate by trigger type"
  ) +
  theme_minimal()
print(trigger_plot)
# Analyze pressing by opponent player position
# An event index belongs to exactly one event, so the pressed player has to
# be looked up on a different row. Here the opponent who performed the event
# directly before the pressure is used.
# NOTE(review): confirm that "previous event" is an acceptable proxy for the
# pressed player in this data.
opponent_positions <- match_events %>%
  filter(team.name != "Liverpool") %>%
  select(prev_index = index, target_position = position.name)
press_by_position <- match_events %>%
  filter(team.name == "Liverpool", type.name == "Pressure") %>%
  mutate(prev_index = index - 1) %>%
  left_join(opponent_positions, by = "prev_index") %>%
  group_by(target_position) %>%
  summarise(
    pressures = n(),
    # A missing flag means "not flagged"
    success_rate = mean(coalesce(counterpress, FALSE)) * 100,
    .groups = "drop"
  ) %>%
  arrange(desc(pressures))
Individual Pressing Contributions
While pressing is a team activity, individual players contribute differently. Analyzing per-player pressing metrics helps identify the best pressers and those who may need improvement.
def calculate_player_pressing(events, team_name):
    """
    Calculate individual pressing metrics for each player.

    Parameters
    ----------
    events : pandas.DataFrame
        Event data with 'team', 'player', 'position', 'type', 'minute' and
        'location' columns. 'counterpress' is optional.
    team_name : str
        Team whose players are analysed.

    Returns
    -------
    pandas.DataFrame
        One row per player (and position) with pressure counts, zone
        counts (x > 80 high, 40-80 mid, < 40 low), mean pressure x,
        minutes, pressures per 90, success rate (% of pressures carrying
        the counterpress flag) and high-press percentage, sorted by
        pressures per 90 descending. Minutes are the span between a
        player's first and last event and are only kept when above 10.
    """
    team_events = events[events['team'] == team_name]
    # Get player minutes
    player_minutes = (
        team_events
        .groupby(['player', 'position'])['minute']
        .agg(start_min='min', end_min='max')
        .reset_index()
    )
    player_minutes['minutes'] = player_minutes['end_min'] - player_minutes['start_min']
    player_minutes = player_minutes[player_minutes['minutes'] > 10]

    # Get pressures
    pressures = team_events[team_events['type'] == 'Pressure'].copy()
    pressures['x'] = pressures['location'].apply(
        lambda loc: loc[0] if isinstance(loc, list) else 60
    )
    # The flag column usually holds True/NaN as objects, so compare by value
    # instead of relying on a boolean dtype.
    if 'counterpress' in pressures.columns:
        pressures['_flagged'] = pressures['counterpress'].eq(True)
    else:
        pressures['_flagged'] = False
    pressures['_high'] = pressures['x'] > 80
    pressures['_mid'] = (pressures['x'] >= 40) & (pressures['x'] <= 80)
    pressures['_low'] = pressures['x'] < 40

    # Calculate per-player metrics
    player_stats = (
        pressures
        .groupby('player')
        .agg(
            total_pressures=('x', 'size'),
            successful_pressures=('_flagged', 'sum'),
            avg_pressure_x=('x', 'mean'),
            high_pressures=('_high', 'sum'),
            mid_pressures=('_mid', 'sum'),
            low_pressures=('_low', 'sum'),
        )
        .reset_index()
    )
    # Merge with minutes
    player_stats = player_stats.merge(
        player_minutes[['player', 'position', 'minutes']],
        on='player', how='left'
    )
    # Calculate rates
    player_stats['pressures_per_90'] = (
        player_stats['total_pressures'] / (player_stats['minutes'] / 90)
    )
    player_stats['success_rate'] = (
        player_stats['successful_pressures'] / player_stats['total_pressures'] * 100
    )
    player_stats['high_press_pct'] = (
        player_stats['high_pressures'] / player_stats['total_pressures'] * 100
    )
    return player_stats.sort_values('pressures_per_90', ascending=False)
player_pressing = calculate_player_pressing(events, 'Liverpool')
print(player_pressing[['player', 'position', 'pressures_per_90', 'success_rate', 'high_press_pct']].head(10))
def plot_pressing_by_position(player_stats):
    """
    Compare pressing contributions by position.

    Averages 'pressures_per_90', 'success_rate' and 'high_press_pct' per
    'position' and draws one horizontal bar per position (length = mean
    pressures per 90) annotated with the mean success rate.

    Parameters
    ----------
    player_stats : pandas.DataFrame
        Per-player frame with 'position', 'pressures_per_90',
        'success_rate' and 'high_press_pct' columns.
    """
    # Imported locally so the function works in a snippet that has not
    # imported matplotlib at module level.
    import matplotlib.pyplot as plt

    position_stats = (
        player_stats
        .groupby('position')
        .agg({
            'pressures_per_90': 'mean',
            'success_rate': 'mean',
            'high_press_pct': 'mean'
        })
        .reset_index()
        .sort_values('pressures_per_90', ascending=True)
    )
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.barh(position_stats['position'], position_stats['pressures_per_90'],
                   color='steelblue')
    # Add success rate as text
    for bar, rate in zip(bars, position_stats['success_rate']):
        ax.text(bar.get_width() + 0.5, bar.get_y() + bar.get_height()/2,
                f'{rate:.0f}% success', va='center', fontsize=9)
    ax.set_xlabel('Pressures per 90')
    ax.set_title('Pressing by Position')
    plt.tight_layout()
    plt.show()
plot_pressing_by_position(player_pressing)
# Calculate individual pressing metrics
# Per-player pressing summary for one team.
#
# Args:
#   events:    Event data frame with team.name, player.name, position.name,
#              type.name, minute, location.x/y and counterpress.
#   team_name: Team whose players are summarised.
# Returns:
#   One row per player with pressure counts, zone counts, mean pressure
#   location, position, minutes (first-to-last event span, kept only when
#   above 10), pressures per 90, success rate and high-press percentage,
#   ordered by pressures per 90 descending.
calculate_player_pressing <- function(events, team_name) {
  squad_events <- filter(events, team.name == team_name)

  # Minutes: span between each player's first and last recorded minute
  minutes_by_player <- squad_events %>%
    group_by(player.name, position.name) %>%
    summarise(minutes = diff(range(minute)), .groups = "drop") %>%
    filter(minutes > 10)

  # Raw pressure tallies per player
  pressure_tallies <- squad_events %>%
    filter(type.name == "Pressure") %>%
    group_by(player.name) %>%
    summarise(
      total_pressures = n(),
      successful_pressures = sum(counterpress == TRUE, na.rm = TRUE),
      high_pressures = sum(location.x > 80),
      mid_pressures = sum(location.x >= 40 & location.x <= 80),
      low_pressures = sum(location.x < 40),
      avg_pressure_x = mean(location.x),
      avg_pressure_y = mean(location.y),
      .groups = "drop"
    )

  # Attach minutes, derive the rates and rank
  with_rates <- pressure_tallies %>%
    left_join(minutes_by_player, by = "player.name") %>%
    mutate(
      pressures_per_90 = total_pressures / (minutes / 90),
      success_rate = successful_pressures / total_pressures * 100,
      high_press_pct = high_pressures / total_pressures * 100
    )

  arrange(with_rates, desc(pressures_per_90))
}
player_pressing <- calculate_player_pressing(match_events, "Liverpool")
# Ten most active pressers among players with at least 45 minutes
top_pressers <- player_pressing %>%
  filter(minutes >= 45) %>%
  head(10)
top_pressers_plot <- ggplot(
  top_pressers,
  aes(x = reorder(player.name, pressures_per_90), y = pressures_per_90)
) +
  geom_col(aes(fill = success_rate)) +
  scale_fill_gradient(
    name = "Success %",
    low = "coral",
    high = "forestgreen"
  ) +
  coord_flip() +
  labs(
    x = "",
    y = "Pressures per 90",
    title = "Top Pressers (Per 90 Minutes)"
  ) +
  theme_minimal()
print(top_pressers_plot)
# Average pressing profile of each position, most active first
position_pressing <- player_pressing %>%
  group_by(position.name) %>%
  summarise(
    avg_pressures_p90 = mean(pressures_per_90),
    avg_success_rate = mean(success_rate),
    avg_high_press_pct = mean(high_press_pct),
    .groups = "drop"
  )
position_pressing <- arrange(position_pressing, desc(avg_pressures_p90))
print(position_pressing)
Team Pressing Profiles
Different teams employ different pressing philosophies. We can create profiles that characterize each team's pressing style.
def create_pressing_profiles(league_data):
    """
    Create pressing profiles for all teams in a league.

    Parameters
    ----------
    league_data : pandas.DataFrame
        Event data for many matches with 'team', 'type', 'location' and
        'minute' columns. 'match_id' and 'counterpress' are optional; when
        'match_id' is present, PPDA and minutes are computed from each
        team's own fixtures only.

    Returns
    -------
    pandas.DataFrame
        One row per team that has at least one pressure, with columns
        'team', 'ppda', 'pressures_per_90', 'high_press_pct',
        'avg_pressure_height', 'counterpress_rate', 'wide_press_pct' and
        'pressing_style'.
    """
    has_match_id = 'match_id' in league_data.columns
    has_counterpress = 'counterpress' in league_data.columns
    columns = ['team', 'ppda', 'pressures_per_90', 'high_press_pct',
               'avg_pressure_height', 'counterpress_rate', 'wide_press_pct']

    profiles = []
    for team in league_data['team'].dropna().unique():
        team_events = league_data[league_data['team'] == team]
        pressures = team_events[team_events['type'] == 'Pressure']
        if len(pressures) == 0:
            continue
        # Kept as separate Series so the filtered slice is never written to
        press_x = pressures['location'].apply(
            lambda loc: loc[0] if isinstance(loc, list) else 60
        )
        press_y = pressures['location'].apply(
            lambda loc: loc[1] if isinstance(loc, list) else 40
        )
        if has_match_id:
            # Only this team's fixtures: otherwise every other club's passes
            # would be counted as "opponent passes" in the PPDA.
            team_matches = team_events['match_id'].unique()
            ppda_scope = league_data[league_data['match_id'].isin(team_matches)]
            # Last recorded minute of each match, summed over the matches
            minutes_played = team_events.groupby('match_id')['minute'].max().sum()
        else:
            ppda_scope = league_data
            minutes_played = team_events['minute'].max()

        if has_counterpress:
            # A missing flag means "not flagged"
            counterpress_rate = pressures['counterpress'].eq(True).mean() * 100
        else:
            counterpress_rate = 0

        profiles.append({
            'team': team,
            'ppda': calculate_ppda(ppda_scope, team),
            'pressures_per_90': len(pressures) / (minutes_played / 90),
            'high_press_pct': (press_x > 80).mean() * 100,
            'avg_pressure_height': press_x.mean(),
            'counterpress_rate': counterpress_rate,
            'wide_press_pct': ((press_y < 20) | (press_y > 60)).mean() * 100
        })
    profiles_df = pd.DataFrame(profiles, columns=columns)

    # Classify style
    def classify_style(row):
        if row['ppda'] < 8 and row['high_press_pct'] > 35:
            return 'Gegenpressing'
        elif row['ppda'] < 10 and row['high_press_pct'] > 25:
            return 'High Press'
        elif row['ppda'] < 12:
            return 'Mid Block'
        else:
            return 'Low Block'

    # Row-wise list keeps this well defined when no team qualified
    profiles_df['pressing_style'] = [
        classify_style(row) for _, row in profiles_df.iterrows()
    ]
    return profiles_df
# Create profiles
profiles = create_pressing_profiles(league_events)
def plot_pressing_profiles(profiles):
    """
    Scatter plot of team pressing profiles.

    Plots 'ppda' (x, axis inverted so aggressive teams sit to the right)
    against 'high_press_pct' (y). Marker size scales with
    'pressures_per_90', colour encodes 'pressing_style', and every point is
    labelled with its 'team'.

    Parameters
    ----------
    profiles : pandas.DataFrame
        Frame with 'team', 'ppda', 'high_press_pct', 'pressures_per_90'
        and 'pressing_style' columns.
    """
    # Imported locally so the function works in a snippet that has not
    # imported matplotlib at module level.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(12, 8))
    styles = profiles['pressing_style'].unique()
    colors = plt.cm.Set1(np.linspace(0, 1, len(styles)))
    style_colors = dict(zip(styles, colors))
    for style in styles:
        style_teams = profiles[profiles['pressing_style'] == style]
        ax.scatter(style_teams['ppda'], style_teams['high_press_pct'],
                   s=style_teams['pressures_per_90'] * 3,
                   c=[style_colors[style]], label=style, alpha=0.7)
    # Add team labels
    for _, row in profiles.iterrows():
        ax.annotate(row['team'], (row['ppda'], row['high_press_pct']),
                    fontsize=8, alpha=0.8)
    ax.set_xlabel('PPDA (lower = more aggressive)')
    ax.set_ylabel('High Press %')
    ax.set_title('Team Pressing Profiles')
    ax.legend(title='Style')
    ax.invert_xaxis()  # Lower PPDA = more aggressive
    plt.tight_layout()
    plt.show()
plot_pressing_profiles(profiles)
# Create team pressing profiles
# Build one pressing profile per team from league-wide event data.
#
# Args:
#   all_team_events: Event data frame with team.name, type.name,
#                    location.x/y, minute and counterpress. match_id is
#                    optional; without it the data is treated as one match.
# Returns:
#   A tibble with one row per team: ppda, pressures_per_90, high_press_pct,
#   avg_pressure_height, counterpress_rate, wide_press_pct, pressing_style
#   and intensity_tier (quartile of pressures_per_90). Percentages are
#   shares of the team's Pressure events.
create_pressing_profiles <- function(all_team_events) {
  if (!"match_id" %in% names(all_team_events)) {
    all_team_events$match_id <- rep(1L, nrow(all_team_events))
  }

  team_names <- all_team_events %>%
    filter(!is.na(team.name)) %>%
    distinct(team.name) %>%
    pull(team.name)

  # Intensity: PPDA is computed per team on that team's own fixtures, so
  # passes from matches it never played are not counted against it.
  ppda_by_team <- tibble(
    team.name = team_names,
    ppda = vapply(
      team_names,
      function(team) {
        team_matches <- unique(
          all_team_events$match_id[all_team_events$team.name %in% team]
        )
        team_scope <- filter(all_team_events, match_id %in% team_matches)
        as.numeric(calculate_ppda(team_scope, team))
      },
      numeric(1),
      USE.NAMES = FALSE
    )
  )

  # Minutes played: last recorded minute of each match, summed per team
  team_minutes <- all_team_events %>%
    group_by(team.name, match_id) %>%
    summarise(match_minutes = max(minute, na.rm = TRUE), .groups = "drop") %>%
    group_by(team.name) %>%
    summarise(minutes = sum(match_minutes), .groups = "drop")

  # Positioning, effectiveness and style, measured on pressures only
  pressure_summary <- all_team_events %>%
    filter(type.name == "Pressure") %>%
    group_by(team.name) %>%
    summarise(
      n_pressures = n(),
      high_press_pct = mean(location.x > 80, na.rm = TRUE) * 100,
      avg_pressure_height = mean(location.x, na.rm = TRUE),
      # A missing flag means "not flagged"
      counterpress_rate = mean(coalesce(counterpress, FALSE)) * 100,
      wide_press_pct = mean(location.y < 20 | location.y > 60,
                            na.rm = TRUE) * 100,
      .groups = "drop"
    )

  profiles <- ppda_by_team %>%
    left_join(team_minutes, by = "team.name") %>%
    left_join(pressure_summary, by = "team.name") %>%
    mutate(pressures_per_90 = n_pressures / minutes * 90) %>%
    select(team.name, ppda, pressures_per_90, high_press_pct,
           avg_pressure_height, counterpress_rate, wide_press_pct)

  # Classify pressing style
  profiles %>%
    mutate(
      pressing_style = case_when(
        ppda < 8 & high_press_pct > 35 ~ "Gegenpressing",
        ppda < 10 & high_press_pct > 25 ~ "High Press",
        ppda >= 10 & ppda < 12 ~ "Mid Block",
        ppda >= 12 ~ "Low Block",
        TRUE ~ "Balanced"
      ),
      intensity_tier = ntile(pressures_per_90, 4)
    )
}
# Build the league-wide profile table
league_pressing_profiles <- create_pressing_profiles(league_events)
# Scatter of PPDA against high-press share; point size shows pressures per
# 90 and colour shows the assigned style
profile_plot <- ggplot(
  league_pressing_profiles,
  aes(x = ppda, y = high_press_pct)
) +
  geom_point(
    aes(size = pressures_per_90, color = pressing_style),
    alpha = 0.7
  ) +
  geom_text(aes(label = team.name), vjust = -1, size = 3) +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "PPDA (lower = more aggressive)",
    y = "High Press %",
    title = "Team Pressing Profiles",
    color = "Style",
    size = "Pressures/90"
  ) +
  theme_minimal()
print(profile_plot)
# Radar comparison of pressing profiles
#
# Args:
#   profiles: Profile table with team.name, ppda, pressures_per_90,
#             high_press_pct, counterpress_rate and avg_pressure_height.
#   teams:    Character vector of team names to compare.
# Returns:
#   Called for its side effect: draws an fmsb radar chart with one polygon
#   per matching team and a legend. Stops with an error when none of
#   `teams` is present in `profiles`.
create_pressing_radar <- function(profiles, teams) {
  selected <- profiles %>%
    filter(team.name %in% teams) %>%
    select(team.name, ppda, pressures_per_90, high_press_pct,
           counterpress_rate, avg_pressure_height)
  if (nrow(selected) == 0) {
    stop("None of the requested teams are present in `profiles`.",
         call. = FALSE)
  }

  # With a single team (or identical PPDA values) the range is zero, which
  # would turn the min-max scaling into a division by zero.
  ppda_span <- max(selected$ppda, na.rm = TRUE) -
    min(selected$ppda, na.rm = TRUE)
  has_span <- is.finite(ppda_span) && ppda_span > 0

  radar_data <- selected %>%
    mutate(
      # Normalize to 0-100 scale (lower PPDA scores higher)
      ppda_norm = if (has_span) {
        (max(ppda, na.rm = TRUE) - ppda) / ppda_span * 100
      } else {
        100
      },
      intensity_norm = pressures_per_90 /
        max(pressures_per_90, na.rm = TRUE) * 100,
      height_norm = avg_pressure_height /
        max(avg_pressure_height, na.rm = TRUE) * 100,
      success_norm = counterpress_rate
    )

  radar_matrix <- radar_data %>%
    select(team.name, ppda_norm, intensity_norm, high_press_pct,
           success_norm, height_norm) %>%
    as.data.frame() %>%
    column_to_rownames("team.name")

  # Legend labels follow the plotted row order, which is the order of
  # `profiles` and not necessarily the order of `teams`.
  plotted_teams <- rownames(radar_matrix)
  line_cols <- seq_along(plotted_teams)

  # radarchart() expects the axis maximum and minimum as the first two rows
  radar_matrix <- rbind(
    rep(100, 5),
    rep(0, 5),
    radar_matrix
  )

  fmsb::radarchart(radar_matrix, pcol = line_cols, plty = 1, plwd = 2)
  legend("topright", legend = plotted_teams, col = line_cols,
         lty = 1, lwd = 2)
}
Pressing Style Categories
- Gegenpressing: Immediate counterpress after loss, very low PPDA (<8), high press %
- High Press: Sustained pressure in opponent's third, PPDA 8-10
- Mid Block: Organized pressing in middle third, PPDA 10-12
- Low Block: Deep defensive shape, selective pressing, PPDA >12
Practice Exercises
Exercise 25.1: Complete PPDA and Pressing Intensity Analysis System
Task: Build a comprehensive pressing intensity analysis system that calculates PPDA and related metrics for all teams across a league season, with rolling averages, trend analysis, and comparative visualizations.
Requirements:
- Calculate PPDA for each match and aggregate season averages per team
- Compute rolling PPDA over last 5 and 10 matches to track form
- Break down pressing intensity by zone (high, mid, low thirds)
- Analyze PPDA differences in home vs away matches
- Create league rankings with confidence intervals
- Generate visualization showing PPDA trends over the season
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsbombpy import sb
# ============================================
# COMPLETE PPDA ANALYSIS SYSTEM
# ============================================
def calculate_match_ppda(events, team_name):
    """
    Compute PPDA and a defensive-zone breakdown for one team in one match.

    Returns a dict with 'ppda' (opponent passes with x < 60 divided by the
    team's defensive actions with x > 60, NaN when there are none), the two
    raw counts, and the percentage of the team's Pressure / Tackle /
    Interception events in each third (x > 80, 40-80, < 40). Events with no
    list location have x = NaN and fall in no zone. The input frame is not
    modified.
    """
    # x coordinate of every event, NaN where the location is not a list
    x = events['location'].apply(
        lambda loc: loc[0] if isinstance(loc, list) else np.nan
    )
    by_team = events['team'] == team_name
    by_opponent = events['team'] != team_name

    # Numerator: opponent passes with x < 60
    n_passes = int((by_opponent & (events['type'] == 'Pass') & (x < 60)).sum())

    # Denominator: the team's defensive actions with x > 60
    ppda_types = ['Pressure', 'Tackle', 'Interception', 'Foul Committed', 'Duel']
    n_actions = int((by_team & events['type'].isin(ppda_types) & (x > 60)).sum())

    if n_actions > 0:
        ppda_value = n_passes / n_actions
    else:
        ppda_value = np.nan

    # Zone breakdown over a narrower set of event types
    zone_mask = by_team & events['type'].isin(['Pressure', 'Tackle', 'Interception'])
    zone_x = x[zone_mask]
    n_zone = len(zone_x)

    def share(count):
        return count / n_zone * 100 if n_zone > 0 else 0

    n_high = int((zone_x > 80).sum())
    n_mid = int(((zone_x >= 40) & (zone_x <= 80)).sum())
    n_low = int((zone_x < 40).sum())

    return {
        'ppda': ppda_value,
        'opponent_passes': n_passes,
        'defensive_actions': n_actions,
        'high_third_pct': share(n_high),
        'mid_third_pct': share(n_mid),
        'low_third_pct': share(n_low)
    }
def calculate_season_ppda(competition_id, season_id):
    """
    Calculate PPDA for all matches in a season.

    Loads every match of the competition/season through statsbombpy and
    computes `calculate_match_ppda` for the home and the away side.

    Parameters
    ----------
    competition_id, season_id
        Identifiers passed to `sb.matches`.

    Returns
    -------
    pandas.DataFrame
        Two rows per successfully loaded match (one per team) holding the
        PPDA metrics plus 'team', 'opponent', 'match_id', 'match_date',
        'venue' ('Home'/'Away') and 'match_week'. Matches whose events
        cannot be loaded are skipped with a warning.
    """
    import warnings

    matches = sb.matches(competition_id=competition_id, season_id=season_id)
    results = []
    for _, match in matches.iterrows():
        try:
            events = sb.events(match_id=match['match_id'])
        except Exception as exc:
            # Best effort: keep going, but make the gap in the data visible
            warnings.warn(
                f"Skipping match {match['match_id']}: {exc}"
            )
            continue
        sides = (
            (match['home_team'], match['away_team'], 'Home'),
            (match['away_team'], match['home_team'], 'Away'),
        )
        for team, opponent, venue in sides:
            row = calculate_match_ppda(events, team)
            row.update({
                'team': team,
                'opponent': opponent,
                'match_id': match['match_id'],
                'match_date': match['match_date'],
                'venue': venue,
                'match_week': match.get('match_week', 0)
            })
            results.append(row)
    return pd.DataFrame(results)
def add_rolling_metrics(ppda_data):
    """
    Add rolling averages and trend metrics.

    Parameters
    ----------
    ppda_data : pandas.DataFrame
        Per-match rows with 'team', 'match_date' and 'ppda' columns.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by team and date with four extra columns, all
        computed within each team:
        'ppda_rolling_5' (5-match mean, at least 3 matches),
        'ppda_rolling_10' (10-match mean, at least 5 matches),
        'ppda_trend' (rolling-5 value minus its value 5 matches earlier)
        and 'match_number' (1-based match counter). The input is not
        modified.
    """
    ppda_data = ppda_data.sort_values(['team', 'match_date']).reset_index(drop=True)

    # Column-wise transforms keep the 'team' column in place on every
    # pandas version, unlike applying a function to whole groups.
    ppda_by_team = ppda_data.groupby('team', sort=False)['ppda']
    ppda_data['ppda_rolling_5'] = ppda_by_team.transform(
        lambda s: s.rolling(5, min_periods=3).mean()
    )
    ppda_data['ppda_rolling_10'] = ppda_by_team.transform(
        lambda s: s.rolling(10, min_periods=5).mean()
    )
    earlier = ppda_data.groupby('team', sort=False)['ppda_rolling_5'].shift(5)
    ppda_data['ppda_trend'] = ppda_data['ppda_rolling_5'] - earlier
    ppda_data['match_number'] = ppda_data.groupby('team', sort=False).cumcount() + 1
    return ppda_data
def generate_ppda_rankings(ppda_data):
    """
    Build the league PPDA table.

    Takes per-match rows ('team', 'venue', 'ppda', 'high_third_pct') and
    returns one row per team, sorted by average PPDA ascending, with match
    count, mean / sd / median / min / max PPDA, mean high-third share, home
    and away means, a 95% confidence interval for the mean, a style label,
    the home-minus-away difference and the rank. Teams lacking either home
    or away matches drop out at the merge step.
    """
    table = (
        ppda_data
        .groupby('team')
        .agg(
            matches=('ppda', 'count'),
            avg_ppda=('ppda', 'mean'),
            ppda_sd=('ppda', 'std'),
            ppda_median=('ppda', 'median'),
            min_ppda=('ppda', 'min'),
            max_ppda=('ppda', 'max'),
            high_third_pct=('high_third_pct', 'mean'),
        )
        .reset_index()
    )

    # Home/Away split, attached one venue at a time
    for venue, column in (('Home', 'home_ppda'), ('Away', 'away_ppda')):
        venue_mean = (
            ppda_data.loc[ppda_data['venue'] == venue]
            .groupby('team')['ppda']
            .mean()
            .rename(column)
            .reset_index()
        )
        table = table.merge(venue_mean, on='team')

    # Confidence interval (normal approximation)
    margin = 1.96 * table['ppda_sd'] / np.sqrt(table['matches'])
    table['ci_lower'] = table['avg_ppda'] - margin
    table['ci_upper'] = table['avg_ppda'] + margin

    # Style classification by PPDA band; anything not below 12 is Low Block
    bands = [
        table['avg_ppda'] < 8,
        table['avg_ppda'] < 10,
        table['avg_ppda'] < 12,
    ]
    labels = ['Gegenpressing', 'High Press', 'Mid Block']
    table['pressing_style'] = np.select(bands, labels, default='Low Block')

    table['home_away_diff'] = table['home_ppda'] - table['away_ppda']
    table['ppda_rank'] = table['avg_ppda'].rank()
    return table.sort_values('avg_ppda')
def plot_ppda_trends(ppda_data, teams_to_show=None):
    """
    Line chart of each team's 5-match rolling PPDA across the season.

    `ppda_data` needs 'team', 'match_number' and 'ppda_rolling_5' columns.
    When `teams_to_show` is given (and non-empty) only those teams are
    drawn. Dashed reference lines mark the style thresholds, and the y-axis
    is inverted so more aggressive pressing appears higher up.
    """
    selection = ppda_data.copy()
    if teams_to_show:
        selection = selection[selection['team'].isin(teams_to_show)]

    fig, ax = plt.subplots(figsize=(14, 8))

    # One line per team, in order of first appearance
    for team_name in selection['team'].unique():
        rows = selection[selection['team'] == team_name]
        ax.plot(
            rows['match_number'],
            rows['ppda_rolling_5'],
            label=team_name,
            linewidth=2,
            alpha=0.8,
        )

    # Reference lines at the style thresholds
    thresholds = {8: 'Gegenpressing', 10: 'High Press', 12: 'Mid Block'}
    for ppda_level, style_name in thresholds.items():
        ax.axhline(y=ppda_level, linestyle='--', alpha=0.4, color='gray')
        ax.text(1, ppda_level - 0.3, style_name, fontsize=9, alpha=0.7)

    ax.set_xlabel('Match Number', fontsize=12)
    ax.set_ylabel('PPDA (5-match rolling avg)', fontsize=12)
    ax.set_title(
        'PPDA Trend Throughout Season\n(Lower = More Aggressive Pressing)',
        fontsize=14,
        fontweight='bold',
    )
    ax.legend(loc='upper right')
    ax.invert_yaxis()
    plt.tight_layout()
    plt.show()
def plot_ppda_rankings(rankings):
    """
    Draw a horizontal bar chart of average PPDA per team.

    Args:
        rankings: DataFrame with 'team', 'avg_ppda', 'ci_lower',
            'ci_upper' and 'pressing_style' columns. Every value in
            'pressing_style' must be one of the four style names below,
            otherwise the colour lookup raises KeyError.

    Bars are coloured by pressing style and carry error bars spanning
    ci_lower..ci_upper. The figure is shown with plt.show(); nothing is
    returned.
    """
    from matplotlib.patches import Patch

    palette = {
        'Gegenpressing': '#d73027',
        'High Press': '#fc8d59',
        'Mid Block': '#91bfdb',
        'Low Block': '#4575b4'
    }

    fig, ax = plt.subplots(figsize=(12, 10))

    teams = rankings['team']
    averages = rankings['avg_ppda']
    bar_colors = [palette[style] for style in rankings['pressing_style']]
    ax.barh(teams, averages, color=bar_colors, alpha=0.8)

    # Asymmetric error bars: distance from the mean to each CI bound.
    below = averages - rankings['ci_lower']
    above = rankings['ci_upper'] - averages
    ax.errorbar(averages, teams, xerr=[below, above],
                fmt='none', color='black', capsize=3)

    # Vertical guides at the style thresholds.
    for threshold in (8, 10, 12):
        ax.axvline(x=threshold, linestyle='--', alpha=0.4, color='gray')

    ax.set_xlabel('Average PPDA', fontsize=12)
    ax.set_title('League PPDA Rankings\n(with 95% confidence intervals)',
                 fontsize=14, fontweight='bold')

    # Legend: one swatch per pressing style.
    swatches = [Patch(facecolor=shade, label=style)
                for style, shade in palette.items()]
    ax.legend(handles=swatches, loc='lower right')

    plt.tight_layout()
    plt.show()
def generate_ppda_report(ppda_data, rankings):
    """
    Print a text summary of league-wide PPDA results to stdout.

    Args:
        ppda_data: DataFrame with a 'ppda' column (one row per team-match).
        rankings: DataFrame ordered from lowest to highest 'avg_ppda',
            with 'team', 'avg_ppda', 'ppda_sd', 'high_third_pct',
            'pressing_style', 'home_ppda', 'away_ppda' and
            'home_away_diff' columns.

    Returns:
        None. The first and last rows of `rankings` are reported as the
        most and least aggressive teams.
    """
    banner = "=" * 60
    divider = "-" * 45
    top_row = rankings.iloc[0]
    bottom_row = rankings.iloc[-1]

    print("\n" + banner)
    print("PPDA ANALYSIS REPORT")
    print(banner + "\n")

    print("LEAGUE OVERVIEW:")
    print(divider)
    league_mean = ppda_data['ppda'].mean()
    print(f"League Average PPDA: {league_mean:.2f}")
    print(f"Most Aggressive: {top_row['team']} ({top_row['avg_ppda']:.2f})")
    print(f"Least Aggressive: {bottom_row['team']} ({bottom_row['avg_ppda']:.2f})")

    print("\n\nPRESSING STYLE DISTRIBUTION:")
    print(rankings['pressing_style'].value_counts())

    print("\n\nTOP 10 MOST AGGRESSIVE TEAMS:")
    print(divider)
    top_columns = ['team', 'avg_ppda', 'ppda_sd', 'high_third_pct', 'pressing_style']
    print(rankings.head(10)[top_columns].to_string(index=False))

    print("\n\nHOME vs AWAY ANALYSIS:")
    print(divider)
    print("Teams pressing more aggressively at home:")
    # A negative difference means a lower (more aggressive) PPDA at home.
    presses_more_at_home = rankings['home_away_diff'] < 0
    home_table = rankings[presses_more_at_home].sort_values('home_away_diff')
    split_columns = ['team', 'home_ppda', 'away_ppda', 'home_away_diff']
    print(home_table.head(5)[split_columns].to_string(index=False))
# Run analysis
# Pipeline: per-match PPDA table -> rolling metrics -> league rankings.
# NOTE(review): calculate_season_ppda and add_rolling_metrics are defined
# outside this excerpt; the competition/season ids are passed through as-is.
ppda_data = calculate_season_ppda(competition_id=11, season_id=90)
ppda_data = add_rolling_metrics(ppda_data)
rankings = generate_ppda_rankings(ppda_data)
# Generate outputs
generate_ppda_report(ppda_data, rankings)
# Trend chart is limited to the four teams with the lowest average PPDA
# (rankings is returned sorted by avg_ppda).
plot_ppda_trends(ppda_data, teams_to_show=rankings.head(4)['team'].tolist())
plot_ppda_rankings(rankings)
library(tidyverse)
library(zoo)
library(StatsBombR)
# ============================================
# COMPLETE PPDA ANALYSIS SYSTEM
# ============================================
# Calculate PPDA for a single match
#
# Arguments:
#   events    - event data frame for one match with team.name, type.name
#               and location.x columns.
#   team_name - name of the pressing team.
#
# Returns a one-row tibble: ppda, opponent_passes, defensive_actions and
# the share (%) of Pressure/Tackle/Interception events in the high
# (x > 80), middle (40 <= x <= 80) and low (x < 40) thirds. ppda is NA
# when the team has no defensive action beyond x = 60, and the three
# shares are NA when the team has no located zone action at all.
calculate_match_ppda <- function(events, team_name) {
  # Opponent passes that start before the halfway line (x < 60)
  opponent_passes <- events %>%
    filter(
      team.name != team_name,
      type.name == "Pass",
      !is.na(location.x),
      location.x < 60
    ) %>%
    nrow()

  # Our defensive actions beyond the halfway line (x > 60)
  defensive_actions <- events %>%
    filter(
      team.name == team_name,
      type.name %in% c("Pressure", "Tackle", "Interception",
                       "Foul Committed", "Duel"),
      !is.na(location.x),
      location.x > 60
    ) %>%
    nrow()

  # Plain if/else keeps the result a double; scalar ifelse() returned a
  # logical NA in the no-action case.
  ppda <- if (defensive_actions > 0) {
    opponent_passes / defensive_actions
  } else {
    NA_real_
  }

  # Zone-specific metrics: filter once, then count per third
  zone_actions <- events %>%
    filter(
      team.name == team_name,
      type.name %in% c("Pressure", "Tackle", "Interception"),
      !is.na(location.x)
    )
  zone_x <- zone_actions$location.x

  high_def_actions <- sum(zone_x > 80)
  mid_def_actions <- sum(zone_x >= 40 & zone_x <= 80)
  low_def_actions <- sum(zone_x < 40)
  total_def_actions <- high_def_actions + mid_def_actions + low_def_actions

  # Avoid 0 / 0 = NaN when the team has no qualifying actions
  zone_share <- function(count) {
    if (total_def_actions > 0) count / total_def_actions * 100 else NA_real_
  }

  tibble(
    ppda = ppda,
    opponent_passes = opponent_passes,
    defensive_actions = defensive_actions,
    high_third_pct = zone_share(high_def_actions),
    mid_third_pct = zone_share(mid_def_actions),
    low_third_pct = zone_share(low_def_actions)
  )
}
# Calculate season PPDA for all teams
#
# Arguments:
#   all_matches - one row per match with match_id, match_date, match_week,
#                 home_team.home_team_name and away_team.away_team_name.
#   all_events  - events for those matches, including a match_id column.
#
# Returns a tibble with two rows per match (home and away perspective):
# the calculate_match_ppda() columns plus team, opponent, match_id,
# match_date, venue and match_week. Matches without events are skipped;
# an empty tibble is returned when there is nothing to process.
calculate_season_ppda <- function(all_matches, all_events) {
  # seq_len() yields an empty sequence for zero matches, unlike 1:nrow()
  per_match <- lapply(seq_len(nrow(all_matches)), function(i) {
    match <- all_matches[i, ]
    match_events <- all_events %>%
      filter(match_id == match$match_id)

    if (nrow(match_events) == 0) {
      return(NULL)
    }

    # PPDA row for one side of the fixture, tagged with match context
    side_row <- function(side_team, other_team, venue_label) {
      row <- calculate_match_ppda(match_events, side_team)
      row$team <- side_team
      row$opponent <- other_team
      row$match_id <- match$match_id
      row$match_date <- match$match_date
      row$venue <- venue_label
      row$match_week <- match$match_week
      row
    }

    home_name <- match$home_team.home_team_name
    away_name <- match$away_team.away_team_name

    bind_rows(
      side_row(home_name, away_name, "Home"),
      side_row(away_name, home_name, "Away")
    )
  })

  # Bind once at the end instead of growing a tibble inside the loop
  bind_rows(per_match)
}
# Calculate rolling averages and trends
#
# Adds, per team and ordered by match_date:
#   ppda_rolling_5  - trailing 5-match mean of ppda
#   ppda_rolling_10 - trailing 10-match mean of ppda
#   ppda_trend      - ppda_rolling_5 minus its value five matches earlier
#   match_number    - 1-based match counter within the team
#
# ppda can be NA (calculate_match_ppda returns NA when a team has no
# qualifying defensive action). zoo::rollmean() is documented as not
# handling NA input, so the windows are computed with rollapplyr():
# a window containing an NA is NA, but later windows are unaffected.
# The function is passed as a wrapper rather than bare `mean`, which
# rollapply() may dispatch back to rollmean().
add_rolling_metrics <- function(ppda_data) {
  trailing_mean <- function(x, window) {
    rollapplyr(x, width = window, FUN = function(v) mean(v), fill = NA)
  }

  ppda_data %>%
    arrange(team, match_date) %>%
    group_by(team) %>%
    mutate(
      ppda_rolling_5 = trailing_mean(ppda, 5),
      ppda_rolling_10 = trailing_mean(ppda, 10),
      ppda_trend = ppda_rolling_5 - lag(ppda_rolling_5, 5),
      match_number = row_number()
    ) %>%
    ungroup()
}
# Generate league rankings
#
# Aggregates the per-match table to one row per team, sorted from lowest
# to highest avg_ppda. Columns: matches, avg_ppda, ppda_sd, ppda_median,
# home_ppda, away_ppda, high_third_pct, min_ppda, max_ppda, valid_matches,
# ci_lower, ci_upper, ppda_rank, pressing_style and home_away_diff.
#
# valid_matches counts matches with a non-missing ppda. The confidence
# interval uses it as the sample size, because mean() and sd() are
# computed with na.rm = TRUE; `matches` stays the raw row count.
generate_ppda_rankings <- function(ppda_data) {
  # min()/max() warn and return Inf/-Inf on an all-NA group
  range_end <- function(x, pick) {
    if (all(is.na(x))) NA_real_ else pick(x, na.rm = TRUE)
  }

  ppda_data %>%
    group_by(team) %>%
    summarise(
      matches = n(),
      avg_ppda = mean(ppda, na.rm = TRUE),
      ppda_sd = sd(ppda, na.rm = TRUE),
      ppda_median = median(ppda, na.rm = TRUE),
      home_ppda = mean(ppda[venue == "Home"], na.rm = TRUE),
      away_ppda = mean(ppda[venue == "Away"], na.rm = TRUE),
      high_third_pct = mean(high_third_pct, na.rm = TRUE),
      min_ppda = range_end(ppda, min),
      max_ppda = range_end(ppda, max),
      valid_matches = sum(!is.na(ppda)),
      .groups = "drop"
    ) %>%
    mutate(
      # 95% confidence interval (normal approximation)
      ci_lower = avg_ppda - 1.96 * ppda_sd / sqrt(valid_matches),
      ci_upper = avg_ppda + 1.96 * ppda_sd / sqrt(valid_matches),
      # Rank: 1 = lowest PPDA
      ppda_rank = rank(avg_ppda),
      # Style classification
      pressing_style = case_when(
        avg_ppda < 8 ~ "Gegenpressing",
        avg_ppda < 10 ~ "High Press",
        avg_ppda < 12 ~ "Mid Block",
        TRUE ~ "Low Block"
      ),
      # Negative = lower (more aggressive) PPDA at home
      home_away_diff = home_ppda - away_ppda
    ) %>%
    arrange(avg_ppda)
}
# Visualization: PPDA trend over season
#
# Returns a ggplot of the 5-match rolling PPDA per team by match number.
# teams_to_show optionally restricts the plot to the named teams; NULL
# plots every team in ppda_data.
plot_ppda_trends <- function(ppda_data, teams_to_show = NULL) {
  trend_rows <- if (is.null(teams_to_show)) {
    ppda_data
  } else {
    filter(ppda_data, team %in% teams_to_show)
  }

  # Dashed guides at the style thresholds, with a caption inside each band
  thresholds <- c(8, 10, 12)
  band_names <- c("Gegenpressing", "High Press", "Mid Block", "Low Block")
  band_heights <- c(7.5, 9, 11, 13)

  trend_plot <- ggplot(
    trend_rows,
    aes(x = match_number, y = ppda_rolling_5, color = team)
  )

  trend_plot +
    geom_line(linewidth = 1.2, alpha = 0.8) +
    geom_hline(yintercept = thresholds, linetype = "dashed", alpha = 0.5) +
    annotate(
      "text",
      x = 1, y = band_heights, label = band_names,
      size = 3, hjust = 0
    ) +
    scale_color_brewer(palette = "Set1") +
    labs(
      title = "PPDA Trend Throughout Season",
      subtitle = "5-match rolling average (lower = more aggressive)",
      x = "Match Number",
      y = "PPDA (Rolling 5)",
      color = "Team"
    ) +
    theme_minimal() +
    theme(legend.position = "bottom")
}
# Visualization: League rankings
#
# Returns a flipped bar chart of avg_ppda per team, filled by
# pressing_style, with error bars from ci_lower to ci_upper and dashed
# guides at the style thresholds.
plot_ppda_rankings <- function(rankings) {
  style_fill <- c(
    "Gegenpressing" = "#d73027",
    "High Press" = "#fc8d59",
    "Mid Block" = "#91bfdb",
    "Low Block" = "#4575b4"
  )

  # Teams are ordered by descending avg_ppda before the flip
  ranking_plot <- ggplot(
    rankings,
    aes(x = reorder(team, -avg_ppda), y = avg_ppda)
  )

  ranking_plot +
    geom_col(aes(fill = pressing_style), alpha = 0.8) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3) +
    geom_hline(yintercept = c(8, 10, 12), linetype = "dashed", alpha = 0.5) +
    scale_fill_manual(values = style_fill) +
    coord_flip() +
    labs(
      title = "League PPDA Rankings",
      subtitle = "With 95% confidence intervals",
      x = "",
      y = "Average PPDA",
      fill = "Style"
    ) +
    theme_minimal()
}
# Generate comprehensive report
#
# Prints a league PPDA summary to the console. `rankings` is expected to
# be ordered from lowest to highest avg_ppda: its first row is reported
# as the most aggressive team and its last row as the least aggressive.
generate_ppda_report <- function(ppda_data, rankings) {
  banner <- strrep("=", 60)
  divider <- strrep("-", 41)

  cat("\n", banner, "\n", sep = "")
  cat("PPDA ANALYSIS REPORT\n")
  cat(banner, "\n\n", sep = "")

  cat("LEAGUE OVERVIEW:\n")
  cat(divider, "\n", sep = "")
  league_mean <- mean(ppda_data$ppda, na.rm = TRUE)
  cat("League Average PPDA:", round(league_mean, 2), "\n")

  lowest_team <- rankings$team[1]
  lowest_ppda <- rankings$avg_ppda[1]
  cat("Most Aggressive (Lowest PPDA):", lowest_team,
      "(", round(lowest_ppda, 2), ")\n")

  highest_team <- tail(rankings$team, 1)
  highest_ppda <- tail(rankings$avg_ppda, 1)
  cat("Least Aggressive (Highest PPDA):", highest_team,
      "(", round(highest_ppda, 2), ")\n\n")

  cat("PRESSING STYLE DISTRIBUTION:\n")
  print(table(rankings$pressing_style))

  cat("\n\nTOP 10 MOST AGGRESSIVE TEAMS:\n")
  cat(divider, "\n", sep = "")
  top_ten <- select(
    head(rankings, 10),
    team, avg_ppda, ppda_sd, high_third_pct, pressing_style
  )
  print(top_ten)

  cat("\n\nHOME vs AWAY ANALYSIS:\n")
  cat(divider, "\n", sep = "")
  cat("Teams pressing more aggressively at home:\n")
  # Negative home_away_diff = lower PPDA at home than away
  home_pressers <- rankings %>%
    filter(home_away_diff < 0) %>%
    select(team, home_ppda, away_ppda, home_away_diff) %>%
    arrange(home_away_diff)
  print(head(home_pressers, 5))
}
# Load data and run analysis
# Downloads the free competition list, keeps competition_id 11 and pulls
# the events for its matches.
# NOTE(review): the functions above read a location.x column from the
# events; confirm the frame returned here already has it (StatsBombR's
# allclean() is presumably the step that derives it) - TODO verify.
comps <- FreeCompetitions()
matches <- FreeMatches(Competitions = comps %>% filter(competition_id == 11))
events <- free_allevents(MatchesDF = matches, Atea = TRUE)
# Run analysis pipeline
# Per-match PPDA -> rolling metrics -> one ranking row per team.
ppda_data <- calculate_season_ppda(matches, events)
ppda_data <- add_rolling_metrics(ppda_data)
rankings <- generate_ppda_rankings(ppda_data)
# Generate outputs
generate_ppda_report(ppda_data, rankings)
# Trend chart covers the four teams with the lowest average PPDA.
print(plot_ppda_trends(ppda_data, teams_to_show = head(rankings$team, 4)))
print(plot_ppda_rankings(rankings))

Exercise 25.2: Counterpressing Effectiveness Analyzer
Task: Build a comprehensive counterpressing analysis system that identifies possession losses, tracks counterpress attempts, measures regain success rates, and identifies the most effective counterpressing players and zones.
Requirements:
- Identify all possession loss events (failed passes, unsuccessful dribbles, dispossessions)
- Track counterpressing attempts within 5 seconds of loss
- Calculate regain success rate by zone, player, and loss type
- Measure average time to counterpress and time to regain
- Identify optimal counterpressing zones with highest success rates
- Generate player-level counterpress ratings and heatmaps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mplsoccer import Pitch
from statsbombpy import sb
# ============================================
# COUNTERPRESSING EFFECTIVENESS ANALYZER
# ============================================
def identify_possession_losses(events, team_name):
    """
    Collect the events where `team_name` gave the ball away.

    Three groups are gathered, in this order:
      * 'Failed Pass'    - passes whose outcome is Incomplete, Out,
                           Pass Offside or Unknown
      * 'Dispossessed'   - Dispossessed and Miscontrol events
      * 'Failed Dribble' - dribbles whose outcome is not 'Complete'

    Args:
        events: Event DataFrame with 'team', 'type', 'pass_outcome',
            'dribble_outcome', 'location', 'match_id', 'index', 'minute',
            'second' and 'player' columns.
        team_name: Team whose losses are wanted.

    Returns:
        DataFrame with match_id, index, minute, second, player, loss_x,
        loss_y, loss_type and loss_zone. A missing location gives NaN
        coordinates and falls into 'Defensive Third'.
    """
    own = events[events['team'] == team_name].copy()
    event_type = own['type']
    bad_outcomes = ['Incomplete', 'Out', 'Pass Offside', 'Unknown']

    pass_mask = (
        (event_type == 'Pass')
        & own['pass_outcome'].notna()
        & own['pass_outcome'].isin(bad_outcomes)
    )
    dispossessed_mask = event_type.isin(['Dispossessed', 'Miscontrol'])
    dribble_mask = (event_type == 'Dribble') & (own['dribble_outcome'] != 'Complete')

    labelled = []
    for mask, label in [
        (pass_mask, 'Failed Pass'),
        (dispossessed_mask, 'Dispossessed'),
        (dribble_mask, 'Failed Dribble'),
    ]:
        part = own[mask].copy()
        part['loss_type'] = label
        labelled.append(part)
    combined = pd.concat(labelled)

    def coordinate(axis):
        # Locations are [x, y] lists; anything else becomes NaN.
        return lambda point: point[axis] if isinstance(point, list) else np.nan

    combined['loss_x'] = combined['location'].apply(coordinate(0))
    combined['loss_y'] = combined['location'].apply(coordinate(1))

    def third_of_pitch(x):
        # NaN fails both comparisons and lands in the last branch.
        if x > 80:
            return 'Attacking Third'
        return 'Middle Third' if x >= 40 else 'Defensive Third'

    combined['loss_zone'] = combined['loss_x'].apply(third_of_pitch)

    wanted = ['match_id', 'index', 'minute', 'second', 'player',
              'loss_x', 'loss_y', 'loss_type', 'loss_zone']
    return combined[wanted]
def find_counterpresses(events, losses, team_name, window_seconds=5):
    """
    Find, for each possession loss, the first pressure the team applied
    within `window_seconds` afterwards.

    Args:
        events: Event DataFrame with 'team', 'type', 'location',
            'match_id', 'index', 'minute', 'second' and 'player' columns
            (and optionally 'counterpress').
        losses: Output of identify_possession_losses().
        team_name: Team whose pressures are searched.
        window_seconds: Maximum gap, in whole seconds, between the loss
            and the pressure.

    Returns:
        DataFrame with one row per loss that was followed by a pressure.
        The columns are always present, even when no row qualifies.

    Notes:
        * Pressures are ordered by event 'index' before the first match
          is taken, so the result does not depend on the row order of
          `events`.
        * Times are minute * 60 + second, so a pressure in the same
          clock second as the loss has time_diff == 0 and is excluded
          by the `> 0` test.
    """
    columns = [
        'loss_index', 'loss_x', 'loss_y', 'loss_type', 'loss_zone',
        'press_index', 'press_player', 'press_x', 'press_y',
        'time_diff', 'distance', 'counterpress_flag',
    ]

    pressures = events[
        (events['team'] == team_name) &
        (events['type'] == 'Pressure')
    ].copy()
    pressures['press_x'] = pressures['location'].apply(
        lambda loc: loc[0] if isinstance(loc, list) else np.nan
    )
    pressures['press_y'] = pressures['location'].apply(
        lambda loc: loc[1] if isinstance(loc, list) else np.nan
    )
    # Loop-invariant: computed once instead of once per loss.
    pressures['press_time'] = pressures['minute'] * 60 + pressures['second']
    # Event order, so iloc[0] below really is the earliest pressure.
    pressures = pressures.sort_values('index')

    counterpresses = []
    for _, loss in losses.iterrows():
        loss_time = loss['minute'] * 60 + loss['second']
        elapsed = pressures['press_time'] - loss_time

        valid_presses = pressures[
            (pressures['match_id'] == loss['match_id']) &
            (pressures['index'] > loss['index']) &
            (elapsed > 0) &
            (elapsed <= window_seconds)
        ]
        if valid_presses.empty:
            continue

        first_press = valid_presses.iloc[0]
        # Straight-line distance between loss and pressure locations.
        distance = np.sqrt(
            (first_press['press_x'] - loss['loss_x']) ** 2 +
            (first_press['press_y'] - loss['loss_y']) ** 2
        )
        counterpresses.append({
            'loss_index': loss['index'],
            'loss_x': loss['loss_x'],
            'loss_y': loss['loss_y'],
            'loss_type': loss['loss_type'],
            'loss_zone': loss['loss_zone'],
            'press_index': first_press['index'],
            'press_player': first_press['player'],
            'press_x': first_press['press_x'],
            'press_y': first_press['press_y'],
            'time_diff': first_press['press_time'] - loss_time,
            'distance': distance,
            'counterpress_flag': first_press.get('counterpress', False)
        })

    # Explicit columns keep the schema stable when nothing was found.
    return pd.DataFrame(counterpresses, columns=columns)
def check_regain_success(events, counterpresses, team_name):
    """
    Flag each counterpress by who made the next regain event.

    A counterpress counts as successful when the next Ball Recovery,
    Interception or Tackle after it (by event 'index') belongs to
    `team_name`. If no such event follows, it counts as failed.

    Args:
        events: Event DataFrame with 'type', 'index' and 'team' columns.
            The lookup uses only the event index, so `events` should hold
            a single match.
        counterpresses: Output of find_counterpresses(); needs a
            'press_index' column unless it is empty.
        team_name: Team that applied the counterpress.

    Returns:
        A copy of `counterpresses` (index reset) with a boolean
        'regain_success' column. The column is present even when the
        input is empty.
    """
    result = counterpresses.copy().reset_index(drop=True)
    if result.empty:
        result['regain_success'] = pd.Series(dtype=bool)
        return result

    # Sort by event index so "next" means next in match order, not next
    # in whatever row order the events frame happens to have.
    regains = events[
        events['type'].isin(['Ball Recovery', 'Interception', 'Tackle'])
    ].sort_values('index')
    regain_index = regains['index'].to_numpy()
    regain_team = regains['team'].to_numpy()

    press_index = pd.to_numeric(result['press_index']).to_numpy()
    # side='right' -> position of the first regain with index > press_index.
    position = np.searchsorted(regain_index, press_index, side='right')
    has_next = position < len(regain_index)

    success = np.zeros(len(result), dtype=bool)
    success[has_next] = regain_team[position[has_next]] == team_name
    result['regain_success'] = success
    return result
def calculate_counterpress_metrics(losses, counterpresses):
    """
    Summarise counterpress attempts overall, by zone, player and loss type.

    Args:
        losses: DataFrame of possession losses (only its length is used).
        counterpresses: Output of check_regain_success(); needs
            'loss_index', 'loss_zone', 'loss_type', 'press_player',
            'time_diff', 'distance' and 'regain_success' unless empty.

    Returns:
        dict with
          'overall'      - dict of totals, rates (%) and averages
          'by_zone'      - zone, attempts, regains, regain_rate,
                           avg_reaction_time
          'by_player'    - player, actions, regains, regain_rate,
                           avg_reaction (sorted by actions, descending)
          'by_loss_type' - loss_type, attempts, regain_rate

        With no counterpresses the counts and rates are 0, the averages
        are NaN and the three tables are empty with the usual columns.
    """
    zone_columns = ['zone', 'attempts', 'regains', 'regain_rate', 'avg_reaction_time']
    player_columns = ['player', 'actions', 'regains', 'regain_rate', 'avg_reaction']
    loss_type_columns = ['loss_type', 'attempts', 'regain_rate']

    n_losses = len(losses)
    n_attempts = len(counterpresses)
    attempt_rate = n_attempts / n_losses * 100 if n_losses > 0 else 0

    # An empty frame may have no columns at all, so nothing below can be
    # indexed safely; return the empty-shaped result up front.
    if n_attempts == 0:
        return {
            'overall': {
                'total_losses': n_losses,
                'counterpress_attempts': 0,
                'counterpress_rate': attempt_rate,
                'successful_regains': 0,
                'regain_rate': 0,
                'avg_time_to_press': float('nan'),
                'avg_distance_pressed': float('nan'),
            },
            'by_zone': pd.DataFrame(columns=zone_columns),
            'by_player': pd.DataFrame(columns=player_columns),
            'by_loss_type': pd.DataFrame(columns=loss_type_columns),
        }

    overall = {
        'total_losses': n_losses,
        'counterpress_attempts': n_attempts,
        'counterpress_rate': attempt_rate,
        'successful_regains': counterpresses['regain_success'].sum(),
        'regain_rate': counterpresses['regain_success'].mean() * 100,
        'avg_time_to_press': counterpresses['time_diff'].mean(),
        'avg_distance_pressed': counterpresses['distance'].mean()
    }

    def summarise(group_column, output_columns):
        # Attempts, regains, regain share and mean reaction time per group.
        table = (
            counterpresses
            .groupby(group_column)
            .agg(
                attempts=('loss_index', 'count'),
                regains=('regain_success', 'sum'),
                regain_rate=('regain_success', 'mean'),
                reaction=('time_diff', 'mean'),
            )
            .reset_index()
        )
        table.columns = output_columns
        table['regain_rate'] *= 100
        return table

    by_zone = summarise('loss_zone', zone_columns)

    by_player = summarise('press_player', player_columns)
    by_player = by_player.sort_values('actions', ascending=False)

    by_loss_type = (
        counterpresses
        .groupby('loss_type')
        .agg(
            attempts=('loss_index', 'count'),
            regain_rate=('regain_success', 'mean'),
        )
        .reset_index()
    )
    by_loss_type.columns = loss_type_columns
    by_loss_type['regain_rate'] *= 100

    return {
        'overall': overall,
        'by_zone': by_zone,
        'by_player': by_player,
        'by_loss_type': by_loss_type
    }
def visualize_counterpress_zones(counterpresses):
    """
    Scatter counterpress locations on a pitch, coloured by outcome, and
    print the regain rate of each third above the pitch markings.

    Args:
        counterpresses: DataFrame with 'press_x', 'press_y' and
            'regain_success' columns.

    The figure is shown with plt.show(); nothing is returned.
    """
    pitch = Pitch(pitch_type='statsbomb', line_color='white', pitch_color='#1a472a')
    fig, ax = pitch.draw(figsize=(14, 9))

    outcome = counterpresses['regain_success']
    won = counterpresses[outcome == True]
    lost = counterpresses[outcome == False]

    # Regained presses are drawn above failed ones (higher zorder).
    ax.scatter(won['press_x'], won['press_y'], c='lime', s=80, alpha=0.7,
               label=f'Regained ({len(won)})', zorder=5)
    ax.scatter(lost['press_x'], lost['press_y'], c='red', s=80, alpha=0.5,
               label=f'Failed ({len(lost)})', zorder=4)

    # Dashed dividers between the thirds.
    for boundary in (40, 80):
        ax.axvline(x=boundary, color='white', linestyle='--', alpha=0.5)

    # Regain rate per third; each band is (centre - 20, centre + 20].
    press_x = counterpresses['press_x']
    for centre in (20, 60, 100):
        in_band = (press_x > centre - 20) & (press_x <= centre + 20)
        band = counterpresses[in_band]
        if len(band) == 0:
            continue
        rate = band['regain_success'].mean() * 100
        ax.text(centre, 75, f'{rate:.0f}%', ha='center', fontsize=14,
                color='white', fontweight='bold')

    ax.legend(loc='upper right', fontsize=10)
    ax.set_title('Counterpress Locations & Success Rate by Zone',
                 fontsize=14, fontweight='bold', color='white')
    plt.tight_layout()
    plt.show()
def generate_counterpress_report(metrics, team_name):
    """
    Print the counterpressing summary for one team to stdout.

    Args:
        metrics: dict as returned by calculate_counterpress_metrics(),
            with 'overall' (dict) and 'by_zone', 'by_player',
            'by_loss_type' (DataFrames).
        team_name: Team name shown in the report header.

    Returns:
        None.
    """
    banner = "=" * 65
    divider = "-" * 50
    totals = metrics['overall']

    print("\n" + banner)
    print(f"COUNTERPRESSING ANALYSIS: {team_name}")
    print(banner + "\n")

    print("OVERALL METRICS:")
    print(divider)
    print(f"Total Possession Losses: {totals['total_losses']}")
    print(f"Counterpress Attempts: {totals['counterpress_attempts']} "
          f"({totals['counterpress_rate']:.1f}% of losses)")
    print(f"Successful Regains: {totals['successful_regains']} "
          f"({totals['regain_rate']:.1f}% success)")
    print(f"Avg Time to Press: {totals['avg_time_to_press']:.2f} seconds")
    print(f"Avg Distance Pressed: {totals['avg_distance_pressed']:.1f} meters")

    # (heading, metrics key, row limit); only the player table is capped.
    sections = [
        ("BY ZONE:", 'by_zone', None),
        ("TOP COUNTERPRESSING PLAYERS:", 'by_player', 10),
        ("BY LOSS TYPE:", 'by_loss_type', None),
    ]
    for heading, key, limit in sections:
        print("\n\n" + heading)
        print(divider)
        table = metrics[key]
        if limit is not None:
            table = table.head(limit)
        print(table.to_string(index=False))
# Main execution
# Load one match's events and run the counterpress pipeline for one team:
# losses -> counterpress attempts -> regain outcome -> metrics -> report.
events = sb.events(match_id=3773585)
team_name = 'Liverpool'
losses = identify_possession_losses(events, team_name)
counterpresses = find_counterpresses(events, losses, team_name)
# Rebinds counterpresses to the version that carries regain_success.
counterpresses = check_regain_success(events, counterpresses, team_name)
metrics = calculate_counterpress_metrics(losses, counterpresses)
generate_counterpress_report(metrics, team_name)
visualize_counterpress_zones(counterpresses)
library(tidyverse)
library(lubridate)
library(StatsBombR)
# ============================================
# COUNTERPRESSING EFFECTIVENESS ANALYZER
# ============================================
# Identify possession loss events
#
# Arguments:
#   events    - event data frame with team.name, type.name,
#               pass.outcome.name, dribble.outcome.name, location.x/.y,
#               match_id, index, timestamp, minute, second, player.name
#               and position.name columns.
#   team_name - team whose losses are wanted.
#
# Returns one row per loss with loss_type ("Failed Pass",
# "Failed Dribble", "Dispossessed" or "Miscontrol") and loss_zone.
# loss_zone is NA when the event has no x location, rather than being
# counted as "Defensive Third".
identify_possession_losses <- function(events, team_name) {
  team_events <- events %>%
    filter(team.name == team_name) %>%
    arrange(index)

  # Failed passes (%in% never matches NA, so completed passes drop out)
  failed_passes <- team_events %>%
    filter(
      type.name == "Pass",
      pass.outcome.name %in% c("Incomplete", "Out", "Pass Offside", "Unknown")
    )

  # Failed dribbles
  failed_dribbles <- team_events %>%
    filter(
      type.name == "Dribble",
      dribble.outcome.name != "Complete"
    )

  # Dispossessions. Miscontrol events are collected separately below, so
  # they are not matched here (the old "Miscontrols" value never matched).
  dispossessions <- team_events %>%
    filter(type.name == "Dispossessed")

  # Bad touches leading to loss
  bad_touches <- team_events %>%
    filter(type.name == "Miscontrol")

  # Combine all losses
  bind_rows(
    failed_passes %>% mutate(loss_type = "Failed Pass"),
    failed_dribbles %>% mutate(loss_type = "Failed Dribble"),
    dispossessions %>% mutate(loss_type = "Dispossessed"),
    bad_touches %>% mutate(loss_type = "Miscontrol")
  ) %>%
    select(
      match_id, index, timestamp, minute, second,
      player.name, position.name,
      location.x, location.y,
      loss_type
    ) %>%
    mutate(
      loss_zone = case_when(
        is.na(location.x) ~ NA_character_,
        location.x > 80 ~ "Attacking Third",
        location.x >= 40 ~ "Middle Third",
        TRUE ~ "Defensive Third"
      )
    )
}
# Find counterpress events after each loss
#
# Arguments:
#   events         - event data frame (team.name, type.name, match_id,
#                    index, timestamp, minute, second, player.name,
#                    location.x/.y and, when present, counterpress).
#   losses         - output of identify_possession_losses().
#   team_name      - team whose pressures are searched.
#   window_seconds - maximum gap in whole seconds between loss and
#                    pressure.
#
# Returns the loss rows joined to the first pressure that followed within
# the window, plus loss_time, pressure_time, time_diff and distance.
# Exactly one pressure is kept per loss: times have one-second
# resolution, so ties are broken by the lower event index.
find_counterpresses <- function(events, losses, team_name, window_seconds = 5) {
  # Get all pressures by the team
  pressures <- events %>%
    filter(team.name == team_name, type.name == "Pressure") %>%
    select(
      match_id,
      pressure_index = index,
      pressure_timestamp = timestamp,
      pressure_minute = minute,
      pressure_second = second,
      pressure_player = player.name,
      pressure_x = location.x,
      pressure_y = location.y,
      # The flag column may be missing from the events; keep it if there
      any_of("counterpress")
    )

  # Join losses with subsequent pressures
  losses %>%
    inner_join(pressures, by = "match_id") %>%
    mutate(
      loss_time = minute * 60 + second,
      pressure_time = pressure_minute * 60 + pressure_second,
      time_diff = pressure_time - loss_time
    ) %>%
    filter(
      time_diff > 0,
      time_diff <= window_seconds
    ) %>%
    group_by(match_id, index) %>%
    # slice_min() keeps every tied row by default, which duplicated
    # losses; order explicitly and take a single row instead.
    arrange(time_diff, pressure_index, .by_group = TRUE) %>%
    slice_head(n = 1) %>%
    ungroup() %>%
    mutate(
      # Straight-line distance between loss and pressure locations
      distance = sqrt(
        (pressure_x - location.x)^2 + (pressure_y - location.y)^2
      )
    )
}
# Check if counterpress led to regain
#
# Arguments:
#   events                - event data frame (type.name, match_id, index,
#                           team.name).
#   counterpress_attempts - output of find_counterpresses().
#   team_name             - team that applied the counterpress.
#
# Returns counterpress_attempts with regain_index, regain_team,
# regain_success and regain_time added. The next Ball Recovery,
# Interception or Tackle after the pressure (by event index, same match)
# decides the outcome. Attempts with no later regain event are kept with
# regain_success = FALSE; previously they were dropped by the filter.
check_regain_success <- function(events, counterpress_attempts, team_name) {
  # Get next possession change
  possession_changes <- events %>%
    filter(type.name %in% c("Ball Recovery", "Interception", "Tackle")) %>%
    select(match_id, regain_index = index, regain_team = team.name)

  # First regain event after each pressure
  next_regain <- counterpress_attempts %>%
    distinct(match_id, index, pressure_index) %>%
    inner_join(possession_changes, by = "match_id") %>%
    filter(regain_index > pressure_index) %>%
    group_by(match_id, index, pressure_index) %>%
    slice_min(regain_index, n = 1, with_ties = FALSE) %>%
    ungroup()

  # Join back so attempts without a later regain survive as failures
  counterpress_attempts %>%
    left_join(next_regain, by = c("match_id", "index", "pressure_index")) %>%
    mutate(
      regain_success = !is.na(regain_team) & regain_team == team_name,
      # NOTE(review): this multiplies an event-index gap by 0.04, so it
      # is not a measured duration - confirm whether it should be derived
      # from timestamps instead.
      regain_time = (regain_index - pressure_index) * 0.04
    )
}
# Calculate counterpress metrics
#
# Summarises the counterpress table overall and per loss zone, pressing
# player and loss type. Returns a list with tibbles `overall`, `by_zone`,
# `by_player` (sorted by counterpress_actions, descending) and
# `by_loss_type`. Rates are percentages.
calculate_counterpress_metrics <- function(losses, counterpresses_with_outcome) {
  attempts <- counterpresses_with_outcome
  n_losses <- nrow(losses)
  n_attempts <- nrow(attempts)

  # Share of TRUE values as a percentage, ignoring NA
  pct_true <- function(flag) {
    mean(flag, na.rm = TRUE) * 100
  }

  # Overall metrics
  overall <- tibble(
    total_losses = n_losses,
    counterpress_attempts = n_attempts,
    counterpress_rate = n_attempts / n_losses * 100,
    successful_regains = sum(attempts$regain_success, na.rm = TRUE),
    regain_rate = pct_true(attempts$regain_success),
    avg_time_to_press = mean(attempts$time_diff, na.rm = TRUE),
    avg_distance_pressed = mean(attempts$distance, na.rm = TRUE)
  )

  # By zone
  zone_groups <- group_by(attempts, loss_zone)
  by_zone <- summarise(
    zone_groups,
    attempts = n(),
    regains = sum(regain_success, na.rm = TRUE),
    regain_rate = pct_true(regain_success),
    avg_reaction_time = mean(time_diff, na.rm = TRUE),
    .groups = "drop"
  )

  # By player
  player_groups <- group_by(attempts, pressure_player)
  player_summary <- summarise(
    player_groups,
    counterpress_actions = n(),
    successful_regains = sum(regain_success, na.rm = TRUE),
    regain_rate = pct_true(regain_success),
    avg_reaction_time = mean(time_diff, na.rm = TRUE),
    .groups = "drop"
  )
  by_player <- arrange(player_summary, desc(counterpress_actions))

  # By loss type
  type_groups <- group_by(attempts, loss_type)
  by_loss_type <- summarise(
    type_groups,
    attempts = n(),
    regain_rate = pct_true(regain_success),
    .groups = "drop"
  )

  list(
    overall = overall,
    by_zone = by_zone,
    by_player = by_player,
    by_loss_type = by_loss_type
  )
}
# Visualize counterpress zones
#
# Returns a ggplot with a 120 x 80 pitch rectangle, dashed dividers at
# x = 40 and x = 80, one point per counterpress coloured by outcome, and
# a binned mean of regain_success drawn on top.
#
# Legend labels are matched to TRUE/FALSE by name, so the legend stays
# correct when only one outcome is present in the data.
visualize_counterpress_zones <- function(counterpresses_with_outcome) {
  outcome_colors <- c("TRUE" = "green", "FALSE" = "red")
  outcome_labels <- c("TRUE" = "Success", "FALSE" = "Failed")

  ggplot() +
    # Pitch
    annotate("rect", xmin = 0, xmax = 120, ymin = 0, ymax = 80,
             fill = "#228B22", alpha = 0.3) +
    annotate("segment", x = 40, xend = 40, y = 0, yend = 80,
             linetype = "dashed", color = "white") +
    annotate("segment", x = 80, xend = 80, y = 0, yend = 80,
             linetype = "dashed", color = "white") +
    # Counterpress locations
    geom_point(
      data = counterpresses_with_outcome,
      aes(x = pressure_x, y = pressure_y, color = regain_success),
      size = 3, alpha = 0.6
    ) +
    # Zone success rates: mean of the 0/1 outcome per bin
    stat_summary_2d(
      data = counterpresses_with_outcome,
      aes(x = pressure_x, y = pressure_y, z = as.numeric(regain_success)),
      fun = mean, bins = 10, alpha = 0.5
    ) +
    scale_color_manual(values = outcome_colors, labels = outcome_labels) +
    labs(
      title = "Counterpress Locations and Success",
      x = "Pitch Length", y = "Pitch Width",
      color = "Outcome"
    ) +
    coord_fixed() +
    theme_minimal()
}
# Generate comprehensive report
#
# Prints the counterpressing summary for one team to the console.
# `metrics` is the list returned by calculate_counterpress_metrics();
# `team_name` is only used in the header.
generate_counterpress_report <- function(metrics, team_name) {
  banner <- strrep("=", 65)
  divider <- strrep("-", 46)
  totals <- metrics$overall

  cat("\n", banner, "\n", sep = "")
  cat("COUNTERPRESSING ANALYSIS:", team_name, "\n")
  cat(banner, "\n\n", sep = "")

  cat("OVERALL METRICS:\n")
  cat(divider, "\n", sep = "")
  cat("Total Possession Losses:", totals$total_losses, "\n")
  cat("Counterpress Attempts:", totals$counterpress_attempts,
      "(", round(totals$counterpress_rate, 1), "% of losses)\n")
  cat("Successful Regains:", totals$successful_regains,
      "(", round(totals$regain_rate, 1), "% success)\n")
  cat("Avg Time to Press:", round(totals$avg_time_to_press, 2), "seconds\n")
  cat("Avg Distance Pressed:", round(totals$avg_distance_pressed, 1), "meters\n")

  # Print a heading, a divider and then the table
  print_section <- function(heading, table) {
    cat(heading)
    cat(divider, "\n", sep = "")
    print(table)
  }

  print_section("\n\nBY ZONE:\n", metrics$by_zone)
  print_section("\n\nTOP COUNTERPRESSING PLAYERS:\n", head(metrics$by_player, 10))
  print_section("\n\nBY LOSS TYPE:\n", metrics$by_loss_type)
}
# Main execution
# Runs the counterpress pipeline for one team:
# losses -> counterpress attempts -> regain outcome -> metrics -> report.
# NOTE(review): match_id is not assigned in the visible part of this
# script; it must exist before this line runs. Also confirm that
# get.matchFree() accepts what is passed here and that the returned
# events carry the location.x / location.y columns used above - TODO verify.
events <- get.matchFree(match_id)
team_name <- "Liverpool"
losses <- identify_possession_losses(events, team_name)
counterpresses <- find_counterpresses(events, losses, team_name)
counterpresses_outcome <- check_regain_success(events, counterpresses, team_name)
metrics <- calculate_counterpress_metrics(losses, counterpresses_outcome)
generate_counterpress_report(metrics, team_name)
print(visualize_counterpress_zones(counterpresses_outcome))

Exercise 25.3: Team Pressing Profile Comparison Dashboard
Task: Build a comprehensive team pressing profile system that classifies pressing styles, creates radar chart comparisons, and generates tactical insights based on pressing characteristics.
Requirements:
- Calculate multi-dimensional pressing metrics for each team (PPDA, intensity, positioning, effectiveness)
- Classify teams into pressing style categories (Gegenpressing, High Press, Mid Block, Low Block)
- Create normalized radar charts comparing pressing profiles of multiple teams
- Identify pressing strengths and weaknesses for each team
- Generate tactical recommendations based on pressing analysis
- Create visual dashboard with multiple comparison views
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from math import pi
from statsbombpy import sb
# ============================================
# TEAM PRESSING PROFILE COMPARISON DASHBOARD
# ============================================
def calculate_pressing_profile(events, team_name):
    """
    Calculate comprehensive pressing profile for a team.

    Args:
        events: Event DataFrame with 'team', 'type', 'location' and
            'minute' columns, and optionally 'counterpress'.
        team_name: Team to profile; every other team in `events` is
            treated as the opponent.

    Returns:
        dict with team, ppda, pressures_per_90, high/mid/low_press_pct,
        avg_press_height, counterpress_rate, pressing_efficiency,
        defensive_engagement, wide/central_press_pct and
        press_width_spread. Percentages are on a 0-100 scale. ppda is NaN
        when the team has no defensive action beyond x = 60.
    """
    team_events = events[events['team'] == team_name]
    opponent_events = events[events['team'] != team_name]

    def get_x(loc):
        # Locations are [x, y] lists; anything else becomes NaN.
        return loc[0] if isinstance(loc, list) else np.nan

    match_minutes = events['minute'].max()

    # PPDA: opponent passes starting at x < 60 per defensive action of
    # ours at x > 60.
    opp_passes_their_half = len(opponent_events[
        (opponent_events['type'] == 'Pass') &
        (opponent_events['location'].apply(get_x) < 60)
    ])
    def_actions = team_events[
        team_events['type'].isin(['Pressure', 'Tackle', 'Interception', 'Foul Committed'])
    ].copy()
    def_actions['x'] = def_actions['location'].apply(get_x)
    def_actions_opp_half = len(def_actions[def_actions['x'] > 60])
    ppda = opp_passes_their_half / def_actions_opp_half if def_actions_opp_half > 0 else np.nan

    # Pressures
    pressures = team_events[team_events['type'] == 'Pressure'].copy()
    pressures['x'] = pressures['location'].apply(get_x)
    pressures['y'] = pressures['location'].apply(
        lambda loc: loc[1] if isinstance(loc, list) else np.nan
    )
    n_pressures = len(pressures)

    if n_pressures > 0:
        high_press_pct = (pressures['x'] > 80).mean() * 100
        mid_press_pct = ((pressures['x'] >= 40) & (pressures['x'] <= 80)).mean() * 100
        low_press_pct = (pressures['x'] < 40).mean() * 100
        avg_press_height = pressures['x'].mean()
        press_width_spread = pressures['y'].std()
        wide_press_pct = ((pressures['y'] < 20) | (pressures['y'] > 60)).mean() * 100
        central_press_pct = ((pressures['y'] >= 20) & (pressures['y'] <= 60)).mean() * 100
        if 'counterpress' in pressures.columns:
            # The flag is True or missing. Comparing with True turns
            # missing into False; a plain .mean() skips missing values
            # and would average only the True entries.
            counterpress_rate = pressures['counterpress'].eq(True).mean() * 100
        else:
            counterpress_rate = 0.0
    else:
        high_press_pct = mid_press_pct = low_press_pct = 0
        avg_press_height = press_width_spread = wide_press_pct = central_press_pct = 0
        counterpress_rate = 0

    # Scaled by the highest minute seen in the events.
    pressures_per_90 = n_pressures / (match_minutes / 90) if match_minutes > 0 else 0

    # Efficiency: ball recoveries and interceptions per pressure
    turnovers = len(team_events[team_events['type'].isin(['Ball Recovery', 'Interception'])])
    pressing_efficiency = turnovers / n_pressures * 100 if n_pressures > 0 else 0

    # Engagement rate: defensive actions per opponent on-ball event
    opp_touches = len(opponent_events[
        opponent_events['type'].isin(['Pass', 'Carry', 'Dribble', 'Shot'])
    ])
    defensive_engagement = len(def_actions) / opp_touches * 100 if opp_touches > 0 else 0

    return {
        'team': team_name,
        'ppda': ppda,
        'pressures_per_90': pressures_per_90,
        'high_press_pct': high_press_pct,
        'mid_press_pct': mid_press_pct,
        'low_press_pct': low_press_pct,
        'avg_press_height': avg_press_height,
        'counterpress_rate': counterpress_rate,
        'pressing_efficiency': pressing_efficiency,
        'defensive_engagement': defensive_engagement,
        'wide_press_pct': wide_press_pct,
        'central_press_pct': central_press_pct,
        'press_width_spread': press_width_spread
    }
def classify_pressing_style(profile):
    """
    Label a pressing profile with a style and an intensity level.

    Args:
        profile: Mapping with 'ppda' and 'pressures_per_90'; when ppda is
            below 10, 'high_press_pct' (and possibly 'counterpress_rate')
            is read as well.

    Returns:
        The same `profile` object, updated in place with
        'pressing_style' and 'intensity_level'. A NaN ppda fails every
        comparison and is labelled 'Low Block'.
    """
    def pick_style():
        ppda = profile['ppda']
        if not ppda < 12:
            return 'Low Block'
        if ppda < 10:
            high_share = profile['high_press_pct']
            if ppda < 8 and high_share > 35 and profile['counterpress_rate'] > 15:
                return 'Gegenpressing'
            if high_share > 25:
                return 'High Press'
        return 'Mid Block'

    profile['pressing_style'] = pick_style()

    # Intensity tiers, checked from the highest threshold down.
    volume = profile['pressures_per_90']
    tiers = ((180, 'Very High'), (150, 'High'), (120, 'Medium'))
    profile['intensity_level'] = next(
        (label for floor, label in tiers if volume > floor),
        'Low',
    )
    return profile
def normalize_profiles(profiles_df):
    """
    Add min-max scaled (0-100) versions of the radar-chart metrics.

    Args:
        profiles_df: DataFrame with 'ppda', 'pressures_per_90',
            'high_press_pct', 'counterpress_rate', 'pressing_efficiency'
            and 'defensive_engagement' columns.

    Returns:
        A copy with ppda_norm, intensity_norm, high_press_norm,
        counterpress_norm, efficiency_norm and engagement_norm appended.
        ppda_norm is inverted so that the lowest PPDA scores 100.
    """
    def min_max_100(series):
        lowest = series.min()
        return (series - lowest) / (series.max() - lowest) * 100

    scaled = profiles_df.copy()

    # Lower PPDA is the more aggressive end, so flip the scale.
    scaled['ppda_norm'] = 100 - min_max_100(scaled['ppda'])

    targets = {
        'pressures_per_90': 'intensity_norm',
        'high_press_pct': 'high_press_norm',
        'counterpress_rate': 'counterpress_norm',
        'pressing_efficiency': 'efficiency_norm',
        'defensive_engagement': 'engagement_norm',
    }
    for source, target in targets.items():
        scaled[target] = min_max_100(scaled[source])

    return scaled
def create_radar_comparison(profiles_df, teams_to_compare):
    """
    Create radar chart comparing team pressing profiles.

    Args:
        profiles_df: DataFrame with a 'team' column and the six *_norm
            columns produced by normalize_profiles().
        teams_to_compare: Iterable of team names. Names not found in
            profiles_df are skipped.

    Each team is drawn as a closed polygon on a 0-100 polar axis. The
    four-colour palette is reused cyclically when more than four teams
    are requested. If a team has several rows, only the first is plotted.
    The figure is shown with plt.show(); nothing is returned.
    """
    metrics = ['ppda_norm', 'intensity_norm', 'high_press_norm',
               'counterpress_norm', 'efficiency_norm', 'engagement_norm']
    labels = ['PPDA\n(inverted)', 'Intensity', 'High Press %',
              'Counterpress', 'Efficiency', 'Engagement']

    # One evenly spaced angle per metric, then repeat the first to close
    # the polygon.
    num_vars = len(metrics)
    angles = [n / float(num_vars) * 2 * pi for n in range(num_vars)]
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))
    colors = ['#E41A1C', '#377EB8', '#4DAF4A', '#984EA3']

    for idx, team in enumerate(teams_to_compare):
        team_data = profiles_df[profiles_df['team'] == team]
        if len(team_data) == 0:
            continue

        # Wrap around the palette instead of raising IndexError on the
        # fifth team.
        color = colors[idx % len(colors)]

        # Exactly one value per metric, whatever the number of rows.
        values = team_data[metrics].iloc[0].tolist()
        values += values[:1]

        ax.plot(angles, values, 'o-', linewidth=2, label=team, color=color)
        ax.fill(angles, values, alpha=0.25, color=color)

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(labels, size=10)
    ax.set_ylim(0, 100)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))
    ax.set_title('Pressing Profile Comparison', size=14, fontweight='bold', y=1.08)
    plt.tight_layout()
    plt.show()
def analyze_strengths_weaknesses(profile, all_profiles_df):
    """
    Identify team strengths and weaknesses vs league average.

    For each of five pressing metrics the team's value is expressed as a
    percentage difference from the mean of ``all_profiles_df``. PPDA is
    measured the other way round (league minus team) because a lower PPDA is
    the better outcome. A difference above +15 is reported as a strength and
    one below -15 as a weakness.

    Args:
        profile: Mapping with the keys ``ppda``, ``pressures_per_90``,
            ``high_press_pct``, ``counterpress_rate`` and
            ``pressing_efficiency`` for the team being assessed.
        all_profiles_df: DataFrame holding the same columns for every team.

    Returns:
        dict with ``strengths`` (list of labels), ``weaknesses`` (list of
        labels) and ``comparisons`` (percentage differences keyed by
        ``<metric>_vs_league``).
    """
    # (comparison key, metric column, lower is better?, strength, weakness)
    checks = [
        ('ppda_vs_league', 'ppda', True,
         'Aggressive pressing (low PPDA)', 'Passive pressing (high PPDA)'),
        ('intensity_vs_league', 'pressures_per_90', False,
         'High pressing volume', 'Low pressing volume'),
        ('high_press_vs_league', 'high_press_pct', False,
         'Strong high press', 'Weak high press'),
        ('counterpress_vs_league', 'counterpress_rate', False,
         'Effective counterpressing', 'Poor counterpressing'),
        ('efficiency_vs_league', 'pressing_efficiency', False,
         'Efficient pressing (high regain)', 'Inefficient pressing'),
    ]

    metric_columns = [column for _, column, _, _, _ in checks]
    league_avg = all_profiles_df[metric_columns].mean()

    comparisons = {}
    for key, column, lower_is_better, _, _ in checks:
        baseline = league_avg[column]
        if lower_is_better:
            gap = baseline - profile[column]
        else:
            gap = profile[column] - baseline
        comparisons[key] = gap / baseline * 100

    strengths = []
    weaknesses = []
    for key, _, _, strength_label, weakness_label in checks:
        delta = comparisons[key]
        if delta > 15:
            strengths.append(strength_label)
        elif delta < -15:
            weaknesses.append(weakness_label)

    return {'strengths': strengths, 'weaknesses': weaknesses, 'comparisons': comparisons}
def generate_tactical_insights(profile, analysis):
    """
    Generate tactical recommendations based on profile.

    Args:
        profile: Mapping with a ``pressing_style`` entry.
        analysis: Mapping with a ``weaknesses`` collection of weakness labels.

    Returns:
        List of recommendation strings: style-based advice first (only the
        'Gegenpressing' and 'Low Block' styles have any), followed by one
        line for each recognised weakness. Empty when nothing applies.
    """
    style = profile['pressing_style']

    insights = []
    if style == 'Gegenpressing':
        insights.extend([
            'Maintain high defensive line to support aggressive pressing',
            'Focus on quick transitions after winning ball',
        ])
    elif style == 'Low Block':
        insights.extend([
            'Consider situational higher pressing against weaker opponents',
            'Prioritize counter-attacking efficiency',
        ])

    # Weakness label -> remedial advice, checked in this fixed order
    remedies = (
        ('Poor counterpressing', 'Train immediate reactions after possession loss'),
        ('Weak high press', 'Consider pressing triggers to optimize timing'),
    )
    for weakness, advice in remedies:
        if weakness in analysis['weaknesses']:
            insights.append(advice)

    return insights
def generate_pressing_dashboard(all_profiles_df, focus_team):
    """
    Generate comprehensive pressing profile dashboard.

    Prints a text report for ``focus_team``: headline metrics, strengths and
    weaknesses against the league average, tactical recommendations, and the
    team's PPDA rank (rank 1 is the lowest PPDA).

    Args:
        all_profiles_df: DataFrame with one row per team, including the
            ``team``, ``pressing_style``, ``intensity_level`` and metric
            columns read below.
        focus_team: Name of the team to report on; the first row whose
            ``team`` equals this value is used.

    Returns:
        None. The report is written to standard output.
    """
    banner = "=" * 70
    rule = "-" * 55

    def open_section(title):
        # Section title followed by the dashed rule
        print(title)
        print(rule)

    print("\n" + banner)
    print(f"PRESSING PROFILE DASHBOARD: {focus_team}")
    print(banner + "\n")

    focus_rows = all_profiles_df[all_profiles_df['team'] == focus_team]
    profile = focus_rows.iloc[0].to_dict()

    open_section("PROFILE SUMMARY:")
    print(f"Pressing Style: {profile['pressing_style']}")
    print(f"Intensity Level: {profile['intensity_level']}")
    print(f"PPDA: {profile['ppda']:.2f}")
    print(f"Pressures per 90: {profile['pressures_per_90']:.1f}")
    print(f"High Press %: {profile['high_press_pct']:.1f}%")
    print(f"Counterpress Rate: {profile['counterpress_rate']:.1f}%")
    print(f"Pressing Efficiency: {profile['pressing_efficiency']:.1f}%")

    analysis = analyze_strengths_weaknesses(profile, all_profiles_df)

    open_section("\n\nSTRENGTHS:")
    if analysis['strengths']:
        for strength in analysis['strengths']:
            print(f" + {strength}")
    else:
        print(" No significant strengths vs league average")

    open_section("\n\nWEAKNESSES:")
    if analysis['weaknesses']:
        for weakness in analysis['weaknesses']:
            print(f" - {weakness}")
    else:
        print(" No significant weaknesses vs league average")

    insights = generate_tactical_insights(profile, analysis)
    open_section("\n\nTACTICAL RECOMMENDATIONS:")
    for number, insight in enumerate(insights, 1):
        print(f" {number}. {insight}")

    # Ascending PPDA order: position 0 is the most aggressive pressing side
    ranked = all_profiles_df.sort_values('ppda').reset_index(drop=True)
    rank = ranked.index[ranked['team'] == focus_team][0] + 1
    open_section(f"\n\nLEAGUE RANKING:")
    print(f"PPDA Rank: {rank} of {len(all_profiles_df)}")
# Example usage
# events = sb.events(match_id=3773585)
# teams = events['team'].unique()
# profiles = [classify_pressing_style(calculate_pressing_profile(events, t)) for t in teams]
# profiles_df = normalize_profiles(pd.DataFrame(profiles))
# generate_pressing_dashboard(profiles_df, 'Liverpool')
# create_radar_comparison(profiles_df, list(teams))
library(tidyverse)
library(fmsb)
library(patchwork)
library(scales)
# ============================================
# TEAM PRESSING PROFILE COMPARISON DASHBOARD
# ============================================
# Calculate comprehensive pressing profile
#
# Summarises one team's pressing behaviour from a flattened event table and
# returns it as a one-row tibble.
#
# Arguments:
#   events    Event data frame with the columns team.name, type.name, minute,
#             location.x, location.y and counterpress.
#   team_name Value of team.name identifying the team to profile.
#
# Returns a tibble with: team, ppda, pressures_per_90, high_press_pct,
# mid_press_pct, low_press_pct, avg_press_height, counterpress_rate,
# pressing_efficiency, defensive_engagement, wide_press_pct,
# central_press_pct and press_width_spread. A metric whose denominator is
# empty (no pressures, no defensive actions, no opponent touches) is NA
# rather than NaN/Inf.
#
# NOTE(review): match length comes from max(events$minute) and every other
#   team in `events` is treated as the opponent, so this looks intended for a
#   single match's events - confirm before passing multi-match data.
calculate_pressing_profile <- function(events, team_name) {
  # Percentage that degrades to NA when there is nothing to divide by
  safe_pct <- function(numerator, denominator) {
    if (is.na(denominator) || denominator <= 0) {
      return(NA_real_)
    }
    numerator / denominator * 100
  }

  team_events <- events %>% filter(team.name == team_name)
  opponent_events <- events %>% filter(team.name != team_name)

  # Get match duration
  match_minutes <- max(events$minute, na.rm = TRUE)

  # PPDA calculation: opponent passes with x < 60 per defensive action of
  # ours with x > 60
  opp_passes <- opponent_events %>%
    filter(type.name == "Pass", !is.na(location.x), location.x < 60) %>%
    nrow()
  def_actions <- team_events %>%
    filter(
      type.name %in% c("Pressure", "Tackle", "Interception", "Foul Committed"),
      location.x > 60
    ) %>%
    nrow()
  ppda <- if (def_actions > 0) opp_passes / def_actions else NA_real_

  # Pressure events
  pressures <- team_events %>% filter(type.name == "Pressure")
  n_pressures <- nrow(pressures)

  if (n_pressures > 0) {
    # Zone analysis (by pitch length)
    high_press_pct <- mean(pressures$location.x > 80, na.rm = TRUE) * 100
    mid_press_pct <- mean(
      pressures$location.x >= 40 & pressures$location.x <= 80,
      na.rm = TRUE
    ) * 100
    low_press_pct <- mean(pressures$location.x < 40, na.rm = TRUE) * 100
    avg_press_height <- mean(pressures$location.x, na.rm = TRUE)
    press_width_spread <- sd(pressures$location.y, na.rm = TRUE)

    # Wide vs central pressing (by pitch width)
    wide_press_pct <- mean(
      pressures$location.y < 20 | pressures$location.y > 60,
      na.rm = TRUE
    ) * 100
    central_press_pct <- mean(
      pressures$location.y >= 20 & pressures$location.y <= 60,
      na.rm = TRUE
    ) * 100

    # Counterpress rate. A missing flag is counted as "not a counterpress":
    # comparing with `== TRUE` and dropping NAs would leave only the TRUE
    # rows and report 100% for feeds that populate the flag only when it is
    # TRUE (StatsBomb exports appear to do this - TODO confirm).
    counterpress_rate <- if ("counterpress" %in% names(pressures)) {
      mean(pressures$counterpress %in% TRUE) * 100
    } else {
      NA_real_
    }
  } else {
    high_press_pct <- mid_press_pct <- low_press_pct <- NA_real_
    avg_press_height <- press_width_spread <- NA_real_
    wide_press_pct <- central_press_pct <- NA_real_
    counterpress_rate <- NA_real_
  }

  # Intensity metrics
  pressures_per_90 <- if (is.finite(match_minutes) && match_minutes > 0) {
    n_pressures / (match_minutes / 90)
  } else {
    NA_real_
  }

  # Defensive actions per opponent touch
  opp_touches <- opponent_events %>%
    filter(type.name %in% c("Pass", "Carry", "Dribble", "Shot")) %>%
    nrow()
  defensive_engagement <- safe_pct(def_actions, opp_touches)

  # Pressing success (turnovers won)
  turnovers_won <- team_events %>%
    filter(type.name %in% c("Ball Recovery", "Interception")) %>%
    nrow()
  pressing_efficiency <- safe_pct(turnovers_won, n_pressures)

  # Build profile
  tibble(
    team = team_name,
    ppda = ppda,
    pressures_per_90 = pressures_per_90,
    high_press_pct = high_press_pct,
    mid_press_pct = mid_press_pct,
    low_press_pct = low_press_pct,
    avg_press_height = avg_press_height,
    counterpress_rate = counterpress_rate,
    pressing_efficiency = pressing_efficiency,
    defensive_engagement = defensive_engagement,
    wide_press_pct = wide_press_pct,
    central_press_pct = central_press_pct,
    press_width_spread = press_width_spread
  )
}
# Classify pressing style
#
# Adds two label columns to a profile table:
#   pressing_style  - "Gegenpressing", "High Press", "Mid Block", "Low Block"
#                     or "Balanced", from ppda, high_press_pct and
#                     counterpress_rate (first matching rule wins).
#   intensity_level - "Very High", "High", "Medium" or "Low", from
#                     pressures_per_90.
# Returns `profile` with those columns added.
classify_pressing_style <- function(profile) {
  # Rules are evaluated top to bottom; the last line is the fallback
  label_style <- function(ppda_value, high_share, counter_share) {
    case_when(
      ppda_value < 8 & high_share > 35 & counter_share > 15 ~ "Gegenpressing",
      ppda_value < 10 & high_share > 25 ~ "High Press",
      ppda_value >= 10 & ppda_value < 12 ~ "Mid Block",
      ppda_value >= 12 | high_share < 15 ~ "Low Block",
      TRUE ~ "Balanced"
    )
  }

  label_intensity <- function(per_90) {
    case_when(
      per_90 > 180 ~ "Very High",
      per_90 > 150 ~ "High",
      per_90 > 120 ~ "Medium",
      TRUE ~ "Low"
    )
  }

  mutate(
    profile,
    pressing_style = label_style(ppda, high_press_pct, counterpress_rate),
    intensity_level = label_intensity(pressures_per_90)
  )
}
# Normalize metrics for radar chart
#
# Rescales six pressing metrics to a 0-100 range across the rows of
# `profiles` and appends them as *_norm columns. PPDA is flipped
# (100 - scaled value) because a lower PPDA is the better outcome.
# Returns `profiles` with ppda_norm, intensity_norm, high_press_norm,
# counterpress_norm, efficiency_norm and engagement_norm added.
normalize_for_radar <- function(profiles) {
  # Shared 0-100 scaling used for every radar axis
  to_radar_scale <- function(values) {
    rescale(values, to = c(0, 100))
  }

  profiles %>%
    mutate(
      # PPDA: lower is better, so invert
      ppda_norm = 100 - to_radar_scale(ppda),
      intensity_norm = to_radar_scale(pressures_per_90),
      high_press_norm = to_radar_scale(high_press_pct),
      counterpress_norm = to_radar_scale(counterpress_rate),
      efficiency_norm = to_radar_scale(pressing_efficiency),
      engagement_norm = to_radar_scale(defensive_engagement)
    )
}
# Create radar chart comparison
#
# Draws an fmsb radar chart of the six *_norm metrics for the requested teams
# and adds a legend.
#
# Arguments:
#   profiles         Table with a `team` column plus ppda_norm,
#                    intensity_norm, high_press_norm, counterpress_norm,
#                    efficiency_norm and engagement_norm.
#   teams_to_compare Character vector of team names. Teams are drawn in this
#                    order; names missing from `profiles` are left out of
#                    both the chart and the legend.
#
# Stops with an error when none of the requested teams is present. The
# plotting margins are restored when the function exits.
create_radar_comparison <- function(profiles, teams_to_compare) {
  axis_labels <- c("PPDA\n(inverted)", "Intensity", "High Press %",
                   "Counterpress", "Efficiency", "Engagement")

  # Filter and prepare data. Columns are picked by name so the axis labels
  # below always line up, and rows follow `teams_to_compare` so that polygon
  # i, colour i and legend entry i all refer to the same team.
  radar_data <- profiles %>%
    filter(team %in% teams_to_compare) %>%
    arrange(match(team, teams_to_compare)) %>%
    select(team, ppda_norm, intensity_norm, high_press_norm,
           counterpress_norm, efficiency_norm, engagement_norm)

  if (nrow(radar_data) == 0) {
    stop("None of the requested teams are present in `profiles`.",
         call. = FALSE)
  }
  plotted_teams <- radar_data$team

  # Prepare for fmsb
  radar_matrix <- radar_data %>%
    select(-team) %>%
    as.data.frame()
  rownames(radar_matrix) <- plotted_teams
  colnames(radar_matrix) <- axis_labels

  # Add max and min rows
  n_axes <- length(axis_labels)
  radar_matrix <- rbind(
    rep(100, n_axes), # max
    rep(0, n_axes),   # min
    radar_matrix
  )

  # Recycle the palette so more than four teams never get an NA colour
  palette <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3")
  colors <- rep_len(palette, length(plotted_teams))

  # Create radar; put the caller's margins back afterwards
  old_par <- par(mar = c(1, 1, 2, 1))
  on.exit(par(old_par), add = TRUE)

  radarchart(
    radar_matrix,
    pcol = colors,
    pfcol = alpha(colors, 0.3),
    plwd = 2,
    plty = 1,
    cglcol = "grey",
    cglty = 1,
    axislabcol = "grey",
    vlcex = 0.8,
    title = "Pressing Profile Comparison"
  )
  legend("bottomright",
         legend = plotted_teams,
         col = colors,
         lty = 1,
         lwd = 2,
         bty = "n")
}
# Identify strengths and weaknesses
#
# Expresses five of a team's pressing metrics as a percentage difference from
# the league mean and labels those more than 15% better or worse.
#
# Arguments:
#   profile         One-row table for the team, with ppda, pressures_per_90,
#                   high_press_pct, counterpress_rate and pressing_efficiency.
#   league_profiles Table with the same columns for every team.
#
# Returns a list with:
#   strengths  - character vector of strength labels (NULL when none)
#   weaknesses - character vector of weakness labels (NULL when none)
#   vs_league  - `profile` with the five *_vs_league columns added
# A metric whose comparison is NA is reported as neither a strength nor a
# weakness.
analyze_strengths_weaknesses <- function(profile, league_profiles) {
  if (nrow(profile) != 1) {
    stop("`profile` must contain exactly one row.", call. = FALSE)
  }

  # Calculate league averages
  league_avg <- league_profiles %>%
    summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))

  # Compare to league (PPDA is league minus team: lower PPDA is better)
  comparison <- profile %>%
    mutate(
      ppda_vs_league =
        (league_avg$ppda - ppda) / league_avg$ppda * 100,
      intensity_vs_league =
        (pressures_per_90 - league_avg$pressures_per_90) /
          league_avg$pressures_per_90 * 100,
      high_press_vs_league =
        (high_press_pct - league_avg$high_press_pct) /
          league_avg$high_press_pct * 100,
      counterpress_vs_league =
        (counterpress_rate - league_avg$counterpress_rate) /
          league_avg$counterpress_rate * 100,
      efficiency_vs_league =
        (pressing_efficiency - league_avg$pressing_efficiency) /
          league_avg$pressing_efficiency * 100
    )

  # Comparison column -> labels, in reporting order
  checks <- list(
    list(column = "ppda_vs_league",
         strength = "Aggressive pressing (low PPDA)",
         weakness = "Passive pressing (high PPDA)"),
    list(column = "intensity_vs_league",
         strength = "High pressing volume",
         weakness = "Low pressing volume"),
    list(column = "high_press_vs_league",
         strength = "Strong high press",
         weakness = "Weak high press"),
    list(column = "counterpress_vs_league",
         strength = "Effective counterpressing",
         weakness = "Poor counterpressing"),
    list(column = "efficiency_vs_league",
         strength = "Efficient pressing (high regain)",
         weakness = "Inefficient pressing")
  )

  # Identify strengths (>15% above average) and weaknesses (>15% below).
  # isTRUE() keeps an NA comparison from aborting the `if`.
  strengths <- NULL
  weaknesses <- NULL
  for (check in checks) {
    delta <- comparison[[check$column]]
    if (isTRUE(delta > 15)) {
      strengths <- c(strengths, check$strength)
    } else if (isTRUE(delta < -15)) {
      weaknesses <- c(weaknesses, check$weakness)
    }
  }

  list(
    strengths = strengths,
    weaknesses = weaknesses,
    vs_league = comparison
  )
}
# Generate tactical recommendations
#
# Arguments:
#   profile  Object whose `pressing_style` element holds the team's style.
#   analysis List whose `weaknesses` element holds weakness labels.
#
# Returns a character vector: style-based advice first (only "Gegenpressing"
# and "Low Block" have any), then advice for "Poor counterpressing" and
# "Weak high press" when those are listed. NULL when nothing applies.
generate_tactical_insights <- function(profile, analysis) {
  style <- profile$pressing_style
  weaknesses <- analysis$weaknesses

  # Based on style
  style_advice <- if (style == "Gegenpressing") {
    c(
      "Maintain high defensive line to support aggressive pressing",
      "Focus on quick transitions after winning ball in dangerous areas"
    )
  } else if (style == "Low Block") {
    c(
      "Consider situational higher pressing against weaker opponents",
      "Prioritize counter-attacking efficiency when winning ball deep"
    )
  } else {
    NULL
  }

  # Based on weaknesses
  counterpress_advice <- if ("Poor counterpressing" %in% weaknesses) {
    c(
      "Train immediate reactions after possession loss",
      "Improve collective pressing coordination"
    )
  } else {
    NULL
  }

  high_press_advice <- if ("Weak high press" %in% weaknesses) {
    c(
      "Consider pressing triggers to optimize high press timing",
      "Ensure forwards are fit for pressing demands"
    )
  } else {
    NULL
  }

  # c() drops the NULL pieces, so an all-NULL result is itself NULL
  insights <- c(style_advice, counterpress_advice, high_press_advice)
  return(insights)
}
# Generate full dashboard
#
# Prints a text report for one team: headline pressing metrics, strengths and
# weaknesses against the league average, tactical recommendations and the
# team's PPDA rank (rank 1 is the lowest PPDA).
#
# Arguments:
#   all_profiles Table with one row per team, including `team`,
#                `pressing_style`, `intensity_level` and the metric columns
#                printed below.
#   focus_team   Name of the team to report on.
#
# Stops with an error, before printing anything, when `focus_team` is not in
# `all_profiles`. If the team appears more than once the first row is used.
# Returns NULL invisibly; the report goes to the console.
generate_pressing_dashboard <- function(all_profiles, focus_team) {
  team_profile <- all_profiles %>% filter(team == focus_team)
  if (nrow(team_profile) == 0) {
    stop("Team '", focus_team, "' not found in `all_profiles`.",
         call. = FALSE)
  }
  team_profile <- head(team_profile, 1)

  # Dashed rule printed under every section title
  print_rule <- function() {
    cat("-", rep("-", 50), "\n", sep = "")
  }

  cat("\n", rep("=", 70), "\n", sep = "")
  cat("PRESSING PROFILE DASHBOARD:", focus_team, "\n")
  cat(rep("=", 70), "\n\n", sep = "")

  # Profile summary
  cat("PROFILE SUMMARY:\n")
  print_rule()
  cat("Pressing Style:", team_profile$pressing_style, "\n")
  cat("Intensity Level:", team_profile$intensity_level, "\n")
  cat("PPDA:", round(team_profile$ppda, 2), "\n")
  cat("Pressures per 90:", round(team_profile$pressures_per_90, 1), "\n")
  cat("High Press %:", round(team_profile$high_press_pct, 1), "%\n")
  cat("Counterpress Rate:", round(team_profile$counterpress_rate, 1), "%\n")
  cat("Pressing Efficiency:", round(team_profile$pressing_efficiency, 1), "%\n")

  # Strengths and weaknesses
  analysis <- analyze_strengths_weaknesses(team_profile, all_profiles)

  cat("\n\nSTRENGTHS:\n")
  print_rule()
  if (length(analysis$strengths) > 0) {
    for (s in analysis$strengths) cat(" + ", s, "\n")
  } else {
    cat(" No significant strengths vs league average\n")
  }

  cat("\n\nWEAKNESSES:\n")
  print_rule()
  if (length(analysis$weaknesses) > 0) {
    for (w in analysis$weaknesses) cat(" - ", w, "\n")
  } else {
    cat(" No significant weaknesses vs league average\n")
  }

  # Tactical insights
  insights <- generate_tactical_insights(team_profile, analysis)
  cat("\n\nTACTICAL RECOMMENDATIONS:\n")
  print_rule()
  for (i in seq_along(insights)) {
    cat(" ", i, ". ", insights[i], "\n", sep = "")
  }

  # League ranking by ascending PPDA
  cat("\n\nLEAGUE RANKING:\n")
  print_rule()
  rankings <- all_profiles %>%
    arrange(ppda) %>%
    mutate(rank = row_number()) %>%
    filter(team == focus_team)
  cat("PPDA Rank:", rankings$rank[1], "of", nrow(all_profiles), "\n")

  invisible(NULL)
}
# Example usage with multiple teams
# all_profiles <- bind_rows(lapply(unique(events$team.name), function(t) {
# calculate_pressing_profile(events, t)
# }))
# all_profiles <- classify_pressing_style(all_profiles)
# all_profiles <- normalize_for_radar(all_profiles)
# generate_pressing_dashboard(all_profiles, "Liverpool")
# create_radar_comparison(all_profiles, c("Liverpool", "Manchester City", "Arsenal"))
Summary
Key Takeaways
- PPDA measures pressing intensity - lower values indicate more aggressive pressing
- Counterpressing immediately after losing the ball is a key differentiator for elite pressing teams
- Pressing triggers (backward passes, isolated players, poor touches) help teams press intelligently rather than blindly
- Individual contributions vary by position, with forwards and pressing midfielders typically leading in pressures per 90
- Team profiles reveal distinct pressing philosophies from Gegenpressing to low-block containment