Capstone - Complete Analytics System
Opposition analysis provides the tactical intelligence teams need to prepare for upcoming matches. By systematically analyzing opponents' playing patterns, strengths, weaknesses, and key players, coaches can develop targeted game plans.
Learning Objectives
- Build comprehensive opposition reports using event data
- Identify opponent strengths, weaknesses, and tendencies
- Analyze set piece routines and defensive vulnerabilities
- Create automated scouting dashboards
- Generate tactical recommendations from data
Team Style Analysis
Understanding how an opponent plays is the foundation of opposition analysis. We quantify playing style through metrics that capture build-up patterns, pressing intensity, and attacking approach.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsbombpy import sb
# Load the opponent's ten most recent matches
matches = sb.matches(competition_id=11, season_id=90)  # La Liga
involves_barcelona = (
    (matches['home_team'] == 'Barcelona') |
    (matches['away_team'] == 'Barcelona')
)
# Sort newest-first before truncating; a bare .head(10) would keep whichever
# ten rows happen to come first in the listing rather than the latest games.
barcelona_matches = (
    matches[involves_barcelona]
    .sort_values('match_date', ascending=False)
    .head(10)
)
# Get events for all matches and stack them into a single frame
all_events = [
    sb.events(match_id=match_id)
    for match_id in barcelona_matches['match_id']
]
opponent_events = pd.concat(all_events, ignore_index=True)
def calculate_team_style(events, team_name):
    """
    Calculate comprehensive team style metrics.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows for every team in the analysed matches. Columns read here:
        'team', 'type', 'minute', 'location', 'pass_end_location',
        'pass_length', 'shot_statsbomb_xg' and, when present, 'match_id'
        and 'pass_cross'.
    team_name : str
        Team whose style is profiled.

    Returns
    -------
    pandas.Series
        One value per metric: possession_share, avg_pass_length,
        long_ball_pct, progressive_pass_pct, pressures_per_90,
        high_press_pct, shots_per_90, xg_per_shot, avg_touch_width,
        crosses_per_90.
    """
    def _coord(locations, axis, default):
        # 'location' cells hold [x, y] lists; anything else falls back to default.
        return locations.apply(
            lambda loc: loc[axis] if isinstance(loc, list) else default
        )

    team_events = events[events['team'] == team_name].copy()
    total_events = len(events)

    # Match time for per-90 normalization, taken from the events themselves:
    # the last recorded minute of each match, summed over matches. This avoids
    # depending on a module-level matches table.
    if 'match_id' in events.columns:
        match_minutes = events.groupby('match_id')['minute'].max().sum()
    else:
        match_minutes = events['minute'].max()
    nineties = match_minutes / 90 if match_minutes > 0 else np.nan

    style = {}

    # Possession characteristics (share of all recorded events, not of time)
    style['possession_share'] = (
        len(team_events) / total_events * 100 if total_events else np.nan
    )
    # .copy() so the derived column below is not written onto a slice
    passes = team_events[team_events['type'] == 'Pass'].copy()
    style['avg_pass_length'] = passes['pass_length'].mean()
    style['long_ball_pct'] = (passes['pass_length'] > 30).mean() * 100

    # Directness - a pass is progressive when it gains more than 10 units in x
    start_x = _coord(passes['location'], 0, 0)
    end_x = _coord(passes['pass_end_location'], 0, 0)
    passes['is_progressive'] = (end_x - start_x) > 10
    style['progressive_pass_pct'] = passes['is_progressive'].mean() * 100

    # Pressing intensity
    pressures = team_events[team_events['type'] == 'Pressure']
    style['pressures_per_90'] = len(pressures) / nineties

    # High press: pressures applied beyond x = 80
    pressure_x = _coord(pressures['location'], 0, 0)
    style['high_press_pct'] = (pressure_x > 80).mean() * 100

    # Attacking approach
    shots = team_events[team_events['type'] == 'Shot']
    style['shots_per_90'] = len(shots) / nineties
    style['xg_per_shot'] = shots['shot_statsbomb_xg'].mean()

    # Width: spread (standard deviation) of y over on-ball actions
    possession_events = team_events[
        team_events['type'].isin(['Pass', 'Carry', 'Dribble'])
    ]
    style['avg_touch_width'] = _coord(possession_events['location'], 1, 40).std()

    # Crosses (the flag column is absent when the sample contains no crosses)
    if 'pass_cross' in passes.columns:
        n_crosses = int((passes['pass_cross'] == True).sum())
    else:
        n_crosses = 0
    style['crosses_per_90'] = n_crosses / nineties

    return pd.Series(style)
# Profile Barcelona's style from the events loaded above
barcelona_style = calculate_team_style(opponent_events, 'Barcelona')
# Create style radar
def create_style_radar(team_style, team_name, league_avg):
    """
    Create radar chart comparing team style to league average.

    Parameters
    ----------
    team_style : dict or pandas.Series
        Metric name -> value for the team. The metric names become the
        radar axes.
    team_name : str
        Label used in the legend and the title.
    league_avg : dict or pandas.Series
        Metric name -> league average. Must contain every metric present in
        ``team_style`` (a missing metric raises KeyError).

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Accept plain dicts as well as Series: the example call passes dicts,
    # which have no .index attribute.
    team_style = pd.Series(team_style)
    league_avg = pd.Series(league_avg)
    categories = list(team_style.index)
    n_axes = len(categories)

    # Normalize each metric to 0-100, using 1.5x the larger of the two values
    # as that axis' maximum.
    normalized_team = []
    normalized_avg = []
    for cat in categories:
        team_val = team_style[cat]
        avg_val = league_avg[cat]
        max_val = max(team_val, avg_val) * 1.5
        if max_val == 0:
            # Both values are zero: plot them at the centre instead of
            # dividing by zero.
            normalized_team.append(0.0)
            normalized_avg.append(0.0)
            continue
        normalized_team.append(team_val / max_val * 100)
        normalized_avg.append(avg_val / max_val * 100)

    # One evenly spaced angle per metric; repeat the first point so the
    # polygon closes.
    angles = [i / float(n_axes) * 2 * np.pi for i in range(n_axes)]
    angles += angles[:1]
    normalized_team += normalized_team[:1]
    normalized_avg += normalized_avg[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))
    ax.plot(angles, normalized_team, 'o-', linewidth=2, label=team_name, color='#e74c3c')
    ax.fill(angles, normalized_team, alpha=0.25, color='#e74c3c')
    ax.plot(angles, normalized_avg, 'o-', linewidth=2, label='League Average', color='gray')
    ax.fill(angles, normalized_avg, alpha=0.1, color='gray')
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories, size=10)
    ax.set_ylim(0, 100)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
    plt.title(f'{team_name} Playing Style Profile', size=14, y=1.08)
    plt.tight_layout()
    plt.show()
# Hand-entered example metrics for the radar chart.
# Kept under its own name: reusing `barcelona_style` here would overwrite the
# Series returned by calculate_team_style(), whose keys ('possession_share',
# 'crosses_per_90', ...) the report generator reads later on.
example_style = {
    'possession': 67.5,
    'progressive_passes': 12.3,
    'pressures': 145.2,
    'high_press_pct': 42.5,
    'shots': 15.8,
    'xg_per_shot': 0.12
}
# Example league averages for the same metrics
league_avg = {
    'possession': 50.0,
    'progressive_passes': 8.5,
    'pressures': 120.0,
    'high_press_pct': 32.0,
    'shots': 12.0,
    'xg_per_shot': 0.10
}
# Wrapped in Series because create_style_radar reads the metric names from
# `.index`, which a plain dict does not have.
create_style_radar(pd.Series(example_style), 'Barcelona', pd.Series(league_avg))
library(tidyverse)
library(StatsBombR)
# Load the opponent's ten most recent matches.
# FreeMatches() needs a competitions table; the original passed an undefined
# `Competitions` object. The ids mirror the Python example in this section
# (competition_id 11, season_id 90 - La Liga).
la_liga <- FreeCompetitions() %>%
  filter(competition_id == 11, season_id == 90)

opponent_matches <- FreeMatches(la_liga) %>%
  filter(
    home_team.home_team_name == "Barcelona" |
      away_team.away_team_name == "Barcelona"
  ) %>%
  arrange(desc(match_date)) %>% # newest first, so head() keeps the latest games
  head(10)

# free_allevents() takes the matches data frame (get.matchFree() expects a
# match row, not a bare id). allclean() is applied because the analysis below
# reads location.x / location.y / pass.end_location.x, which the raw events
# only carry as list columns.
opponent_events <- free_allevents(MatchesDF = opponent_matches) %>%
  allclean()
# Calculate team style metrics
#
# events:    event rows for every team in the analysed matches; reads
#            match_id, minute, team.name, type.name, pass.length, pass.cross,
#            location.x, location.y, pass.end_location.x, shot.statsbomb_xg.
# team_name: team whose style is profiled.
#
# Returns a one-row tibble with one column per metric.
calculate_team_style <- function(events, team_name) {
  team_events <- events %>%
    filter(team.name == team_name)

  passes <- team_events %>% filter(type.name == "Pass")
  pressures <- team_events %>% filter(type.name == "Pressure")
  shots <- team_events %>% filter(type.name == "Shot")
  possession_events <- team_events %>%
    filter(type.name %in% c("Pass", "Carry", "Dribble"))

  # Match time for per-90 rates: the last recorded minute of each match,
  # summed over matches. The earlier denominator summed event durations
  # (which overlap and are missing for many event types) and then multiplied
  # the rate by 90 a second time.
  match_minutes <- events %>%
    group_by(match_id) %>%
    summarise(final_minute = max(minute, na.rm = TRUE), .groups = "drop") %>%
    pull(final_minute) %>%
    sum()
  nineties <- match_minutes / 90

  per_90 <- function(n) {
    if (isTRUE(nineties > 0)) n / nineties else NA_real_
  }

  tibble(
    # Build-up characteristics (share of recorded events, not of time)
    possession_share = nrow(team_events) / nrow(events) * 100,
    avg_pass_length = mean(passes$pass.length, na.rm = TRUE),
    long_ball_pct = mean(passes$pass.length > 30, na.rm = TRUE) * 100,

    # Directness: passes gaining more than 10 units in x
    progressive_pass_pct = mean(
      passes$pass.end_location.x - passes$location.x > 10,
      na.rm = TRUE
    ) * 100,

    # Pressing intensity
    pressures_per_90 = per_90(nrow(pressures)),
    high_press_pct = mean(pressures$location.x > 80, na.rm = TRUE) * 100,

    # Attacking approach
    shots_per_90 = per_90(nrow(shots)),
    xg_per_shot = mean(shots$shot.statsbomb_xg, na.rm = TRUE),

    # Width: spread of y over on-ball actions
    avg_touch_width = sd(possession_events$location.y, na.rm = TRUE),

    # Crosses (%in% treats a missing flag as "not a cross")
    crosses_per_90 = per_90(sum(passes$pass.cross %in% TRUE))
  )
}
# Style profile for the opponent
barcelona_style <- calculate_team_style(opponent_events, "Barcelona")

# Compare to league average
# NOTE(review): `all_teams` is not created anywhere in this section; it is
# presumably a table with one row of style metrics per league team - confirm
# where it is built before running this.
league_avg <- summarise(all_teams, across(everything(), mean))

# One row per metric, with the team and league values side by side plus the
# absolute and percentage gap.
style_comparison <- list(
  Barcelona = barcelona_style,
  `League Avg` = league_avg
) %>%
  bind_rows(.id = "team") %>%
  pivot_longer(cols = -team, names_to = "metric", values_to = "value") %>%
  pivot_wider(names_from = team, values_from = value) %>%
  mutate(
    difference = Barcelona - `League Avg`,
    pct_diff = difference / `League Avg` * 100
  )

print(style_comparison)
Identifying Weaknesses
Every team has exploitable weaknesses. Data analysis can reveal defensive vulnerabilities, pressing weaknesses, and situational problems that can be targeted.
def analyze_defensive_weaknesses(events, team_name):
    """
    Identify defensive vulnerabilities for opposition analysis.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows for both teams. Columns read here: 'team', 'type',
        'location', 'shot_statsbomb_xg', 'shot_outcome', 'play_pattern',
        'duel_outcome', 'player', 'position'.
    team_name : str
        Team whose defending is analysed; shots by every other team count as
        shots conceded.

    Returns
    -------
    dict
        'goals_by_zone' (DataFrame with two-level columns: xG sum/count and
        goal count per attack_zone x attack_side), 'transition_vulnerability'
        and 'set_piece_vulnerability' (dicts with shots/xg/goals),
        'duel_analysis' (DataFrame, duels lost/contested per pitch third) and
        'player_weaknesses' (DataFrame with dribbled_past, fouls and errors
        per player and position).
    """
    def _coord(locations, axis, default):
        # 'location' cells hold [x, y] lists; anything else falls back to default.
        return locations.apply(
            lambda loc: loc[axis] if isinstance(loc, list) else default
        )

    def _shot_summary(shots):
        return {
            'shots': len(shots),
            'xg': shots['shot_statsbomb_xg'].sum(),
            'goals': (shots['shot_outcome'] == 'Goal').sum()
        }

    # Events where opponent is attacking
    defensive_events = events[events['team'] != team_name].copy()
    weaknesses = {}

    # Shots conceded by zone
    shots_conceded = defensive_events[defensive_events['type'] == 'Shot'].copy()
    shots_conceded['location_x'] = _coord(shots_conceded['location'], 0, 60)
    shots_conceded['location_y'] = _coord(shots_conceded['location'], 1, 40)
    # include_lowest=True keeps coordinates exactly on the 0 boundary, which
    # pd.cut would otherwise leave unbinned.
    shots_conceded['attack_zone'] = pd.cut(
        shots_conceded['location_x'],
        bins=[0, 40, 80, 120],
        labels=['Low Block', 'Mid Block', 'High Press'],
        include_lowest=True
    )
    shots_conceded['attack_side'] = pd.cut(
        shots_conceded['location_y'],
        bins=[0, 27, 53, 80],
        labels=['Left', 'Central', 'Right'],
        include_lowest=True
    )
    # observed=False keeps every zone/side combination (empty ones sum to 0),
    # so the downstream heatmap always receives a full grid.
    weaknesses['goals_by_zone'] = (
        shots_conceded
        .groupby(['attack_zone', 'attack_side'], observed=False)
        .agg({
            'shot_statsbomb_xg': ['sum', 'count'],
            'shot_outcome': lambda x: (x == 'Goal').sum()
        })
        .reset_index()
    )

    # Transition vulnerability
    transition_patterns = ['From Counter', 'From Throw In']
    weaknesses['transition_vulnerability'] = _shot_summary(
        shots_conceded[shots_conceded['play_pattern'].isin(transition_patterns)]
    )

    # Set piece vulnerability
    set_piece_patterns = ['From Corner', 'From Free Kick']
    weaknesses['set_piece_vulnerability'] = _shot_summary(
        shots_conceded[shots_conceded['play_pattern'].isin(set_piece_patterns)]
    )

    # Defensive duels analysis
    team_events = events[events['team'] == team_name]
    duel_events = team_events[
        team_events['type'].isin(['Duel', 'Dribbled Past'])
    ].copy()
    duel_events['location_x'] = _coord(duel_events['location'], 0, 60)
    duel_events['zone'] = pd.cut(
        duel_events['location_x'],
        bins=[0, 40, 80, 120],
        labels=['Defensive Third', 'Middle Third', 'Attacking Third'],
        include_lowest=True
    )
    duel_events['duel_lost'] = (
        duel_events['duel_outcome'].isin(['Lost', 'Lost In Play', 'Lost Out']) |
        (duel_events['type'] == 'Dribbled Past')
    )
    weaknesses['duel_analysis'] = (
        duel_events
        .groupby('zone', observed=False)
        .agg({'duel_lost': ['sum', 'count']})
        .reset_index()
    )

    # Player-specific weaknesses. Each count gets an explicit column name;
    # a list of anonymous lambdas would only yield '<lambda_0>'-style labels.
    weaknesses['player_weaknesses'] = (
        team_events
        .assign(
            dribbled_past=team_events['type'] == 'Dribbled Past',
            fouls=team_events['type'] == 'Foul Committed',
            errors=team_events['type'] == 'Error'
        )
        .groupby(['player', 'position'])[['dribbled_past', 'fouls', 'errors']]
        .sum()
        .reset_index()
    )

    return weaknesses
# Run the defensive analysis on the events loaded earlier
barcelona_weaknesses = analyze_defensive_weaknesses(opponent_events, 'Barcelona')
# Visualize vulnerability heatmap
def plot_vulnerability_zones(weaknesses):
    """
    Draw the xG-conceded grid (attack zone x attack side) as a heatmap.

    ``weaknesses`` is the dict returned by analyze_defensive_weaknesses; only
    its 'goals_by_zone' table is read. The figure is shown with plt.show().
    """
    # Rows = attack zone, columns = attack side, cells = summed xG
    xg_grid = weaknesses['goals_by_zone'].pivot(
        index='attack_zone',
        columns='attack_side',
        values=('shot_statsbomb_xg', 'sum')
    )
    side_labels = xg_grid.columns
    zone_labels = xg_grid.index

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(xg_grid.values, cmap='YlOrRd')

    ax.set_xticks(range(len(side_labels)))
    ax.set_yticks(range(len(zone_labels)))
    ax.set_xticklabels(side_labels)
    ax.set_yticklabels(zone_labels)

    # Write each cell's value on top of its tile
    for row in range(len(zone_labels)):
        for col in range(len(side_labels)):
            ax.text(
                col, row, f'{xg_grid.values[row, col]:.2f}',
                ha='center', va='center', color='white'
            )

    ax.set_title('Defensive Vulnerability Zones (xG Conceded)')
    plt.colorbar(im)
    plt.tight_layout()
    plt.show()
# Show the heatmap for the Barcelona analysis computed above
plot_vulnerability_zones(barcelona_weaknesses)
# Analyze defensive weaknesses
# events:    event rows for both teams; reads team.name, type.name,
#            location.x, location.y, shot.outcome.name, shot.statsbomb_xg,
#            play_pattern.name, duel.outcome.name, player.name, position.name.
# team_name: team whose defending is analysed; shots by every other team
#            count as shots conceded.
#
# Returns a list: goals_by_zone, transition_vulnerability, duel_analysis,
# set_piece_vulnerability, player_weaknesses.
analyze_defensive_weaknesses <- function(events, team_name) {
  # Shots taken against the analysed team
  shots_conceded <- events %>%
    filter(team.name != team_name, type.name == "Shot")

  # Shots conceded by zone. Every shot is kept so `xg` is the xG conceded in
  # the zone (previously only goals were kept, so it was the xG of goals
  # alone); `goals` counts the shots that were scored.
  goals_conceded <- shots_conceded %>%
    # Rows without coordinates would otherwise land in the catch-all bins
    filter(!is.na(location.x), !is.na(location.y)) %>%
    mutate(
      attack_zone = case_when(
        location.x < 40 ~ "Low Block",
        location.x < 80 ~ "Mid Block",
        TRUE ~ "High Press"
      ),
      attack_side = case_when(
        location.y < 27 ~ "Left",
        location.y > 53 ~ "Right",
        TRUE ~ "Central"
      )
    ) %>%
    group_by(attack_zone, attack_side) %>%
    summarise(
      goals = sum(shot.outcome.name == "Goal", na.rm = TRUE),
      xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      .groups = "drop"
    )

  # Vulnerability to transitions
  transition_danger <- shots_conceded %>%
    filter(play_pattern.name %in% c("From Counter", "From Throw In")) %>%
    summarise(
      transition_shots = n(),
      transition_xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      transition_goals = sum(shot.outcome.name == "Goal", na.rm = TRUE)
    )

  # Defensive duels lost, by pitch third
  duels_lost <- events %>%
    filter(
      team.name == team_name,
      type.name %in% c("Duel", "Dribbled Past"),
      !is.na(location.x)
    ) %>%
    mutate(
      # %in% is never NA, so a missing duel outcome simply counts as not lost
      duel_lost = duel.outcome.name %in% c("Lost", "Lost In Play", "Lost Out") |
        type.name == "Dribbled Past",
      zone = case_when(
        location.x < 40 ~ "Defensive Third",
        location.x < 80 ~ "Middle Third",
        TRUE ~ "Attacking Third"
      )
    ) %>%
    group_by(zone) %>%
    summarise(
      total_duels = n(),
      duels_lost = sum(duel_lost),
      loss_rate = duels_lost / total_duels * 100,
      .groups = "drop"
    )

  # Set piece vulnerability
  set_piece_conceded <- shots_conceded %>%
    filter(play_pattern.name %in% c("From Corner", "From Free Kick")) %>%
    summarise(
      set_piece_shots = n(),
      set_piece_xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      set_piece_goals = sum(shot.outcome.name == "Goal", na.rm = TRUE)
    )

  # Player-specific weaknesses (who gets beaten most)
  player_weaknesses <- events %>%
    filter(team.name == team_name) %>%
    group_by(player.name, position.name) %>%
    summarise(
      dribbled_past = sum(type.name == "Dribbled Past"),
      fouls = sum(type.name == "Foul Committed"),
      errors = sum(type.name == "Error"),
      .groups = "drop"
    ) %>%
    arrange(desc(dribbled_past + errors))

  list(
    goals_by_zone = goals_conceded,
    transition_vulnerability = transition_danger,
    duel_analysis = duels_lost,
    set_piece_vulnerability = set_piece_conceded,
    player_weaknesses = player_weaknesses
  )
}
barcelona_weaknesses <- analyze_defensive_weaknesses(opponent_events, "Barcelona")

# Visualize weak zones: one tile per attack side x attack zone, coloured by
# the xg column and labelled with the goals and xg values
barcelona_weaknesses$goals_by_zone %>%
  ggplot(mapping = aes(x = attack_side, y = attack_zone, fill = xg)) +
  geom_tile() +
  geom_text(
    mapping = aes(label = paste0("Goals: ", goals, "\nxG: ", round(xg, 2))),
    colour = "white"
  ) +
  scale_fill_gradient(low = "yellow", high = "red") +
  ggtitle("Barcelona Defensive Vulnerability Zones") +
  xlab("Attack Side") +
  ylab("Defensive Phase") +
  theme_minimal()
Key Player Analysis
Identifying and neutralizing opposition key players is essential. We analyze which players are most influential and how to limit their impact.
def analyze_key_players(events, team_name):
    """
    Identify and analyze opposition key players.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows. Columns read here: 'team', 'player', 'position',
        'minute', 'id', 'type', 'location', 'shot_statsbomb_xg' and, when
        present, 'match_id', 'pass_end_location', 'pass_outcome',
        'pass_shot_assist', 'shot_outcome'.
    team_name : str
        Team whose players are ranked.

    Returns
    -------
    dict
        'influence_ranking': one row per (player, position), sorted by
        overall_influence descending; 'top_threats': the five highest
        overall_influence rows among those with more than 45 minutes.
    """
    def _x_coord(locations):
        # 'location' cells hold [x, y] lists; anything else becomes NaN so it
        # can never count as a progressive pass.
        return locations.apply(
            lambda loc: loc[0] if isinstance(loc, list) else np.nan
        )

    def _column(frame, name):
        # Optional columns are missing entirely when the sample has no such
        # events; treat that as "all missing".
        if name in frame.columns:
            return frame[name]
        return pd.Series(np.nan, index=frame.index, dtype=object)

    group_keys = ['player', 'position']
    team_events = events[events['team'] == team_name].copy()

    # Per-event flags, computed once for the whole team instead of once per
    # player inside a loop.
    is_pass = team_events['type'] == 'Pass'
    pass_outcome = _column(team_events, 'pass_outcome')
    # Forward gain is end x minus start x (the earlier start-minus-end form
    # counted backward passes as progressive).
    forward_gain = (
        _x_coord(_column(team_events, 'pass_end_location')) -
        _x_coord(team_events['location'])
    )
    team_events['_is_pass'] = is_pass
    # Completion is only meaningful on pass rows: every non-pass event has a
    # missing pass outcome and would otherwise be counted as a completed pass.
    team_events['_pass_completed'] = is_pass & (
        pass_outcome.isna() | (pass_outcome == 'Complete')
    )
    team_events['_is_progressive'] = is_pass & (forward_gain > 10)
    team_events['_is_key_pass'] = _column(team_events, 'pass_shot_assist') == True
    team_events['_is_pressure'] = team_events['type'] == 'Pressure'
    team_events['_is_goal'] = _column(team_events, 'shot_outcome') == 'Goal'

    # Calculate influence metrics per player
    player_stats = team_events.groupby(group_keys).agg(
        min_minute=('minute', 'min'),
        max_minute=('minute', 'max'),
        touches=('id', 'count'),
        passes=('_is_pass', 'sum'),
        completed_passes=('_pass_completed', 'sum'),
        xg=('shot_statsbomb_xg', 'sum'),
        goals=('_is_goal', 'sum'),
        progressive_passes=('_is_progressive', 'sum'),
        key_passes=('_is_key_pass', 'sum'),
        pressures=('_is_pressure', 'sum'),
    )
    player_stats['pass_completion'] = (
        player_stats['completed_passes'] / player_stats['passes'] * 100
    )

    # Minutes: first-to-last event span within each match, summed over
    # matches. A single max - min over the concatenated sample caps every
    # player at roughly one match.
    if 'match_id' in team_events.columns:
        per_match = (
            team_events
            .groupby(group_keys + ['match_id'])['minute']
            .agg(['min', 'max'])
        )
        player_stats['minutes'] = (
            (per_match['max'] - per_match['min'])
            .groupby(level=group_keys)
            .sum()
        )
    else:
        player_stats['minutes'] = (
            player_stats['max_minute'] - player_stats['min_minute']
        )

    player_stats = player_stats.reset_index()[[
        'player', 'position', 'min_minute', 'max_minute',
        'touches', 'passes', 'pass_completion', 'xg', 'goals', 'minutes',
        'progressive_passes', 'key_passes', 'pressures'
    ]]

    # Calculate influence scores; players with no measurable playing time get
    # NaN rather than an infinite per-90 rate.
    nineties = (player_stats['minutes'] / 90).where(player_stats['minutes'] > 0)
    player_stats['attacking_influence'] = (
        (player_stats['xg'] * 10 +
         player_stats['key_passes'] * 2 +
         player_stats['progressive_passes'] * 0.5) /
        nineties
    )
    player_stats['overall_influence'] = player_stats['attacking_influence']

    # Top threats
    top_threats = (
        player_stats[player_stats['minutes'] > 45]
        .nlargest(5, 'overall_influence')
    )
    return {
        'influence_ranking': player_stats.sort_values('overall_influence', ascending=False),
        'top_threats': top_threats
    }
# Rank Barcelona's players using the events loaded earlier
key_players = analyze_key_players(opponent_events, 'Barcelona')
def create_player_heatmap(events, player_name):
    """
    Draw a kernel-density heatmap of one player's event locations.

    Rows for ``player_name`` are selected from ``events``; rows whose
    'location' is not a list are dropped. The figure is shown with plt.show().
    """
    from mplsoccer import Pitch

    def _axis_value(loc, axis):
        return loc[axis] if isinstance(loc, list) else np.nan

    touches = events[events['player'] == player_name].copy()

    # Split the [x, y] location lists into numeric columns
    touches['x'] = touches['location'].apply(lambda loc: _axis_value(loc, 0))
    touches['y'] = touches['location'].apply(lambda loc: _axis_value(loc, 1))
    touches = touches.dropna(subset=['x', 'y'])

    # Dark StatsBomb-dimension pitch as the canvas
    pitch = Pitch(
        pitch_type='statsbomb',
        line_color='white',
        pitch_color='#1a1a1a'
    )
    fig, ax = pitch.draw(figsize=(12, 8))

    # Density layer
    pitch.kdeplot(
        touches['x'], touches['y'],
        ax=ax, cmap='hot', fill=True, levels=50, alpha=0.7
    )

    ax.set_title(f'{player_name} - Touch Heatmap', fontsize=14, color='white')
    plt.tight_layout()
    plt.show()
# Create threat report
def generate_threat_report(key_players):
    """
    Turn the top-threat table into one tactical recommendation per player.

    ``key_players['top_threats']`` must provide 'player', 'position',
    'overall_influence' and 'key_passes' for each row. Returns a DataFrame
    with columns player, position, threat_level, key_strength and
    recommendation (empty when there are no threats).
    """
    central_strikers = ['Center Forward', 'Striker']
    creative_attackers = ['Attacking Midfield', 'Right Wing', 'Left Wing']

    def _advice_for(position):
        if position in central_strikers:
            return "Man-mark in box, deny service, press when receiving back to goal"
        if position in creative_attackers:
            return "Double up when on ball, prevent turns, force to weaker foot"
        return "Press early, prevent easy progression, force long balls"

    rows = []
    for _, threat in key_players['top_threats'].iterrows():
        is_high = threat['overall_influence'] > 5
        creates_chances = threat['key_passes'] > 2
        rows.append({
            'player': threat['player'],
            'position': threat['position'],
            'threat_level': 'HIGH' if is_high else 'MEDIUM',
            'key_strength': 'Chance creation' if creates_chances else 'Ball progression',
            'recommendation': _advice_for(threat['position'])
        })
    return pd.DataFrame(rows)
# Build the per-player recommendations and print the resulting table
threat_report = generate_threat_report(key_players)
print(threat_report)
# Identify key players and their influence
# events:    event rows; reads match_id, minute, team.name, player.name,
#            position.name, type.name, location.x, location.y,
#            pass.end_location.x, pass.outcome.name, pass.shot_assist,
#            pass.goal_assist, shot.statsbomb_xg, shot.outcome.name,
#            dribble.outcome.name, duel.type.name.
# team_name: team whose players are ranked.
#
# Returns a list: influence_ranking (one row per player and position, sorted
# by overall_influence), top_threats (first five with more than 45 minutes)
# and receiving_patterns (ball receipts per zone for those five).
analyze_key_players <- function(events, team_name) {
  team_events <- events %>%
    filter(team.name == team_name)

  # Minutes: first-to-last event span within each match, summed over matches.
  # A single max - min over the whole sample caps every player at roughly one
  # match regardless of how many were loaded.
  minutes_played <- team_events %>%
    group_by(player.name, position.name, match_id) %>%
    summarise(match_minutes = max(minute) - min(minute), .groups = "drop") %>%
    group_by(player.name, position.name) %>%
    summarise(minutes = sum(match_minutes), .groups = "drop")

  # Calculate influence metrics per player
  player_influence <- team_events %>%
    group_by(player.name, position.name) %>%
    summarise(
      touches = n(),
      passes = sum(type.name == "Pass"),
      # Restricted to pass rows: every non-pass event has a missing pass
      # outcome and would otherwise be counted as a completed pass.
      pass_completion = mean(
        (is.na(pass.outcome.name) |
          pass.outcome.name == "Complete")[type.name == "Pass"]
      ) * 100,
      progressive_passes = sum(
        type.name == "Pass" & (pass.end_location.x - location.x) > 10,
        na.rm = TRUE
      ),
      key_passes = sum(pass.shot_assist == TRUE, na.rm = TRUE),
      assists = sum(pass.goal_assist == TRUE, na.rm = TRUE),
      shots = sum(type.name == "Shot"),
      xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      goals = sum(
        type.name == "Shot" & shot.outcome.name == "Goal",
        na.rm = TRUE
      ),
      dribbles_completed = sum(
        type.name == "Dribble" & dribble.outcome.name == "Complete",
        na.rm = TRUE
      ),
      carries = sum(type.name == "Carry"),
      pressures = sum(type.name == "Pressure"),
      tackles = sum(
        type.name == "Duel" & duel.type.name == "Tackle",
        na.rm = TRUE
      ),
      interceptions = sum(type.name == "Interception"),
      .groups = "drop"
    ) %>%
    left_join(minutes_played, by = c("player.name", "position.name")) %>%
    relocate(minutes, .after = position.name) %>%
    mutate(
      # Players with no measurable playing time get NA instead of an
      # infinite per-90 rate that would sort to the top.
      nineties = if_else(minutes > 0, minutes / 90, NA_real_),
      # Calculate composite influence score
      attacking_influence =
        (xg * 10 + key_passes * 2 + progressive_passes * 0.5) / nineties,
      defensive_influence = (pressures + tackles + interceptions) / nineties,
      overall_influence = attacking_influence + defensive_influence * 0.5
    ) %>%
    select(-nineties) %>%
    arrange(desc(overall_influence))

  # Identify most dangerous players
  danger_players <- player_influence %>%
    filter(minutes > 45) %>% # Minimum playing time
    slice_head(n = 5)

  # Analyze how key players receive the ball
  # NOTE(review): the y < 27 / y > 53 halfspace labels below use the opposite
  # left/right convention to the attack_side labels in
  # analyze_defensive_weaknesses - confirm which one is intended.
  key_player_receiving <- team_events %>%
    filter(
      player.name %in% danger_players$player.name,
      type.name == "Ball Receipt*"
    ) %>%
    mutate(
      receive_zone = case_when(
        location.x > 80 & location.y < 27 ~ "Right Halfspace",
        location.x > 80 & location.y > 53 ~ "Left Halfspace",
        location.x > 80 ~ "Central",
        location.x > 60 ~ "Build-up Zone",
        TRUE ~ "Deep"
      )
    ) %>%
    group_by(player.name, receive_zone) %>%
    summarise(receipts = n(), .groups = "drop")

  list(
    influence_ranking = player_influence,
    top_threats = danger_players,
    receiving_patterns = key_player_receiving
  )
}
# Rank Barcelona's players using the events loaded earlier
key_players <- analyze_key_players(opponent_events, "Barcelona")
# Visualize key player zones
# Draw a density heatmap of one player's event locations on a pitch.
#
# events:      event rows with player.name, location.x and location.y.
# player_name: player to plot.
#
# Returns the ggplot object.
create_player_heatmap <- function(events, player_name) {
  player_events <- events %>%
    filter(
      player.name == player_name,
      !is.na(location.x),
      !is.na(location.y)
    )

  # The pitch helpers come from ggsoccer, which this script never attaches,
  # so they are namespace-qualified here.
  ggplot(player_events, aes(x = location.x, y = location.y)) +
    ggsoccer::annotate_pitch(
      dimensions = ggsoccer::pitch_statsbomb,
      colour = "grey80"
    ) +
    # after_stat(level) replaces the deprecated ..level.. notation
    stat_density_2d(
      aes(fill = after_stat(level)),
      geom = "polygon",
      alpha = 0.6
    ) +
    scale_fill_viridis_c(option = "plasma") +
    labs(
      title = paste(player_name, "- Touch Heatmap"),
      subtitle = "Opposition Analysis"
    ) +
    ggsoccer::theme_pitch() +
    coord_flip()
}
Set Piece Scouting
Detailed analysis of opposition set piece routines reveals patterns that can be defended or exploited.
def analyze_set_pieces(events, team_name):
    """
    Analyze opposition set piece routines.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows. Columns read here: 'team', 'type', 'pass_type',
        'pass_technique', 'location', 'pass_end_location'.
    team_name : str
        Team whose attacking set pieces are profiled.

    Returns
    -------
    dict
        When the team took corners: 'corner_patterns' (count per
        corner_type / target_zone / corner_side), 'preferred_delivery' and
        'most_targeted_zone'. When it took free kicks: 'free_kick_zones'
        (count per zone). Keys are absent when there is nothing to report.
    """
    def _coord(locations, axis, default):
        # Location cells hold [x, y] lists; anything else falls back to default.
        return locations.apply(
            lambda loc: loc[axis] if isinstance(loc, list) else default
        )

    team_events = events[events['team'] == team_name].copy()
    passes = team_events[team_events['type'] == 'Pass']
    analysis = {}

    # Corner analysis
    corners = passes[passes['pass_type'] == 'Corner'].copy()
    if len(corners) > 0:
        # Categorize corner deliveries
        corners['corner_type'] = corners['pass_technique'].map({
            'Inswinging': 'Inswinger',
            'Outswinging': 'Outswinger'
        }).fillna('Driven')
        corners['location_y'] = _coord(corners['location'], 1, 40)
        from_low_y = corners['location_y'] < 40
        # NOTE(review): y < 40 is labelled 'Right' here while
        # analyze_defensive_weaknesses labels low y as 'Left' - confirm which
        # convention is intended. Labels are left unchanged.
        corners['corner_side'] = np.where(from_low_y, 'Right', 'Left')

        # Target zone
        corners['end_x'] = _coord(corners['pass_end_location'], 0, 100)
        corners['end_y'] = _coord(corners['pass_end_location'], 1, 40)
        # Near/far post depends on the side the corner is taken from: the
        # y = 36 post is the near post for a corner from the low-y side and
        # the far post for one from the high-y side. Classifying by end_y
        # alone mislabels every corner taken from the high-y side.
        ends_low = corners['end_y'] < 36
        ends_high = corners['end_y'] > 44
        near_post = (from_low_y & ends_low) | (~from_low_y & ends_high)
        far_post = (from_low_y & ends_high) | (~from_low_y & ends_low)
        corners['target_zone'] = np.where(
            near_post, 'Near Post',
            np.where(far_post, 'Far Post', 'Central')
        )

        analysis['corner_patterns'] = (
            corners
            .groupby(['corner_type', 'target_zone', 'corner_side'])
            .size()
            .reset_index(name='count')
        )
        analysis['preferred_delivery'] = corners['corner_type'].mode()[0]
        analysis['most_targeted_zone'] = corners['target_zone'].mode()[0]

    # Free kick analysis. Select the free-kick pass itself via pass_type;
    # play_pattern == 'From Free Kick' is set on every pass of the possession
    # that follows a free kick, which inflates the counts.
    free_kicks = passes[passes['pass_type'] == 'Free Kick'].copy()
    if len(free_kicks) > 0:
        free_kicks['fk_x'] = _coord(free_kicks['location'], 0, 60)
        free_kicks['fk_zone'] = pd.cut(
            free_kicks['fk_x'],
            bins=[0, 80, 100, 120],
            labels=['Build-up', 'Crossing Position', 'Shooting Range'],
            include_lowest=True
        )
        analysis['free_kick_zones'] = (
            free_kicks
            .groupby('fk_zone', observed=False)
            .size()
            .reset_index(name='count')
        )

    return analysis
# Profile Barcelona's set pieces using the events loaded earlier
set_piece_analysis = analyze_set_pieces(opponent_events, 'Barcelona')
def visualize_corner_patterns(corner_data):
    """
    Bar chart of corner counts per target zone, one bar per delivery type.

    ``corner_data`` needs the columns 'target_zone', 'corner_type' and
    'count'. The figure is shown with plt.show().
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Rows = target zone, columns = delivery type, cells = summed counts
    zone_by_delivery = pd.pivot_table(
        corner_data,
        index='target_zone',
        columns='corner_type',
        values='count',
        aggfunc='sum',
        fill_value=0
    )
    zone_by_delivery.plot(kind='bar', ax=ax)

    ax.set_xlabel('Target Zone')
    ax.set_ylabel('Count')
    ax.set_title('Opposition Corner Delivery Patterns')
    ax.legend(title='Delivery Type')

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
# The 'corner_patterns' key is only set when the team took corners
if 'corner_patterns' in set_piece_analysis:
    visualize_corner_patterns(set_piece_analysis['corner_patterns'])
# Analyze opposition set piece routines
# events:    event rows for both teams; reads match_id, index, team.name,
#            type.name, player.name, pass.type.name, pass.technique.name,
#            pass.length, pass.cross, pass.shot_assist, pass.goal_assist,
#            location.x, location.y, pass.end_location.x, pass.end_location.y.
# team_name: team whose set pieces are profiled.
#
# Returns a list: corner_delivery, free_kick_patterns,
# defensive_organization and summary (corners_taken, preferred_delivery,
# most_targeted_zone).
analyze_set_pieces <- function(events, team_name) {
  team_events <- events %>%
    filter(team.name == team_name)

  # Corner analysis
  corners <- team_events %>%
    filter(type.name == "Pass", pass.type.name == "Corner")

  corner_analysis <- corners %>%
    mutate(
      corner_type = case_when(
        pass.technique.name == "Inswinging" ~ "Inswinger",
        pass.technique.name == "Outswinging" ~ "Outswinger",
        pass.length < 15 ~ "Short",
        TRUE ~ "Driven"
      ),
      from_low_y = location.y < 40,
      # Near/far post depends on the side the corner is taken from: the
      # y = 36 post is the near post for a corner from the low-y side and the
      # far post for one from the high-y side. Classifying by end y alone
      # mislabels every corner taken from the high-y side.
      target_zone = case_when(
        is.na(pass.end_location.x) | pass.end_location.x <= 114 ~ "Edge of Box",
        pass.end_location.y > 44 ~ if_else(from_low_y, "Far Post", "Near Post"),
        pass.end_location.y < 36 ~ if_else(from_low_y, "Near Post", "Far Post"),
        TRUE ~ "Central"
      ),
      # NOTE(review): y < 40 is labelled "Right" here while the attack_side
      # labels in analyze_defensive_weaknesses call low y "Left" - confirm
      # which convention is intended. Labels are left unchanged.
      corner_side = ifelse(from_low_y, "Right", "Left")
    ) %>%
    group_by(corner_type, target_zone, corner_side) %>%
    summarise(
      count = n(),
      shot_created = sum(!is.na(pass.shot_assist) | !is.na(pass.goal_assist)),
      .groups = "drop"
    )

  # Free kick analysis. Select the free-kick pass itself via pass.type.name;
  # play_pattern.name == "From Free Kick" is set on every pass of the
  # possession that follows a free kick.
  free_kicks <- team_events %>%
    filter(type.name == "Pass", pass.type.name == "Free Kick") %>%
    mutate(
      fk_zone = case_when(
        location.x > 100 ~ "Shooting Range",
        location.x > 80 ~ "Crossing Position",
        TRUE ~ "Build-up"
      ),
      delivery_type = case_when(
        pass.cross == TRUE ~ "Cross",
        pass.length < 10 ~ "Short",
        TRUE ~ "Direct"
      )
    )

  # Defensive set piece organization: the team's clearances in the ten events
  # that follow each corner it faced.
  defensive_corners <- events %>%
    filter(
      team.name != team_name,
      type.name == "Pass",
      pass.type.name == "Corner"
    )

  clearances <- events %>%
    filter(team.name == team_name, type.name == "Clearance") %>%
    select(
      match_id,
      # Renamed so it cannot collide with the corner's own `index` in the
      # join; the earlier filter compared `index` with itself.
      clearance_index = index,
      clearance_location_x = location.x,
      clearance_location_y = location.y,
      clearing_player = player.name
    )

  # Every corner is paired with every clearance of the same match and then
  # narrowed to the ten-event window (newer dplyr versions warn about the
  # many-to-many match; it is intended here).
  defensive_system <- defensive_corners %>%
    inner_join(clearances, by = "match_id") %>%
    filter(clearance_index > index, clearance_index <= index + 10)

  most_frequent <- function(column) {
    corner_analysis %>%
      group_by(.data[[column]]) %>%
      summarise(n = sum(count), .groups = "drop") %>%
      arrange(desc(n)) %>%
      slice(1) %>%
      pull(1)
  }

  list(
    corner_delivery = corner_analysis,
    free_kick_patterns = free_kicks,
    defensive_organization = defensive_system,
    # Key stats
    summary = list(
      corners_taken = nrow(corners),
      preferred_delivery = most_frequent("corner_type"),
      most_targeted_zone = most_frequent("target_zone")
    )
  )
}
# Profile Barcelona's set pieces using the events loaded earlier
set_piece_analysis <- analyze_set_pieces(opponent_events, "Barcelona")
# Visualize corner delivery patterns
# Dodged bar chart of corner counts per target zone, filled by delivery type,
# with one panel per corner side. Expects columns target_zone, count,
# corner_type and corner_side.
visualize_corner_patterns <- function(corners_data) {
  base_plot <- ggplot(
    data = corners_data,
    mapping = aes(x = target_zone, y = count, fill = corner_type)
  )

  base_plot +
    geom_col(position = "dodge") +
    facet_wrap(vars(corner_side)) +
    ggtitle("Opposition Corner Delivery Patterns") +
    xlab("Target Zone") +
    ylab("Count") +
    labs(fill = "Delivery Type") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
Generating Tactical Recommendations
The final step is translating data insights into actionable tactical recommendations for coaches and players.
def generate_opposition_report(team_name, style, weaknesses, key_players, set_pieces):
    """
    Assemble the opposition report from the individual analyses.

    Parameters
    ----------
    team_name : str
        Opponent name used in the summary sentence.
    style : mapping
        Style metrics; reads possession_share, avg_pass_length,
        long_ball_pct, pressures_per_90, crosses_per_90, xg_per_shot and
        high_press_pct.
    weaknesses : mapping
        Reads weaknesses['transition_vulnerability']['xg'].
    key_players : mapping
        Reads key_players['top_threats']; its first row feeds the key-player
        strategy.
    set_pieces : dict
        Optional 'preferred_delivery' / 'most_targeted_zone' entries; an
        empty dict skips the set-piece section.

    Returns
    -------
    dict
        Keys 'team', 'summary' and 'recommendations'.
    """
    # Rules are evaluated lazily and in order; the first match wins.
    style_rules = (
        (lambda s: s['possession_share'] > 55 and s['avg_pass_length'] < 18,
         'possession-based'),
        (lambda s: s['long_ball_pct'] > 15, 'direct'),
        (lambda s: s['pressures_per_90'] > 150, 'high-pressing'),
    )
    style_label = next(
        (label for matches, label in style_rules if matches(style)),
        'balanced'
    )

    threat_rules = (
        (lambda s: s['crosses_per_90'] > 5, 'wide areas and crosses'),
        (lambda s: s['xg_per_shot'] > 0.12, 'high-quality chances centrally'),
    )
    main_threat = next(
        (label for matches, label in threat_rules if matches(style)),
        'patient build-up play'
    )

    # Executive summary
    summary = (
        f"{team_name} play a {style_label} style with "
        f"{style['possession_share']:.0f}% possession. "
        f"They are most dangerous through {main_threat}."
    )

    recommendations = {}

    # Pressing recommendations
    if style['high_press_pct'] > 40:
        recommendations['against_press'] = {
            'situation': 'Against their high press',
            'recommendation': 'Use long balls to bypass press, target channels behind fullbacks',
            'key_players': 'Goalkeeper and CBs need to be comfortable under pressure'
        }
    else:
        recommendations['in_possession'] = {
            'situation': 'Against their mid-block',
            'recommendation': 'Patient build-up, switches of play to create overloads',
            'key_players': 'Midfielders need to find pockets of space'
        }

    # Defensive recommendations
    defensive_notes = []
    if style['crosses_per_90'] > 5:
        defensive_notes.append('Expect crossing - ensure good box coverage')
    recommendations['defensive'] = defensive_notes

    if weaknesses['transition_vulnerability']['xg'] > 2:
        recommendations['offensive'] = {
            'transitions': 'Exploit on transitions - they concede from counters'
        }

    # Key player strategy
    threats = key_players['top_threats']
    if len(threats) > 0:
        top_threat = threats.iloc[0]
        recommendations['key_player'] = {
            'player': top_threat['player'],
            'position': top_threat['position'],
            'strategy': generate_player_strategy(top_threat)
        }

    # Set piece tactics
    if set_pieces:
        delivery = set_pieces.get('preferred_delivery', 'varied')
        target = set_pieces.get('most_targeted_zone', 'central area')
        recommendations['set_pieces'] = {
            'corners': f"Most corners are {delivery} "
                       f"targeting the {target}",
            'defensive_priority': 'Assign marker to their tallest player at corners'
        }

    return {
        'team': team_name,
        'summary': summary,
        'recommendations': recommendations
    }
def generate_player_strategy(player):
    """
    Return the neutralization advice that matches ``player['position']``.

    Strikers and attacking midfielders/wingers get dedicated advice; every
    other position gets the generic pressing advice.
    """
    advice_by_role = (
        (('Center Forward', 'Striker'),
         "Man-mark in box, deny service, press when receiving back to goal"),
        (('Attacking Midfield', 'Right Wing', 'Left Wing'),
         "Double up when on ball, prevent turns, force to weaker foot"),
    )
    role = player['position']
    for roles, advice in advice_by_role:
        if role in roles:
            return advice
    return "Press early, prevent easy progression, force long balls"
# Generate report from the four analyses computed above
opposition_report = generate_opposition_report(
    team_name='Barcelona',
    style=barcelona_style,
    weaknesses=barcelona_weaknesses,
    key_players=key_players,
    set_pieces=set_piece_analysis
)

# Header and summary
print("OPPOSITION ANALYSIS REPORT")
print("=" * 50)
print(f"\n{opposition_report['summary']}\n")

# One section per recommendation category: dicts print as "key: value"
# bullets, lists as plain bullets.
print("TACTICAL RECOMMENDATIONS:")
for section, advice in opposition_report['recommendations'].items():
    print(f"\n{section.upper()}:")
    if isinstance(advice, dict):
        bullets = [f" - {label}: {text}" for label, text in advice.items()]
    elif isinstance(advice, list):
        bullets = [f" - {entry}" for entry in advice]
    else:
        bullets = []
    for bullet in bullets:
        print(bullet)
# Generate comprehensive opposition report
# Assemble the opposition report from the individual analyses.
#
# team_name:   opponent name used in the summary sentence.
# style:       style metrics (list or one-row tibble) with possession_share,
#              avg_pass_length, long_ball_pct, pressures_per_90,
#              crosses_per_90, xg_per_shot, high_press_pct.
# weaknesses:  reads transition_vulnerability$transition_xg and
#              set_piece_vulnerability$set_piece_xg.
# key_players: reads top_threats (player.name, position.name).
# set_pieces:  reads summary$preferred_delivery and
#              summary$most_targeted_zone.
#
# Returns a list with `summary` (string) and `recommendations` (nested list).
generate_opposition_report <- function(team_name, style, weaknesses, key_players, set_pieces) {
  report <- list()

  # Executive summary
  report$summary <- paste0(
    team_name, " play a ", determine_style_label(style), " style ",
    "with ", round(style$possession_share, 0), "% possession. ",
    "They are most dangerous through ", identify_main_threat(style), ". ",
    "Key vulnerabilities include ", identify_main_weakness(weaknesses), "."
  )

  # Tactical recommendations
  report$recommendations <- list()

  # Pressing recommendations (isTRUE() so a missing metric selects the
  # mid-block advice instead of stopping with an error)
  if (isTRUE(style$high_press_pct > 40)) {
    report$recommendations$pressing <- list(
      situation = "Against their high press",
      recommendation = "Use long balls to bypass press, target channels behind fullbacks",
      key_players = "Goalkeeper and CBs need to be comfortable under pressure"
    )
  } else {
    report$recommendations$pressing <- list(
      situation = "Against their mid-block",
      recommendation = "Patient build-up, look for switches of play to create overloads",
      key_players = "Midfielders need to find pockets of space"
    )
  }

  # Defensive recommendations
  if (isTRUE(style$crosses_per_90 > 5)) {
    report$recommendations$defensive$wide_areas <- "Expect crossing - ensure good box coverage, mark aerially strong players"
  }
  if (isTRUE(weaknesses$transition_vulnerability$transition_xg > 2)) {
    report$recommendations$offensive$transitions <- "Exploit on transitions - they concede dangerous chances from counter-attacks"
  }

  # Key player neutralization (skipped when no player passed the filter;
  # indexing row 1 of an empty table would yield an all-NA "player")
  if (NROW(key_players$top_threats) > 0) {
    top_threat <- key_players$top_threats[1, ]
    report$recommendations$key_player <- list(
      player = top_threat$player.name,
      position = top_threat$position.name,
      strategy = generate_player_strategy(top_threat)
    )
  }

  # Set piece tactics
  report$recommendations$set_pieces <- list(
    corners = paste(
      "Most corners are", set_pieces$summary$preferred_delivery,
      "targeting the", set_pieces$summary$most_targeted_zone
    ),
    defensive_priority = "Assign marker to their tallest player at corners"
  )

  report
}

# Helper functions

# Label the overall playing style; the first matching rule wins.
determine_style_label <- function(style) {
  if (isTRUE(style$possession_share > 55 && style$avg_pass_length < 18)) {
    "possession-based"
  } else if (isTRUE(style$long_ball_pct > 15)) {
    "direct"
  } else if (isTRUE(style$pressures_per_90 > 150)) {
    "high-pressing"
  } else {
    "balanced"
  }
}

# Name the main attacking threat; the first matching rule wins.
identify_main_threat <- function(style) {
  if (isTRUE(style$crosses_per_90 > 5)) {
    "wide areas and crosses"
  } else if (isTRUE(style$xg_per_shot > 0.12)) {
    "high-quality chances centrally"
  } else {
    "patient build-up play"
  }
}

# Name the situation in which the opponent concedes the most xG. The report
# called this helper without it being defined; it compares the transition and
# set-piece xG totals produced by analyze_defensive_weaknesses().
identify_main_weakness <- function(weaknesses) {
  as_scalar <- function(value) {
    if (is.null(value) || length(value) == 0) NA_real_ else as.numeric(value[[1]])
  }
  conceded_xg <- c(
    "defending transitions" =
      as_scalar(weaknesses$transition_vulnerability$transition_xg),
    "defending set pieces" =
      as_scalar(weaknesses$set_piece_vulnerability$set_piece_xg)
  )
  conceded_xg <- conceded_xg[!is.na(conceded_xg) & conceded_xg > 0]
  if (length(conceded_xg) == 0) {
    return("no clear pattern in the chances conceded")
  }
  names(conceded_xg)[which.max(conceded_xg)]
}

# Neutralization advice by position, mirroring the Python helper of the same
# name. The report called this helper without it being defined.
generate_player_strategy <- function(player) {
  position <- player$position.name
  if (isTRUE(position %in% c("Center Forward", "Striker"))) {
    "Man-mark in box, deny service, press when receiving back to goal"
  } else if (isTRUE(position %in% c("Attacking Midfield", "Right Wing", "Left Wing"))) {
    "Double up when on ball, prevent turns, force to weaker foot"
  } else {
    "Press early, prevent easy progression, force long balls"
  }
}
# Generate the report from the four analyses computed above and print its
# summary sentence
opposition_report <- generate_opposition_report(
  team_name = "Barcelona",
  style = barcelona_style,
  weaknesses = barcelona_weaknesses,
  key_players = key_players,
  set_pieces = set_piece_analysis
)

cat(opposition_report[["summary"]])
Practice Exercises
Exercise 23.1: Complete Opposition Analysis Report Generator
Task: Build an automated opposition report generator that analyzes a team's last 5-10 matches and produces a comprehensive tactical report including playing style metrics, weakness identification, key player analysis, and actionable recommendations.
Requirements:
- Calculate 15+ style metrics (possession, directness, pressing, width, etc.)
- Compare opponent to league average
- Identify top 3 exploitable weaknesses
- Profile top 5 dangerous players
- Generate text-based tactical recommendations
import pandas as pd
import numpy as np
from collections import defaultdict
# Simulate match event data
# Fixed seed so the simulated events are reproducible between runs
np.random.seed(42)
# Teams in the simulated league; one set of events is generated per team
teams = ["Manchester City", "Liverpool", "Arsenal", "Chelsea",
"Man United", "Tottenham", "Newcastle", "Brighton"]
def simulate_match_events(team_name, n_matches=10):
    """Build a synthetic event table for one team.

    Each match (ids 1..n_matches) gets between 400 and 600 random events
    located on a 120 x 80 pitch grid. ``shot_xg``/``shot_outcome`` are only
    populated on 'Shot' rows and ``dribble_outcome`` only on 'Dribble' rows.
    All draws come from NumPy's global random state, so seed it beforehand
    for reproducible output.

    Returns one DataFrame with every match stacked under a fresh index.
    """
    event_types = ['Pass', 'Carry', 'Shot', 'Dribble', 'Pressure', 'Duel',
                   'Clearance', 'Interception', 'Cross', 'Foul']
    event_weights = [0.55, 0.12, 0.03, 0.05, 0.08, 0.07, 0.04, 0.03, 0.02, 0.01]
    squad = [f'Player_{i}' for i in range(1, 15)]

    frames = []
    for match_id in range(1, n_matches + 1):
        size = np.random.randint(400, 601)

        # The draws below are kept in a fixed order: changing it would change
        # the random stream and therefore the data produced for a given seed.
        minutes = sorted(np.random.randint(1, 96, size))
        kinds = np.random.choice(event_types, size, p=event_weights)
        start_x = np.random.uniform(0, 120, size)
        start_y = np.random.uniform(0, 80, size)
        end_x = np.random.uniform(0, 120, size)
        end_y = np.random.uniform(0, 80, size)
        lengths = np.random.uniform(2, 50, size)
        pass_results = np.random.choice(['Complete', 'Incomplete'], size, p=[0.8, 0.2])
        players = np.random.choice(squad, size)

        frame = pd.DataFrame({
            'match_id': match_id,
            'team': team_name,
            'minute': minutes,
            'event_type': kinds,
            'location_x': start_x,
            'location_y': start_y,
            'end_location_x': end_x,
            'end_location_y': end_y,
            'pass_length': lengths,
            'pass_outcome': pass_results,
            'player_name': players,
        })

        # Shot-specific columns: values are drawn for every row, then blanked
        # out wherever the event is not a shot.
        is_shot = frame['event_type'] == 'Shot'
        xg_draw = np.random.uniform(0.02, 0.6, size)
        shot_draw = np.random.choice(
            ['Goal', 'Saved', 'Off Target', 'Blocked'], size, p=[0.1, 0.4, 0.3, 0.2]
        )
        frame['shot_xg'] = np.where(is_shot, xg_draw, np.nan)
        frame['shot_outcome'] = np.where(is_shot, shot_draw, None)

        # Same approach for dribbles.
        is_dribble = frame['event_type'] == 'Dribble'
        dribble_draw = np.random.choice(['Complete', 'Incomplete'], size, p=[0.55, 0.45])
        frame['dribble_outcome'] = np.where(is_dribble, dribble_draw, None)

        frames.append(frame)

    return pd.concat(frames, ignore_index=True)
# Generate data for all teams
# Simulate each team in turn, then stack the per-team tables into one frame
per_team_events = [simulate_match_events(name) for name in teams]
all_team_data = pd.concat(per_team_events, ignore_index=True)
def calculate_style_metrics(team_events):
    """Calculate comprehensive style metrics for a team.

    Parameters
    ----------
    team_events : pandas.DataFrame
        Event rows for a single team. Columns read: ``match_id``,
        ``event_type``, ``location_x``, ``location_y``, ``end_location_x``,
        ``pass_length``, ``pass_outcome``, ``shot_xg``, ``shot_outcome`` and
        ``dribble_outcome``.

    Returns
    -------
    dict
        Metric name -> value. The ``*_per_90`` metrics are totals divided by
        the number of distinct matches (i.e. per match). Any metric whose
        denominator is empty (no matches, no events, no passes, no shots,
        ...) is reported as 0 rather than raising or returning NaN.
    """
    passes = team_events[team_events['event_type'] == 'Pass']
    shots = team_events[team_events['event_type'] == 'Shot']
    pressures = team_events[team_events['event_type'] == 'Pressure']
    crosses = team_events[team_events['event_type'] == 'Cross']
    dribbles = team_events[team_events['event_type'] == 'Dribble']
    n_matches = team_events['match_id'].nunique()

    def share(mask):
        # Percentage of True values; 0 when there is nothing to average.
        return mask.mean() * 100 if len(mask) > 0 else 0

    def per_match(total):
        # Guard against an empty event table (zero matches).
        return total / n_matches if n_matches > 0 else 0

    return {
        # Possession & Build-up
        'passes_per_90': per_match(len(passes)),
        'pass_completion': share(passes['pass_outcome'] == 'Complete'),
        'avg_pass_length': passes['pass_length'].mean() if len(passes) > 0 else 0,
        'long_ball_pct': share(passes['pass_length'] > 30),
        'progressive_pass_pct': share((passes['end_location_x'] - passes['location_x']) > 10),
        # Possession zones (thirds of the 120-unit pitch length)
        'possession_own_third': share(team_events['location_x'] < 40),
        'possession_mid_third': share((team_events['location_x'] >= 40) & (team_events['location_x'] < 80)),
        'possession_final_third': share(team_events['location_x'] >= 80),
        # Width (spread needs at least two events to be defined)
        'avg_width': team_events['location_y'].std() if len(team_events) > 1 else 0,
        'left_side_pct': share(team_events['location_y'] < 30),
        'right_side_pct': share(team_events['location_y'] > 50),
        # Pressing
        'pressures_per_90': per_match(len(pressures)),
        'high_press_pct': share(pressures['location_x'] > 80),
        # Attacking
        'shots_per_90': per_match(len(shots)),
        'xg_per_shot': shots['shot_xg'].mean() if len(shots) > 0 else 0,
        'xg_per_90': per_match(shots['shot_xg'].sum()) if len(shots) > 0 else 0,
        'shot_accuracy': share(shots['shot_outcome'].isin(['Goal', 'Saved'])),
        # Crosses & Dribbles
        'crosses_per_90': per_match(len(crosses)),
        'dribbles_per_90': per_match(len(dribbles)),
        'dribble_success': share(dribbles['dribble_outcome'] == 'Complete'),
        # Directness: expected goals generated per 1000 passes
        'directness_index': (shots['shot_xg'].sum() / len(passes)) * 1000 if len(passes) > 0 else 0
    }
# Calculate metrics for all teams
# One metrics dict per team, keyed by team name
league_metrics = {}
for team in teams:
    league_metrics[team] = calculate_style_metrics(all_team_data[all_team_data['team'] == team])
# Same data as a table: one row per team, one column per metric
league_metrics_df = (
    pd.DataFrame(league_metrics)
    .T
    .reset_index()
    .rename(columns={'index': 'team'})
)
# Calculate league average
# Metric names are taken from the first team's dict; each is averaged over all teams
league_avg = {
    metric: np.mean([team_metrics[metric] for team_metrics in league_metrics.values()])
    for metric in league_metrics[teams[0]].keys()
}
def generate_opposition_report(opponent_name, team_data, league_metrics_dict, league_avg):
    """Generate comprehensive opposition analysis report.

    Prints a four-part report (style profile, weaknesses, key player
    threats, tactical recommendations) and returns the underlying data.

    Parameters
    ----------
    opponent_name : str
        Team to analyse; must be a key of ``league_metrics_dict``.
    team_data : pandas.DataFrame
        Event rows for all teams. Columns read: ``team``, ``player_name``,
        ``event_type``, ``shot_xg``, ``shot_outcome``, ``end_location_x``.
    league_metrics_dict : dict
        Team name -> dict of style metrics.
    league_avg : dict
        Metric name -> league-wide average of that metric.

    Returns
    -------
    dict
        ``style`` (the opponent's metric dict), ``weaknesses`` (list of
        message strings) and ``threats`` (DataFrame with up to five players
        ranked by threat score; columns ``player``, ``events``, ``xg``,
        ``goals``, ``key_passes``, ``threat_score``).

    Raises
    ------
    KeyError
        If ``opponent_name`` has no entry in ``league_metrics_dict``.
    """
    if opponent_name not in league_metrics_dict:
        raise KeyError(f"No style metrics available for team {opponent_name!r}")

    opponent_events = team_data[team_data['team'] == opponent_name]
    opponent_style = league_metrics_dict[opponent_name]

    print("\n" + "=" * 72)
    print(f" OPPOSITION ANALYSIS REPORT: {opponent_name}")
    print("=" * 72 + "\n")

    # 1. Style Profile
    print("1. PLAYING STYLE PROFILE")
    print("-" * 50)
    # Determine style label: rules are checked in priority order, first match wins
    if (opponent_style['possession_final_third'] > league_avg['possession_final_third'] and
            opponent_style['avg_pass_length'] < league_avg['avg_pass_length']):
        style_label = "Possession-Based"
    elif opponent_style['long_ball_pct'] > league_avg['long_ball_pct'] + 5:
        style_label = "Direct"
    elif opponent_style['high_press_pct'] > league_avg['high_press_pct'] + 10:
        style_label = "High-Pressing"
    elif opponent_style['crosses_per_90'] > league_avg['crosses_per_90'] + 2:
        style_label = "Wing-Focused"
    else:
        style_label = "Balanced"
    print(f"Style Classification: {style_label}\n")

    # Key metrics comparison
    key_metrics = ['pass_completion', 'progressive_pass_pct', 'pressures_per_90',
                   'high_press_pct', 'shots_per_90', 'xg_per_shot', 'crosses_per_90']
    print("Key Metrics vs League Average:")
    for metric in key_metrics:
        opp_val = opponent_style[metric]
        league_val = league_avg[metric]
        direction = "ABOVE" if opp_val > league_val else "BELOW"
        print(f" {metric.replace('_', ' '):<25}: {opp_val:.1f} (League: {league_val:.1f}) - {direction}")

    # 2. Weakness Analysis
    print("\n2. EXPLOITABLE WEAKNESSES")
    print("-" * 50)
    weaknesses = []
    if opponent_style['high_press_pct'] < league_avg['high_press_pct'] - 5:
        weaknesses.append("LOW PRESS - Space available in midfield. Build from back confidently.")
    if opponent_style['pass_completion'] < league_avg['pass_completion'] - 3:
        weaknesses.append("TURNOVER PRONE - Press high to force errors and create transitions.")
    if opponent_style['left_side_pct'] < opponent_style['right_side_pct'] - 10:
        weaknesses.append("WEAK LEFT SIDE - Attack down their left; overload with RW + RB.")
    if opponent_style['dribble_success'] < league_avg['dribble_success'] - 5:
        # dribble_success measures the opponent's OWN take-ons, so a low value
        # says their dribblers lose the ball - not that they defend 1v1s badly.
        weaknesses.append("POOR DRIBBLING - Their take-ons often fail; commit to 1v1 duels to win the ball.")
    if not weaknesses:
        print(" No major weaknesses identified.")
    else:
        for i, weakness in enumerate(weaknesses, 1):
            print(f" {i}. {weakness}")

    # 3. Key Player Threats
    print("\n3. KEY PLAYER THREATS")
    print("-" * 50)
    # Flag goals and key passes per event, then aggregate once per player.
    # This avoids a merge that would leave a half-empty duplicate name column.
    tagged = opponent_events.assign(
        is_goal=opponent_events['shot_outcome'] == 'Goal',
        is_key_pass=(opponent_events['event_type'] == 'Pass')
        & (opponent_events['end_location_x'] > 100),
    )
    player_stats = (
        tagged.groupby('player_name')
        .agg(
            events=('event_type', 'count'),
            xg=('shot_xg', 'sum'),
            goals=('is_goal', 'sum'),
            key_passes=('is_key_pass', 'sum'),
        )
        .reset_index()
        .rename(columns={'player_name': 'player'})
    )
    player_stats['threat_score'] = player_stats['xg'] * 5 + player_stats['key_passes'] * 0.5
    top_threats = player_stats.nlargest(5, 'threat_score')
    for i, p in enumerate(top_threats.itertuples(index=False), 1):
        print(f" {i}. {p.player} - Threat Score: {p.threat_score:.1f}")
        print(f" xG: {p.xg:.2f} | Goals: {int(p.goals)} | Key Passes: {int(p.key_passes)}\n")

    # 4. Tactical Recommendations
    print("4. TACTICAL RECOMMENDATIONS")
    print("-" * 50)
    print("\n IN POSSESSION:")
    if opponent_style['high_press_pct'] > league_avg['high_press_pct']:
        print(" - Play through their press with quick combinations")
        print(" - Use wide areas to escape pressure")
        print(" - Goalkeeper comfortable playing with feet essential")
    else:
        print(" - Build patiently; they don't press high")
        print(" - Look for progressive passes into midfield pockets")
        print(" - Switch play to exploit width")
    print("\n OUT OF POSSESSION:")
    if opponent_style['long_ball_pct'] > league_avg['long_ball_pct']:
        print(" - Win second balls; strong aerial presence needed")
        print(" - Compress midfield to limit space for knockdowns")
    else:
        print(" - Press in middle third to disrupt build-up")
        print(" - Man-mark their key playmaker")
    print("\n" + "=" * 72)

    return {
        'style': opponent_style,
        'weaknesses': weaknesses,
        'threats': top_threats
    }
# Generate report for Manchester City
report = generate_opposition_report("Manchester City", all_team_data, league_metrics, league_avg)
library(tidyverse)
# Simulate comprehensive match event data
# Fixed seed so the simulated events are reproducible between runs
set.seed(42)
# Generate team and league data
# Teams in the simulated league; one set of events is generated per team
teams <- c("Manchester City", "Liverpool", "Arsenal", "Chelsea",
"Man United", "Tottenham", "Newcastle", "Brighton")
# Function to simulate match events
# Simulate event data for one team across several matches.
#
# Args:
#   team_name: label written to the `team` column of every event.
#   n_matches: number of matches to simulate (ids 1..n_matches); 0 yields
#     an empty result.
#
# Returns a tibble with one row per event. Shot and dribble outcome columns
# are NA on rows of any other event type. Draws use R's global RNG, so call
# set.seed() first for reproducible output.
simulate_match_events <- function(team_name, n_matches = 10) {
  # seq_len() gives an empty sequence for n_matches = 0, whereas 1:0 would
  # count down and simulate matches 1 and 0.
  map_dfr(seq_len(n_matches), function(m) {
    n_events <- sample(400:600, 1)
    # Column order below fixes the order of random draws; keep it stable so
    # a given seed keeps producing the same data.
    tibble(
      match_id = m,
      team = team_name,
      minute = sort(sample(1:95, n_events, replace = TRUE)),
      event_type = sample(
        c("Pass", "Carry", "Shot", "Dribble", "Pressure", "Duel",
          "Clearance", "Interception", "Cross", "Foul"),
        n_events, replace = TRUE,
        prob = c(0.55, 0.12, 0.03, 0.05, 0.08, 0.07, 0.04, 0.03, 0.02, 0.01)
      ),
      location_x = runif(n_events, 0, 120),
      location_y = runif(n_events, 0, 80),
      end_location_x = runif(n_events, 0, 120),
      end_location_y = runif(n_events, 0, 80),
      pass_length = runif(n_events, 2, 50),
      pass_outcome = sample(c("Complete", "Incomplete"), n_events,
                            replace = TRUE, prob = c(0.8, 0.2)),
      # Typed NAs keep each column's type stable even if a match happens to
      # contain no shots or dribbles.
      shot_xg = ifelse(event_type == "Shot", runif(n_events, 0.02, 0.6), NA_real_),
      shot_outcome = ifelse(
        event_type == "Shot",
        sample(c("Goal", "Saved", "Off Target", "Blocked"),
               n_events, replace = TRUE, prob = c(0.1, 0.4, 0.3, 0.2)),
        NA_character_
      ),
      dribble_outcome = ifelse(
        event_type == "Dribble",
        sample(c("Complete", "Incomplete"), n_events,
               replace = TRUE, prob = c(0.55, 0.45)),
        NA_character_
      ),
      player_name = sample(paste0("Player_", 1:14), n_events, replace = TRUE)
    )
  })
}
# Generate data for all teams
# Each team is simulated with the default number of matches; rows are stacked
all_team_data <- map_dfr(teams, simulate_match_events)
# Comprehensive style metrics calculation
# Summarise one team's events into a one-row tibble of style metrics.
#
# `team_events` must hold a single team's events with the columns match_id,
# event_type, location_x, location_y, end_location_x, pass_length,
# pass_outcome, shot_xg, shot_outcome and dribble_outcome. The "*_per_90"
# metrics are totals divided by the number of distinct matches. Metrics whose
# subset is empty come out as NaN.
calculate_style_metrics <- function(team_events) {
  # Event subsets by type
  passes <- filter(team_events, event_type == "Pass")
  shots <- filter(team_events, event_type == "Shot")
  pressures <- filter(team_events, event_type == "Pressure")
  crosses <- filter(team_events, event_type == "Cross")
  dribbles <- filter(team_events, event_type == "Dribble")
  n_matches <- n_distinct(team_events$match_id)

  # Shared intermediates
  pass_gain <- passes$end_location_x - passes$location_x
  event_x <- team_events$location_x
  event_y <- team_events$location_y
  press_x <- pressures$location_x
  total_xg <- sum(shots$shot_xg, na.rm = TRUE)

  tibble(
    # Possession & Build-up
    passes_per_90 = nrow(passes) / n_matches,
    pass_completion = mean(passes$pass_outcome == "Complete") * 100,
    avg_pass_length = mean(passes$pass_length),
    long_ball_pct = mean(passes$pass_length > 30) * 100,
    progressive_pass_pct = mean(pass_gain > 10) * 100,
    backward_pass_pct = mean(pass_gain < -5) * 100,
    # Possession zones (thirds of the 120-unit pitch length)
    possession_own_third = mean(event_x < 40) * 100,
    possession_mid_third = mean(event_x >= 40 & event_x < 80) * 100,
    possession_final_third = mean(event_x >= 80) * 100,
    # Width
    avg_width = sd(event_y),
    left_side_pct = mean(event_y < 30) * 100,
    right_side_pct = mean(event_y > 50) * 100,
    # Pressing
    pressures_per_90 = nrow(pressures) / n_matches,
    high_press_pct = mean(press_x > 80) * 100,
    mid_press_pct = mean(press_x >= 40 & press_x <= 80) * 100,
    # Attacking
    shots_per_90 = nrow(shots) / n_matches,
    xg_per_shot = mean(shots$shot_xg, na.rm = TRUE),
    xg_per_90 = total_xg / n_matches,
    shot_accuracy = mean(shots$shot_outcome %in% c("Goal", "Saved"), na.rm = TRUE) * 100,
    # Crosses & Dribbles
    crosses_per_90 = nrow(crosses) / n_matches,
    dribbles_per_90 = nrow(dribbles) / n_matches,
    dribble_success = mean(dribbles$dribble_outcome == "Complete", na.rm = TRUE) * 100,
    # Directness: expected goals generated per 1000 passes
    directness_index = (total_xg / nrow(passes)) * 1000
  )
}
# Calculate metrics for all teams
# One row of style metrics per team, tagged with the team name
league_metrics <- map_dfr(teams, function(team_name) {
  team_events <- all_team_data %>% filter(team == team_name)
  metrics <- calculate_style_metrics(team_events)
  metrics$team <- team_name
  metrics
})
# League average
# mean() is wrapped in a lambda because passing extra arguments through
# across()'s `...` is deprecated as of dplyr 1.1.0.
league_avg <- league_metrics %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) %>%
  mutate(team = "League Average")
# Opposition report generator
# Print a four-part opposition report for one team and return its data.
#
# Args:
#   opponent_name: team to analyse; must match exactly one row of
#     `league_metrics$team`.
#   team_data: event-level tibble for all teams (columns read: team,
#     player_name, event_type, shot_xg, shot_outcome, location_x,
#     end_location_x).
#   league_metrics: one row of style metrics per team, with a `team` column.
#   league_avg: one-row tibble of league-average metrics.
#
# Returns (invisibly) a list with `style` (the opponent's metric row),
# `weaknesses` (named list of message strings) and `threats` (up to five
# players ranked by threat score). Stops with an error if the opponent does
# not have exactly one row in `league_metrics`.
generate_opposition_report <- function(opponent_name, team_data, league_metrics, league_avg) {
  opponent_events <- team_data %>% filter(team == opponent_name)
  opponent_style <- league_metrics %>% filter(team == opponent_name)
  # Fail early with a clear message instead of an opaque error from the
  # first `if` that touches a zero-row (or multi-row) metric.
  if (nrow(opponent_style) != 1) {
    stop(
      "Expected exactly one row of style metrics for team '", opponent_name,
      "', found ", nrow(opponent_style),
      call. = FALSE
    )
  }

  section_rule <- function() cat(strrep("-", 50), "\n", sep = "")

  cat("\n")
  cat(strrep("=", 71), "\n", sep = "")
  cat(" OPPOSITION ANALYSIS REPORT: ", opponent_name, "\n", sep = "")
  cat(strrep("=", 71), "\n\n", sep = "")

  # 1. Style Profile
  cat("1. PLAYING STYLE PROFILE\n")
  section_rule()
  # Rules are evaluated top to bottom; the first TRUE one decides the label
  style_label <- case_when(
    opponent_style$possession_final_third > league_avg$possession_final_third &
      opponent_style$avg_pass_length < league_avg$avg_pass_length ~ "Possession-Based",
    opponent_style$long_ball_pct > league_avg$long_ball_pct + 5 ~ "Direct",
    opponent_style$high_press_pct > league_avg$high_press_pct + 10 ~ "High-Pressing",
    opponent_style$crosses_per_90 > league_avg$crosses_per_90 + 2 ~ "Wing-Focused",
    TRUE ~ "Balanced"
  )
  cat("Style Classification:", style_label, "\n\n")

  # Key metrics vs league
  key_metrics <- c("pass_completion", "progressive_pass_pct", "pressures_per_90",
                   "high_press_pct", "shots_per_90", "xg_per_shot", "crosses_per_90")
  cat("Key Metrics vs League Average:\n")
  for (metric in key_metrics) {
    opp_val <- opponent_style[[metric]]
    league_val <- league_avg[[metric]]
    direction <- ifelse(opp_val - league_val > 0, "ABOVE", "BELOW")
    cat(sprintf(" %-25s: %.1f (League: %.1f) - %s\n",
                gsub("_", " ", metric), opp_val, league_val, direction))
  }

  # 2. Weakness Analysis
  cat("\n2. EXPLOITABLE WEAKNESSES\n")
  section_rule()
  weaknesses <- list()
  if (opponent_style$high_press_pct < league_avg$high_press_pct - 5) {
    weaknesses$press <- "LOW PRESS - Space available in midfield. Build from back confidently."
  }
  if (opponent_style$pass_completion < league_avg$pass_completion - 3) {
    weaknesses$turnovers <- "TURNOVER PRONE - Press high to force errors and create transitions."
  }
  if (opponent_style$left_side_pct < opponent_style$right_side_pct - 10) {
    weaknesses$left_side <- "WEAK LEFT SIDE - Attack down their left; overload with RW + RB."
  }
  if (opponent_style$possession_own_third > league_avg$possession_own_third + 5) {
    weaknesses$deep_play <- "DEEP BUILD-UP - High press can force long balls and turnovers."
  }
  if (opponent_style$dribble_success < league_avg$dribble_success - 5) {
    # dribble_success measures the opponent's OWN take-ons, so a low value
    # says their dribblers lose the ball - not that they defend 1v1s badly.
    # The list key is kept as `dribble_def` for existing callers.
    weaknesses$dribble_def <- "POOR DRIBBLING - Their take-ons often fail; commit to 1v1 duels to win the ball."
  }
  if (length(weaknesses) == 0) {
    cat(" No major weaknesses identified.\n")
  } else {
    for (i in seq_along(weaknesses)) {
      cat(sprintf(" %d. %s\n", i, weaknesses[[i]]))
    }
  }

  # 3. Key Player Analysis
  cat("\n3. KEY PLAYER THREATS\n")
  section_rule()
  player_threats <- opponent_events %>%
    group_by(player_name) %>%
    summarise(
      events = n(),
      shots = sum(event_type == "Shot"),
      xg = sum(shot_xg, na.rm = TRUE),
      goals = sum(shot_outcome == "Goal", na.rm = TRUE),
      key_passes = sum(event_type == "Pass" & end_location_x > 100, na.rm = TRUE),
      progressive_carries = sum(event_type == "Carry" & (end_location_x - location_x) > 15),
      pressures = sum(event_type == "Pressure"),
      .groups = "drop"
    ) %>%
    mutate(
      threat_score = xg * 5 + key_passes * 0.5 + progressive_carries * 0.3
    ) %>%
    arrange(desc(threat_score)) %>%
    head(5)
  # seq_len() skips the loop when there are no players; 1:0 would not.
  for (i in seq_len(nrow(player_threats))) {
    p <- player_threats[i, ]
    cat(sprintf(" %d. %s - Threat Score: %.1f\n", i, p$player_name, p$threat_score))
    cat(sprintf(" xG: %.2f | Shots: %d | Key Passes: %d\n\n", p$xg, p$shots, p$key_passes))
  }

  # 4. Tactical Recommendations
  cat("4. TACTICAL RECOMMENDATIONS\n")
  section_rule()
  cat("\n IN POSSESSION:\n")
  if (opponent_style$high_press_pct > league_avg$high_press_pct) {
    cat(" - Play through their press with quick combinations\n")
    cat(" - Use wide areas to escape pressure\n")
    cat(" - Goalkeeper comfortable playing with feet essential\n")
  } else {
    cat(" - Build patiently; they don't press high\n")
    cat(" - Look for progressive passes into midfield pockets\n")
    cat(" - Switch play to exploit width\n")
  }
  cat("\n OUT OF POSSESSION:\n")
  if (opponent_style$long_ball_pct > league_avg$long_ball_pct) {
    cat(" - Win second balls; strong aerial presence needed\n")
    cat(" - Compress midfield to limit space for knockdowns\n")
    cat(" - Fullbacks stay tight to wingers\n")
  } else {
    cat(" - Press in middle third to disrupt build-up\n")
    cat(" - Man-mark their key playmaker\n")
    cat(" - Force play into wide areas\n")
  }
  cat("\n SET PIECES:\n")
  cat(" - Assign dedicated markers for aerial threats\n")
  cat(" - Watch for short corner routines\n")
  cat("\n")
  cat(strrep("=", 71), "\n", sep = "")

  invisible(list(
    style = opponent_style,
    weaknesses = weaknesses,
    threats = player_threats
  ))
}
# Generate report for Manchester City
# The report text is printed as a side effect; the list the function returns
# invisibly (style, weaknesses, threats) is kept in `report`
report <- generate_opposition_report("Manchester City", all_team_data, league_metrics, league_avg)
# Radar chart comparison
# Build (and print) the data behind a team-vs-league radar chart.
#
# Args:
#   team_name: team to profile; matched against `league_metrics$team`.
#   league_metrics: one row of style metrics per team.
#   league_avg: one-row tibble of league-average metrics.
#
# Returns the printed tibble (invisibly, via print()): one row per radar
# metric with the team value, the league average, and both rescaled so that
# the larger of the two is 100 for that metric.
create_style_radar <- function(team_name, league_metrics, league_avg) {
  # Select key metrics for radar
  radar_metrics <- c("pass_completion", "progressive_pass_pct", "pressures_per_90",
                     "high_press_pct", "xg_per_90", "directness_index")
  team_vals <- league_metrics %>%
    filter(team == team_name) %>%
    select(all_of(radar_metrics)) %>%
    pivot_longer(everything(), names_to = "metric", values_to = "team_value")
  avg_vals <- league_avg %>%
    select(all_of(radar_metrics)) %>%
    pivot_longer(everything(), names_to = "metric", values_to = "avg_value")
  radar_data <- team_vals %>%
    left_join(avg_vals, by = "metric") %>%
    mutate(
      # Normalize to 0-100 scale, metric by metric. pmax() takes the larger
      # value per row; max() would return one maximum across ALL metrics and
      # squash small-valued metrics towards zero.
      team_norm = (team_value / pmax(team_value, avg_value)) * 100,
      avg_norm = (avg_value / pmax(team_value, avg_value)) * 100
    )
  print(radar_data)
}
Exercise 23.2: Set Piece Analysis & Pattern Detection
Task: Build a specialized set piece analysis system that identifies opponent corner and free kick patterns, categorizes delivery types, maps target zones, and identifies their most dangerous aerial threats.
Requirements:
- Categorize corner deliveries (inswing, outswing, short, driven)
- Map target zones (near post, far post, edge of box, etc.)
- Calculate xG from set pieces vs open play
- Identify aerial threat players by position
- Generate defensive set piece recommendations
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Simulate set piece data
# Fixed seed so the simulated table is reproducible; the columns below are
# drawn in order from NumPy's global random state, so reordering them would
# change the generated data.
np.random.seed(42)
n_setpieces = 150
# One row per set piece
set_piece_data = pd.DataFrame({
'match_id': np.random.randint(1, 16, n_setpieces),
'set_piece_type': np.random.choice(['Corner', 'Free Kick'], n_setpieces, p=[0.6, 0.4]),
'side': np.random.choice(['Left', 'Right'], n_setpieces),
'taker': np.random.choice([f'Taker_{i}' for i in range(1, 5)], n_setpieces),
# Delivery characteristics
'delivery_type': np.random.choice(
['Inswinger', 'Outswinger', 'Short', 'Driven', 'Floated'],
n_setpieces, p=[0.35, 0.25, 0.15, 0.15, 0.1]
),
# Zone the delivery is aimed at
'target_zone': np.random.choice(
['Near Post', 'Central 6-yard', 'Far Post', 'Penalty Spot', 'Edge of Box'],
n_setpieces, p=[0.2, 0.3, 0.25, 0.15, 0.1]
),
# What happened after the delivery
'outcome': np.random.choice(
['Cleared', 'Shot Created', 'Goal', 'Keeper Claim', 'Out of Play', 'Second Ball'],
n_setpieces, p=[0.35, 0.2, 0.05, 0.15, 0.15, 0.1]
),
# Player credited with the first contact
'first_contact': np.random.choice(
['Defender_1', 'Defender_2', 'Midfielder_1', 'Forward_1', 'Forward_2', 'Own Player'],
n_setpieces, p=[0.25, 0.2, 0.15, 0.2, 0.15, 0.05]
)
})
# Add xG for shots
# Values are drawn for every row and kept only where a shot or goal resulted
set_piece_data['shot_xg'] = np.where(
set_piece_data['outcome'].isin(['Shot Created', 'Goal']),
np.random.uniform(0.05, 0.35, n_setpieces),
np.nan
)
# Free kick specific
# Distance and angle are left missing (NaN / None) on corner rows
set_piece_data['fk_distance'] = np.where(
set_piece_data['set_piece_type'] == 'Free Kick',
np.random.uniform(20, 35, n_setpieces),
np.nan
)
set_piece_data['fk_angle'] = np.where(
set_piece_data['set_piece_type'] == 'Free Kick',
np.random.choice(['Central', 'Wide Left', 'Wide Right'], n_setpieces),
None
)
def analyze_set_pieces(data):
    """Comprehensive set piece analysis.

    Prints a six-section report (overview, corner deliveries, target zones,
    aerial threats, free kicks, defensive recommendations) and returns the
    tables behind it.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per set piece. Columns read: ``match_id``,
        ``set_piece_type``, ``side``, ``delivery_type``, ``target_zone``,
        ``outcome``, ``first_contact``, ``shot_xg`` and ``fk_angle``.

    Returns
    -------
    dict
        ``corner_patterns``, ``target_zones`` and ``aerial_threats``
        DataFrames. A section with no rows to analyse (no corners, no shots,
        no free kicks) prints a short notice and yields an empty table
        instead of raising.
    """
    shot_outcomes = ['Shot Created', 'Goal']

    print("=" * 62)
    print("SET PIECE ANALYSIS REPORT")
    print("=" * 62 + "\n")

    # 1. Overview
    print("1. SET PIECE OVERVIEW")
    print("-" * 40)
    total = len(data)
    corners = (data['set_piece_type'] == 'Corner').sum()
    free_kicks = (data['set_piece_type'] == 'Free Kick').sum()
    shots = data['outcome'].isin(shot_outcomes).sum()
    goals = (data['outcome'] == 'Goal').sum()
    total_xg = data['shot_xg'].sum()
    # Avoid dividing by zero on an empty table
    shot_pct = shots / total * 100 if total > 0 else 0.0
    print(f" Total Set Pieces: {total}")
    print(f" Corners: {corners} | Free Kicks: {free_kicks}")
    print(f" Shots Created: {shots} ({shot_pct:.1f}%)")
    print(f" Goals: {goals} | Total xG: {total_xg:.2f}\n")

    # 2. Corner Delivery Patterns
    print("2. CORNER DELIVERY PATTERNS")
    print("-" * 40)
    corner_data = data[data['set_piece_type'] == 'Corner']
    if corner_data.empty:
        corner_patterns = pd.DataFrame(columns=['Side', 'Delivery', 'Count', 'xG'])
        print(" No corners in sample.\n")
    else:
        corner_patterns = (
            corner_data.groupby(['side', 'delivery_type'])
            .agg(Count=('match_id', 'count'), xG=('shot_xg', 'sum'))
            .reset_index()
            .rename(columns={'side': 'Side', 'delivery_type': 'Delivery'})
            .sort_values(['Side', 'Count'], ascending=[True, False])
        )
        print(corner_patterns.to_string(index=False))
        delivery_counts = corner_data['delivery_type'].value_counts()
        most_common = delivery_counts.idxmax()
        most_common_count = delivery_counts.max()
        print(f"\n Most Common Delivery: {most_common} ({most_common_count} corners)\n")

    # 3. Target Zone Analysis
    print("3. TARGET ZONE ANALYSIS")
    print("-" * 40)
    preferred = None  # most-targeted zone row, if any corners exist
    if corner_data.empty:
        target_analysis = pd.DataFrame(columns=['Target Zone', 'Count', 'xG', 'Goals', 'Pct'])
        print(" No corners in sample.\n")
    else:
        target_analysis = (
            corner_data.assign(is_goal=corner_data['outcome'] == 'Goal')
            .groupby('target_zone')
            .agg(Count=('match_id', 'count'), xG=('shot_xg', 'sum'), Goals=('is_goal', 'sum'))
            .reset_index()
            .rename(columns={'target_zone': 'Target Zone'})
        )
        target_analysis['Pct'] = target_analysis['Count'] / target_analysis['Count'].sum() * 100
        target_analysis = target_analysis.sort_values('Count', ascending=False)
        print(target_analysis.to_string(index=False))
        preferred = target_analysis.iloc[0]
        print(f"\n Primary Target: {preferred['Target Zone']} ({preferred['Pct']:.1f}% of corners)\n")

    # 4. Aerial Threats
    print("4. AERIAL THREATS")
    print("-" * 40)
    top_threat = None  # first-contact player with the most xG, if any shots exist
    shots_created = data[data['outcome'].isin(shot_outcomes)]
    if shots_created.empty:
        aerial_threats = pd.DataFrame(columns=['Player', 'Aerial Wins', 'xG', 'Goals'])
        print(" No shots created from set pieces.\n")
    else:
        aerial_threats = (
            shots_created.assign(is_goal=shots_created['outcome'] == 'Goal')
            .groupby('first_contact')
            .agg(**{
                'Aerial Wins': ('match_id', 'count'),
                'xG': ('shot_xg', 'sum'),
                'Goals': ('is_goal', 'sum'),
            })
            .reset_index()
            .rename(columns={'first_contact': 'Player'})
            .sort_values('xG', ascending=False)
        )
        print(aerial_threats.to_string(index=False))
        top_threat = aerial_threats.iloc[0]
        print(f"\n Primary Aerial Threat: {top_threat['Player']} ({top_threat['xG']:.2f} xG)")
        print(" RECOMMENDATION: Assign dedicated marker to this player\n")

    # 5. Free Kick Analysis
    print("5. FREE KICK ANALYSIS")
    print("-" * 40)
    fk_data = data[data['set_piece_type'] == 'Free Kick']
    if fk_data.empty:
        print(" No free kicks in sample.")
    else:
        fk_analysis = (
            fk_data.groupby(['fk_angle', 'delivery_type'])
            .agg(Count=('match_id', 'count'), xG=('shot_xg', 'sum'))
            .reset_index()
            .rename(columns={'fk_angle': 'Angle', 'delivery_type': 'Delivery'})
            .sort_values('xG', ascending=False)
        )
        print(fk_analysis.to_string(index=False))

    # 6. Recommendations
    print("\n6. DEFENSIVE RECOMMENDATIONS")
    print("-" * 40)
    print(" CORNERS:")
    preferred_zone = preferred['Target Zone'] if preferred is not None else None
    if preferred_zone == 'Near Post':
        print(" - Strong man at near post; attack ball aggressively")
        print(" - Second defender 2 yards behind near post marker")
    elif preferred_zone == 'Far Post':
        print(" - Stack defenders at far post; zonal coverage")
        print(" - Keeper to command 6-yard box")
    else:
        print(" - Mixed zonal-man marking; key players man-marked")
        print(" - Two at posts for short corner threat")
    if top_threat is not None:
        print(f" - Primary marker on: {top_threat['Player']}")
    print("\n FREE KICKS:")
    print(" - Wall: 4-5 players depending on distance")
    print(" - Watch for dummy runs behind wall")

    return {
        'corner_patterns': corner_patterns,
        'target_zones': target_analysis,
        'aerial_threats': aerial_threats
    }
# Run analysis
analysis = analyze_set_pieces(set_piece_data)
# Visualization
# Two panels side by side: corner deliveries (left) and xG by outcome (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Corner target zones
# Rows are target zones, columns are delivery types, cells are corner counts
corner_data = set_piece_data[set_piece_data['set_piece_type'] == 'Corner']
target_counts = corner_data.groupby(['target_zone', 'delivery_type']).size().unstack(fill_value=0)
target_counts.plot(kind='bar', ax=axes[0], colormap='Set2')
axes[0].set_title('Corner Delivery by Target Zone')
axes[0].set_xlabel('Target Zone')
axes[0].set_ylabel('Count')
axes[0].tick_params(axis='x', rotation=45)
axes[0].legend(title='Delivery Type', bbox_to_anchor=(1.02, 1))
# xG by outcome
# min_count=1 makes outcomes with no xG values sum to NaN (a plain sum gives
# 0), so dropna() actually removes them instead of plotting empty bars
outcome_xg = (
    set_piece_data.groupby('outcome')['shot_xg']
    .sum(min_count=1)
    .dropna()
    .sort_values(ascending=True)
)
outcome_xg.plot(kind='barh', ax=axes[1], color='coral')
axes[1].set_title('Set Piece xG by Outcome')
axes[1].set_xlabel('Total xG')
plt.tight_layout()
plt.show()
library(tidyverse)
# Simulate set piece data
# Fixed seed so the simulated table is reproducible; the columns below are
# drawn in order from R's global RNG, so reordering them would change the
# generated data.
set.seed(42)
n_setpieces <- 150
# One row per set piece
set_piece_data <- tibble(
match_id = sample(1:15, n_setpieces, replace = TRUE),
set_piece_type = sample(c("Corner", "Free Kick"), n_setpieces,
replace = TRUE, prob = c(0.6, 0.4)),
side = sample(c("Left", "Right"), n_setpieces, replace = TRUE),
taker = sample(paste0("Taker_", 1:4), n_setpieces, replace = TRUE),
# Delivery characteristics
delivery_type = sample(c("Inswinger", "Outswinger", "Short", "Driven", "Floated"),
n_setpieces, replace = TRUE,
prob = c(0.35, 0.25, 0.15, 0.15, 0.1)),
# Target zone (6-yard box: near post, central, far post; edge of box)
target_zone = sample(c("Near Post", "Central 6-yard", "Far Post",
"Penalty Spot", "Edge of Box"),
n_setpieces, replace = TRUE,
prob = c(0.2, 0.3, 0.25, 0.15, 0.1)),
# Outcome
outcome = sample(c("Cleared", "Shot Created", "Goal", "Keeper Claim",
"Out of Play", "Second Ball"),
n_setpieces, replace = TRUE,
prob = c(0.35, 0.2, 0.05, 0.15, 0.15, 0.1)),
# First contact player
first_contact = sample(c("Defender_1", "Defender_2", "Midfielder_1",
"Forward_1", "Forward_2", "Own Player"),
n_setpieces, replace = TRUE,
prob = c(0.25, 0.2, 0.15, 0.2, 0.15, 0.05)),
# xG if shot created
# Values are drawn for every row and kept only where a shot or goal resulted
shot_xg = ifelse(outcome %in% c("Shot Created", "Goal"),
runif(n_setpieces, 0.05, 0.35), NA),
# Free kick distance (for FK only)
# Distance and angle are NA on corner rows
fk_distance = ifelse(set_piece_type == "Free Kick",
runif(n_setpieces, 20, 35), NA),
fk_angle = ifelse(set_piece_type == "Free Kick",
sample(c("Central", "Wide Left", "Wide Right"), n_setpieces, replace = TRUE), NA)
)
# Analyze set piece patterns
# Print a six-section set piece report and return the tables behind it.
#
# Args:
#   data: one row per set piece (columns read: set_piece_type, side,
#     delivery_type, target_zone, outcome, first_contact, shot_xg, fk_angle).
#
# Returns (invisibly) a list with `corner_patterns`, `target_zones` and
# `aerial_threats` tibbles. Sections with nothing to analyse (no corners, no
# shots) yield empty tables and skip their summary line instead of failing.
analyze_set_pieces <- function(data) {
  section_rule <- function() cat(strrep("-", 40), "\n", sep = "")
  shot_outcomes <- c("Shot Created", "Goal")
  # Corner subset is filtered once and reused by sections 2 and 3
  corner_rows <- data %>% filter(set_piece_type == "Corner")
  n_corners <- nrow(corner_rows)

  cat(strrep("=", 61), "\n", sep = "")
  cat("SET PIECE ANALYSIS REPORT\n")
  cat(strrep("=", 61), "\n\n", sep = "")

  # Overall summary
  cat("1. SET PIECE OVERVIEW\n")
  section_rule()
  summary_stats <- data %>%
    summarise(
      total_set_pieces = n(),
      corners = sum(set_piece_type == "Corner"),
      free_kicks = sum(set_piece_type == "Free Kick"),
      shots_created = sum(outcome %in% shot_outcomes),
      goals = sum(outcome == "Goal"),
      total_xg = sum(shot_xg, na.rm = TRUE)
    )
  # Avoid printing NaN% for an empty table
  shot_rate <- if (summary_stats$total_set_pieces > 0) {
    summary_stats$shots_created / summary_stats$total_set_pieces * 100
  } else {
    0
  }
  cat(sprintf(" Total Set Pieces: %d\n", summary_stats$total_set_pieces))
  cat(sprintf(" Corners: %d | Free Kicks: %d\n", summary_stats$corners, summary_stats$free_kicks))
  cat(sprintf(" Shots Created: %d (%.1f%%)\n", summary_stats$shots_created, shot_rate))
  cat(sprintf(" Goals: %d | Total xG: %.2f\n\n", summary_stats$goals, summary_stats$total_xg))

  # Corner delivery analysis
  cat("2. CORNER DELIVERY PATTERNS\n")
  section_rule()
  corner_patterns <- corner_rows %>%
    group_by(side, delivery_type) %>%
    summarise(
      count = n(),
      shots = sum(outcome %in% shot_outcomes),
      xg = sum(shot_xg, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(side, desc(count))
  print(corner_patterns)
  # Most common delivery
  most_common <- corner_patterns %>%
    group_by(delivery_type) %>%
    summarise(total = sum(count), .groups = "drop") %>%
    arrange(desc(total)) %>%
    slice(1)
  if (nrow(most_common) > 0) {
    cat(sprintf("\n Most Common Delivery: %s (%d corners)\n\n",
                most_common$delivery_type, most_common$total))
  }

  # Target zone analysis
  cat("3. TARGET ZONE ANALYSIS\n")
  section_rule()
  target_analysis <- corner_rows %>%
    group_by(target_zone) %>%
    summarise(
      count = n(),
      pct = n() / n_corners * 100,
      xg = sum(shot_xg, na.rm = TRUE),
      goals = sum(outcome == "Goal"),
      .groups = "drop"
    ) %>%
    arrange(desc(count))
  print(target_analysis)
  # Identify preferred zone (zero rows when there are no corners)
  preferred_zone <- target_analysis %>% slice(1)
  if (nrow(preferred_zone) > 0) {
    cat(sprintf("\n Primary Target: %s (%.1f%% of corners)\n\n",
                preferred_zone$target_zone, preferred_zone$pct))
  }

  # Aerial threat analysis
  cat("4. AERIAL THREATS\n")
  section_rule()
  aerial_threats <- data %>%
    filter(outcome %in% shot_outcomes) %>%
    group_by(first_contact) %>%
    summarise(
      aerial_wins = n(),
      xg = sum(shot_xg, na.rm = TRUE),
      goals = sum(outcome == "Goal"),
      .groups = "drop"
    ) %>%
    arrange(desc(xg))
  print(aerial_threats)
  # Top threat recommendation (zero rows when no shots were created)
  top_threat <- aerial_threats %>% slice(1)
  has_threat <- nrow(top_threat) > 0
  if (has_threat) {
    cat(sprintf("\n Primary Aerial Threat: %s (%.2f xG from set pieces)\n",
                top_threat$first_contact, top_threat$xg))
    cat(" RECOMMENDATION: Assign dedicated marker to this player at all corners\n\n")
  }

  # Free kick analysis
  cat("5. FREE KICK ANALYSIS\n")
  section_rule()
  fk_analysis <- data %>%
    filter(set_piece_type == "Free Kick") %>%
    group_by(fk_angle, delivery_type) %>%
    summarise(
      count = n(),
      xg = sum(shot_xg, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(desc(xg))
  print(fk_analysis)

  # Defensive recommendations
  cat("\n6. DEFENSIVE RECOMMENDATIONS\n")
  section_rule()
  # Based on analysis; identical() is safe when no preferred zone exists,
  # whereas `==` on a zero-length value would make `if` fail
  cat(" CORNERS:\n")
  main_zone <- if (nrow(preferred_zone) > 0) preferred_zone$target_zone else NA_character_
  if (identical(main_zone, "Near Post")) {
    cat(" - Strong man at near post; attack ball aggressively\n")
    cat(" - Second defender 2 yards behind near post marker\n")
  } else if (identical(main_zone, "Far Post")) {
    cat(" - Stack defenders at far post; zonal coverage\n")
    cat(" - Keeper to command 6-yard box\n")
  } else {
    cat(" - Mixed zonal-man marking; key players man-marked\n")
    cat(" - Two at posts for short corner threat\n")
  }
  if (has_threat) {
    cat(sprintf(" - Primary marker on: %s\n\n", top_threat$first_contact))
  }
  cat(" FREE KICKS:\n")
  cat(" - Wall: 4-5 players depending on distance\n")
  cat(" - Watch for dummy runs behind wall\n")
  cat(" - One player on post for near post deliveries\n")

  invisible(list(
    corner_patterns = corner_patterns,
    target_zones = target_analysis,
    aerial_threats = aerial_threats
  ))
}
# Run analysis
analysis <- analyze_set_pieces(set_piece_data)
# Visualization: Corner target zones
# Count corners per (target zone, delivery type) pair; count() names the
# tally column `n`
corner_viz <- count(
  filter(set_piece_data, set_piece_type == "Corner"),
  target_zone, delivery_type
)
# Side-by-side bars: one bar per delivery type within each target zone
ggplot(corner_viz, aes(x = target_zone, y = n, fill = delivery_type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Corner Delivery Patterns by Target Zone",
    x = "Target Zone", y = "Count", fill = "Delivery Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Exercise 23.3: Dynamic Game Model Simulator
Task: Build a tactical game simulator that models how an opponent is likely to adjust their tactics based on match state (winning, losing, drawing) and creates scenario-based game plans.
Requirements:
- Analyze how opponent's style changes by match state
- Model formation/personnel changes when losing
- Create scenario plans (if ahead, if behind, etc.)
- Identify key substitution patterns
- Generate time-based tactical recommendations
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Simulate match state data with tactical changes
# Fixed seed so the simulated match states are reproducible between runs
np.random.seed(42)
# Number of matches to simulate
n_matches = 15
def generate_match_state_data(n_matches):
    """Simulate per-period tactical readings that depend on the scoreline.

    Each match is split into six 15-minute periods. In every period a goal
    for and a goal against are drawn, the running score decides whether the
    team is "Winning", "Losing" or "Drawing", and the formation and tactical
    metrics are then drawn from ranges tied to that state and to how late
    in the match the period starts.

    Args:
        n_matches: Number of matches to simulate (match ids run 1..n_matches).

    Returns:
        DataFrame with one row per (match, period) and the columns match_id,
        period, minute_start, minute_end, match_state, formation,
        pressing_intensity, directness, defensive_line, attacking_width,
        crosses and long_balls.
    """
    windows = [(lo, lo + 15) for lo in range(0, 90, 15)]
    records = []

    for match_id in range(1, n_matches + 1):
        scored_total = 0
        conceded_total = 0

        for lo, hi in windows:
            # Goals are drawn first (for, then against) and counted before
            # the state is decided, so a goal affects its own period.
            scored_total += np.random.binomial(1, 0.15)
            conceded_total += np.random.binomial(1, 0.12)

            margin = scored_total - conceded_total
            if margin > 0:
                match_state = "Winning"
            elif margin < 0:
                match_state = "Losing"
            else:
                match_state = "Drawing"
            chasing = match_state == "Losing"
            protecting = match_state == "Winning"

            # Formation based on state and time
            if chasing and lo >= 60:
                formation = np.random.choice(["4-3-3", "3-4-3", "4-2-4"], p=[0.3, 0.4, 0.3])
            elif protecting and lo >= 75:
                formation = np.random.choice(["5-4-1", "4-5-1", "5-3-2"], p=[0.4, 0.4, 0.2])
            else:
                formation = "4-3-3"

            # Sampling parameters for the tactical metrics by state
            if chasing:
                press_rng = (70, 95)
                direct_rng = (60, 85) if lo >= 60 else (45, 65)
                line_rng = (55, 70)
                width_rng = (65, 85)
                cross_rate = 6 if lo >= 60 else 3
                long_rate = 8 if lo >= 75 else 4
            elif protecting:
                press_rng = (40, 60) if lo >= 60 else (55, 75)
                direct_rng = (35, 55)
                line_rng = (35, 50)
                width_rng = (50, 70)
                cross_rate = 3
                long_rate = 4
            else:
                press_rng = (55, 75)
                direct_rng = (45, 65)
                line_rng = (45, 60)
                width_rng = (50, 70)
                cross_rate = 3
                long_rate = 4

            # Dict values are evaluated top to bottom, so the draws happen in
            # the order pressing, directness, line, width, crosses, long balls.
            records.append({
                'match_id': match_id,
                'period': f"{lo}-{hi}",
                'minute_start': lo,
                'minute_end': hi,
                'match_state': match_state,
                'formation': formation,
                'pressing_intensity': np.random.uniform(*press_rng),
                'directness': np.random.uniform(*direct_rng),
                'defensive_line': np.random.uniform(*line_rng),
                'attacking_width': np.random.uniform(*width_rng),
                'crosses': np.random.poisson(cross_rate),
                'long_balls': np.random.poisson(long_rate)
            })

    return pd.DataFrame(records)
match_state_data = generate_match_state_data(n_matches)

# Substitution data: three changes per match. Columns are filled in the same
# order the random draws are made (minutes, state, player off, player on).
substitution_data = pd.DataFrame({
    'match_id': np.repeat(range(1, n_matches + 1), 3),
    'sub_number': list(range(1, 4)) * n_matches,
})
# Three distinct minutes between 55 and 90 per match, in ascending order
substitution_data['minute'] = np.concatenate([
    sorted(np.random.choice(range(55, 91), size=3, replace=False))
    for _ in range(n_matches)
])
substitution_data['match_state_at_sub'] = np.random.choice(
    ['Drawing', 'Winning', 'Losing'],
    size=n_matches * 3,
    p=[0.4, 0.3, 0.3],
)
substitution_data['player_off_type'] = np.random.choice(
    ['Forward', 'Midfielder', 'Defender', 'Goalkeeper'],
    size=n_matches * 3,
    p=[0.3, 0.35, 0.3, 0.05],
)
substitution_data['player_on_type'] = np.random.choice(
    ['Forward', 'Midfielder', 'Defender'],
    size=n_matches * 3,
    p=[0.4, 0.35, 0.25],
)
def analyze_state_tactics(state_data, sub_data):
    """Print a match-state tactical report and return the per-state averages.

    The report has six sections: tactical profile by match state, late-game
    formation counts, substitution patterns, metrics by time period,
    scenario-based game plans and key tactical triggers.

    Args:
        state_data: DataFrame with one row per match period. Columns read
            here: match_state, period, minute_start, formation,
            pressing_intensity, directness, defensive_line, attacking_width,
            crosses, long_balls.
        sub_data: DataFrame with one row per substitution. Columns read here:
            match_state_at_sub, player_on_type, minute.

    Returns:
        DataFrame indexed by match_state with the mean of each tactical
        metric, rounded to one decimal place.
    """
    banner = "=" * 67
    divider = "-" * 50
    metric_cols = [
        'pressing_intensity', 'directness', 'defensive_line',
        'attacking_width', 'crosses', 'long_balls'
    ]

    print(banner)
    print("DYNAMIC GAME MODEL - TACTICAL STATE ANALYSIS")
    print(banner + "\n")

    # 1. Style by Match State
    print("1. TACTICAL PROFILE BY MATCH STATE")
    print(divider)
    state_metrics = state_data.groupby('match_state').agg(
        {col: 'mean' for col in metric_cols}
    ).round(1)
    print(state_metrics.to_string())

    # A sample may contain no Losing (or no Winning) periods. Reindexing
    # yields an all-NaN row for a missing state instead of a KeyError, so the
    # comparisons below print "nan" rather than aborting the whole report.
    profiles = state_metrics.reindex(['Losing', 'Winning'])
    losing = profiles.loc['Losing']
    winning = profiles.loc['Winning']
    press_gap = losing['pressing_intensity'] - winning['pressing_intensity']
    line_gap = losing['defensive_line'] - winning['defensive_line']
    cross_gap = losing['crosses'] - state_metrics['crosses'].mean()

    # Deltas use the "+" format flag so a negative gap prints "-5", not "+-5".
    print("\nKey Observations:")
    print(f" When LOSING: Press intensity {losing['pressing_intensity']:.0f}% ({press_gap:+.0f}% vs winning)")
    print(f" When WINNING: Def line at {winning['defensive_line']:.0f}m ({line_gap:.0f}m deeper than losing)")

    # 2. Formation Changes
    print("\n2. FORMATION CHANGES BY STATE & TIME")
    print(divider)
    late_game = state_data[state_data['minute_start'] >= 60]
    formation_changes = late_game.groupby(['match_state', 'formation']).size().reset_index(name='count')
    formation_changes = formation_changes.sort_values(['match_state', 'count'], ascending=[True, False])
    print(formation_changes.to_string(index=False))

    # 3. Substitution Patterns
    print("\n3. SUBSTITUTION PATTERNS")
    print(divider)
    sub_patterns = sub_data.groupby(['match_state_at_sub', 'player_on_type']).agg({
        'minute': ['count', 'mean']
    }).reset_index()
    sub_patterns.columns = ['State', 'Player Type', 'Count', 'Avg Minute']
    sub_patterns = sub_patterns.sort_values(['State', 'Count'], ascending=[True, False])
    print(sub_patterns.to_string(index=False))

    losing_subs = sub_data.loc[
        sub_data['match_state_at_sub'] == 'Losing', 'player_on_type'
    ].value_counts()
    if losing_subs.empty:
        # idxmax() raises on an empty Series, so report the gap explicitly.
        print("\n When losing, most common sub: n/a (no substitutions recorded while losing)")
    else:
        top_sub = losing_subs.idxmax()
        top_pct = losing_subs.max() / losing_subs.sum() * 100
        print(f"\n When losing, most common sub: {top_sub} ({top_pct:.0f}% of subs)")

    # 4. Time-Based Phases
    print("\n4. TACTICAL PHASES BY TIME PERIOD")
    print(divider)
    time_phases = state_data.groupby('period').agg({
        'pressing_intensity': 'mean',
        'directness': 'mean',
        'crosses': 'mean'
    }).round(1)
    print(time_phases.to_string())

    # 5. Scenario Plans
    print("\n5. SCENARIO-BASED GAME PLANS")
    print(divider)
    print("\n SCENARIO A: We are WINNING")
    print(f" - Expect them to push higher (def line {line_gap:+.0f}m)")
    print(" - Counter-attacks become more viable")
    print(f" - Watch for increased crosses ({cross_gap:+.1f} per 15 min)")
    print(" - Likely formation change to 3-4-3 or 4-2-4 after 60'")
    print("\n SCENARIO B: We are LOSING")
    print(" - They will drop deeper and reduce pressing")
    print(" - Expect 5-4-1 or 5-3-2 formation after 75'")
    print(" - Counter-attacks become their primary threat")
    print("\n SCENARIO C: DRAWING late (75'+)")
    print(" - Both teams likely to take more risks")
    print(" - Space opens up in transitions")

    # 6. Key Triggers
    print("\n6. KEY TACTICAL TRIGGERS TO WATCH")
    print(divider)
    print(" Minute 60: Formation likely changes if losing")
    print(" Minute 65-70: Attacking substitution if losing")
    print(" Minute 75: Defensive switch if winning (5-back)")
    # Average long balls in periods starting at 75' versus the all-period average
    late_long = state_data[state_data['minute_start'] >= 75]['long_balls'].mean()
    overall_long = state_data['long_balls'].mean()
    print(f" Minute 80+: Long balls increase ({late_long - overall_long:+.1f} per period)")

    return state_metrics
game_model = analyze_state_tactics(match_state_data, substitution_data)

# Visualization: two panels side by side
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: mean of each metric per match state, one line per metric
metric_names = ('pressing_intensity', 'directness', 'defensive_line')
states_in_order = ['Losing', 'Drawing', 'Winning']
state_column = match_state_data['match_state']
for metric_name in metric_names:
    state_means = [
        match_state_data.loc[state_column == state, metric_name].mean()
        for state in states_in_order
    ]
    axes[0].plot(states_in_order, state_means, marker='o', label=metric_name)
axes[0].set_xlabel('Match State')
axes[0].set_ylabel('Value')
axes[0].set_title('Tactical Metrics by Match State')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Right panel: share of each formation in periods starting at 60' or later
# while the team is losing
is_losing = match_state_data['match_state'] == 'Losing'
is_late = match_state_data['minute_start'] >= 60
formation_share = match_state_data[is_losing & is_late]['formation'].value_counts()
axes[1].pie(formation_share, labels=formation_share.index, autopct='%1.0f%%', startangle=90)
axes[1].set_title('Formation When Losing (60+ min)')

plt.tight_layout()
plt.show()
library(tidyverse)
# Simulation setup for the match-state data with tactical changes:
# number of matches to simulate, and a fixed seed so every run draws
# the same sample.
n_matches <- 15
set.seed(42)
# Generate match data with state changes
#
# Simulates `n_matches` matches split into six 15-minute periods. In each
# period a goal for and a goal against are drawn; the running score decides
# whether the team is "Winning", "Losing" or "Drawing", and the formation and
# tactical metrics are drawn from ranges tied to that state and to the
# period's start minute.
#
# Args:
#   n_matches: number of matches to simulate (match ids run 1..n_matches).
#
# Returns:
#   A tibble with one row per (match, period) and the columns match_id,
#   period, minute_start, minute_end, goal_scored, goal_conceded, match_state,
#   formation, pressing_intensity, directness, defensive_line,
#   attacking_width, crosses and long_balls.
generate_match_state_data <- function(n_matches) {
  simulate_match <- function(match_id) {
    tibble(
      match_id = match_id,
      period = c("0-15", "15-30", "30-45", "45-60", "60-75", "75-90"),
      minute_start = c(0, 15, 30, 45, 60, 75),
      minute_end = c(15, 30, 45, 60, 75, 90)
    ) %>%
      mutate(
        goal_scored = rbinom(n(), 1, 0.15),
        goal_conceded = rbinom(n(), 1, 0.12),
        # Running goal difference; cumsum includes the current period, so a
        # goal changes the state of the period it is scored in.
        goal_diff = cumsum(goal_scored) - cumsum(goal_conceded),
        match_state = case_when(
          goal_diff > 0 ~ "Winning",
          goal_diff < 0 ~ "Losing",
          TRUE ~ "Drawing"
        ),
        # Every random call below draws n() values (one per period). Drawing
        # a single value would be recycled across all rows that hit the same
        # case_when branch, giving those periods identical readings.
        # Formation shifts when losing late
        formation = case_when(
          match_state == "Losing" & minute_start >= 60 ~
            sample(c("4-3-3", "3-4-3", "4-2-4"), n(), replace = TRUE, prob = c(0.3, 0.4, 0.3)),
          match_state == "Winning" & minute_start >= 75 ~
            sample(c("5-4-1", "4-5-1", "5-3-2"), n(), replace = TRUE, prob = c(0.4, 0.4, 0.2)),
          TRUE ~ "4-3-3"
        ),
        # Pressing intensity by state
        pressing_intensity = case_when(
          match_state == "Losing" ~ runif(n(), 70, 95),
          match_state == "Winning" & minute_start >= 60 ~ runif(n(), 40, 60),
          TRUE ~ runif(n(), 55, 75)
        ),
        # Directness by state
        directness = case_when(
          match_state == "Losing" & minute_start >= 60 ~ runif(n(), 60, 85),
          match_state == "Winning" ~ runif(n(), 35, 55),
          TRUE ~ runif(n(), 45, 65)
        ),
        # Defensive line height
        defensive_line = case_when(
          match_state == "Winning" & minute_start >= 60 ~ runif(n(), 35, 50),
          match_state == "Losing" ~ runif(n(), 55, 70),
          TRUE ~ runif(n(), 45, 60)
        ),
        # Width in attack
        attacking_width = case_when(
          match_state == "Losing" ~ runif(n(), 65, 85),
          TRUE ~ runif(n(), 50, 70)
        ),
        # Crosses per period
        crosses = case_when(
          match_state == "Losing" & minute_start >= 60 ~ rpois(n(), 6),
          TRUE ~ rpois(n(), 3)
        ),
        # Long balls per period
        long_balls = case_when(
          match_state == "Losing" & minute_start >= 75 ~ rpois(n(), 8),
          TRUE ~ rpois(n(), 4)
        )
      ) %>%
      # goal_diff is only a helper for deriving match_state
      select(-goal_diff)
  }

  # seq_len() yields an empty sequence for n_matches = 0 (1:0 would not)
  map(seq_len(n_matches), simulate_match) %>%
    bind_rows()
}
match_state_data <- generate_match_state_data(n_matches)

# Substitution patterns: three changes per match. tibble() evaluates its
# arguments in order, so the random draws happen as minutes, state,
# player off, player on.
substitution_data <- tibble(
  match_id = rep(1:n_matches, each = 3),
  sub_number = rep(1:3, times = n_matches),
  # Three minutes per match from 55-90, later minutes weighted more heavily
  # (weights 1 / 2 / 3 over 10 / 15 / 11 minutes), sorted within the match
  minute = as.vector(replicate(
    n_matches,
    sort(sample(55:90, size = 3, prob = rep(c(1, 2, 3), times = c(10, 15, 11))))
  )),
  match_state_at_sub = sample(
    c("Drawing", "Winning", "Losing"),
    size = n_matches * 3,
    replace = TRUE,
    prob = c(0.4, 0.3, 0.3)
  ),
  player_off_type = sample(
    c("Forward", "Midfielder", "Defender", "Goalkeeper"),
    size = n_matches * 3,
    replace = TRUE,
    prob = c(0.3, 0.35, 0.3, 0.05)
  ),
  player_on_type = sample(
    c("Forward", "Midfielder", "Defender"),
    size = n_matches * 3,
    replace = TRUE,
    prob = c(0.4, 0.35, 0.25)
  )
)
# Analyze tactical changes by match state
#
# Prints a six-part report (profile by match state, late-game formations,
# substitution patterns, metrics by time period, scenario plans, key
# triggers) and invisibly returns the summary tables behind it.
#
# Args:
#   state_data: data frame with one row per match period. Columns used:
#     match_state, period, minute_start, formation, pressing_intensity,
#     directness, defensive_line, attacking_width, crosses, long_balls.
#   sub_data: data frame with one row per substitution. Columns used:
#     match_state_at_sub, player_on_type, minute.
#
# Returns (invisibly):
#   A list with elements state_metrics, formation_changes and sub_patterns.
analyze_state_tactics <- function(state_data, sub_data) {
  banner_rule <- strrep("=", 66)
  section_rule <- paste0(strrep("-", 50), "\n")

  cat(banner_rule, "\n", sep = "")
  cat("DYNAMIC GAME MODEL - TACTICAL STATE ANALYSIS\n")
  cat(banner_rule, "\n\n", sep = "")

  # 1. Style by Match State
  cat("1. TACTICAL PROFILE BY MATCH STATE\n")
  cat(section_rule)
  state_metrics <- state_data %>%
    group_by(match_state) %>%
    summarise(
      avg_pressing = mean(pressing_intensity),
      avg_directness = mean(directness),
      avg_def_line = mean(defensive_line),
      avg_width = mean(attacking_width),
      avg_crosses = mean(crosses),
      avg_long_balls = mean(long_balls),
      .groups = "drop"
    )
  print(state_metrics)

  # Look up one averaged metric for one state. A state that never occurs in
  # the sample gives NA, so the report prints "NA" instead of silently
  # dropping the line (sprintf() on a zero-length value returns nothing).
  metric_for <- function(state, column) {
    value <- state_metrics[[column]][state_metrics$match_state == state]
    if (length(value) == 0) NA_real_ else value
  }
  losing_pressing <- metric_for("Losing", "avg_pressing")
  winning_def_line <- metric_for("Winning", "avg_def_line")
  press_gap <- losing_pressing - metric_for("Winning", "avg_pressing")
  line_gap <- metric_for("Losing", "avg_def_line") - winning_def_line
  cross_gap <- metric_for("Losing", "avg_crosses") - mean(state_metrics$avg_crosses)

  # Deltas use the "+" format flag so a negative gap prints "-5", not "+-5".
  cat("\nKey Observations:\n")
  cat(sprintf(" When LOSING: Press intensity %.0f%% (%+.0f%% vs winning)\n",
              losing_pressing, press_gap))
  cat(sprintf(" When WINNING: Def line at %.0fm (%.0fm deeper than losing)\n",
              winning_def_line, line_gap))

  # 2. Formation Changes
  cat("\n2. FORMATION CHANGES BY STATE & TIME\n")
  cat(section_rule)
  formation_changes <- state_data %>%
    filter(minute_start >= 60) %>%
    count(match_state, formation, name = "count") %>%
    arrange(match_state, desc(count))
  print(formation_changes)

  # 3. Substitution Patterns
  cat("\n3. SUBSTITUTION PATTERNS\n")
  cat(section_rule)
  sub_patterns <- sub_data %>%
    group_by(match_state_at_sub, player_on_type) %>%
    summarise(count = n(), avg_minute = mean(minute), .groups = "drop") %>%
    arrange(match_state_at_sub, desc(count))
  print(sub_patterns)

  # Key sub pattern: most frequent player type brought on while losing
  losing_subs <- sub_data %>%
    filter(match_state_at_sub == "Losing") %>%
    count(player_on_type) %>%
    arrange(desc(n))
  if (nrow(losing_subs) > 0) {
    cat(sprintf("\n When losing, most common sub: %s (%.0f%% of subs)\n",
                losing_subs$player_on_type[1],
                losing_subs$n[1] / sum(losing_subs$n) * 100))
  } else {
    cat("\n When losing, most common sub: n/a (no substitutions recorded while losing)\n")
  }

  # 4. Time-Based Tactical Phases
  cat("\n4. TACTICAL PHASES BY TIME PERIOD\n")
  cat(section_rule)
  time_phases <- state_data %>%
    group_by(period) %>%
    summarise(
      pressing = mean(pressing_intensity),
      directness = mean(directness),
      crosses = mean(crosses),
      .groups = "drop"
    )
  print(time_phases)

  # 5. Generate Scenario-Based Game Plans
  # Numbers are formatted with sprintf(): passing them to cat() as separate
  # arguments would pad them with the default " " separator.
  cat("\n5. SCENARIO-BASED GAME PLANS\n")
  cat(section_rule)
  cat("\n SCENARIO A: We are WINNING\n")
  cat(sprintf(" - Expect them to push higher (def line %+.0fm)\n", line_gap))
  cat(" - Counter-attacks become more viable\n")
  cat(sprintf(" - Watch for increased crosses (%.1f more per 15 min)\n", cross_gap))
  cat(" - Likely formation change to 3-4-3 or 4-2-4 after 60'\n")
  cat(" - Expect striker brought on around 65-70'\n")
  cat("\n SCENARIO B: We are LOSING\n")
  cat(" - They will drop deeper and reduce pressing\n")
  cat(" - Expect 5-4-1 or 5-3-2 formation after 75'\n")
  cat(" - Counter-attacks become their primary threat\n")
  cat(" - Watch for time-wasting substitutions late\n")
  cat("\n SCENARIO C: DRAWING late (75'+)\n")
  cat(" - Both teams likely to take more risks\n")
  cat(" - Space opens up in transitions\n")
  cat(" - Set pieces become crucial\n")

  # 6. Key Tactical Triggers
  cat("\n6. KEY TACTICAL TRIGGERS TO WATCH\n")
  cat(section_rule)
  cat(" Minute 60: Formation likely changes if losing\n")
  cat(" Minute 65-70: Attacking substitution if losing\n")
  cat(" Minute 75: Defensive switch if winning (5-back)\n")
  # Compare the AVERAGE late-period long-ball count with the overall average;
  # using the late maximum would overstate the per-period increase.
  late_long <- mean(state_data$long_balls[state_data$minute_start >= 75])
  overall_long <- mean(state_data$long_balls)
  cat(sprintf(" Minute 80+: Long balls increase significantly (%+.1f per period)\n",
              late_long - overall_long))

  invisible(list(
    state_metrics = state_metrics,
    formation_changes = formation_changes,
    sub_patterns = sub_patterns
  ))
}
# Run analysis
game_model <- analyze_state_tactics(match_state_data, substitution_data)

# Visualization
# Stack the three metrics into long form (one row per period and metric) so
# each metric gets its own facet.
state_summary <- match_state_data %>%
  pivot_longer(
    cols = c(pressing_intensity, directness, defensive_line),
    names_to = "metric",
    values_to = "value"
  )

# One boxplot panel per metric with independent y scales; the legend is
# hidden because the facet strips already name the metric.
ggplot(state_summary, aes(x = match_state, y = value, fill = metric)) +
  geom_boxplot() +
  facet_wrap(~metric, scales = "free_y") +
  labs(
    title = "Tactical Metrics by Match State",
    x = "Match State", y = "Value"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Summary
Key Takeaways
- Team style analysis quantifies how opponents build attacks, press, and defend through metrics like possession share, pass length, and pressing intensity
- Weakness identification reveals exploitable vulnerabilities in defensive organization, transitions, and set pieces
- Key player analysis identifies the most influential opponents and generates strategies for neutralization
- Set piece scouting uncovers delivery patterns, target zones, and defensive organization
- Tactical recommendations translate data insights into actionable strategies for coaches and players