Chapter 60

Capstone - Complete Analytics System

Intermediate 30 min read 5 sections 10 code examples
0 of 60 chapters completed (0%)

Opposition analysis provides the tactical intelligence teams need to prepare for upcoming matches. By systematically analyzing opponents' playing patterns, strengths, weaknesses, and key players, coaches can develop targeted game plans.

Team Style Analysis

Understanding how an opponent plays is the foundation of opposition analysis. We quantify playing style through metrics that capture build-up patterns, pressing intensity, and attacking approach.

team_style_analysis.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsbombpy import sb

# Load the opponent's most recent matches
matches = sb.matches(competition_id=11, season_id=90)  # La Liga

# Sort by date before truncating: head(10) on its own only returns whatever
# ten rows happen to come first, which is not necessarily the latest fixtures.
barcelona_matches = (
    matches[
        (matches['home_team'] == 'Barcelona') |
        (matches['away_team'] == 'Barcelona')
    ]
    .sort_values('match_date', ascending=False)
    .head(10)
)

# Get events for all matches and stack them into a single frame
all_events = [
    sb.events(match_id=match_id)
    for match_id in barcelona_matches['match_id']
]
opponent_events = pd.concat(all_events, ignore_index=True)

def calculate_team_style(events, team_name):
    """
    Calculate comprehensive team style metrics.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows for one or more matches. The columns read here are
        'team', 'type', 'minute', 'location', 'pass_end_location',
        'pass_length', 'pass_cross' and 'shot_statsbomb_xg'; 'match_id'
        is used when present.
    team_name : str
        Value of the 'team' column identifying the team to profile.

    Returns
    -------
    pandas.Series
        One entry per style metric (possession share, pass length,
        progressive/long pass share, pressing, shooting, width, crosses).
    """
    team_events = events[events['team'] == team_name].copy()
    total_events = len(events)

    def coordinate(locations, axis, default):
        # 'location'-style cells are [x, y] lists; anything else gets the default.
        return locations.apply(
            lambda loc: loc[axis] if isinstance(loc, list) else default
        )

    # Minutes played for per-90 normalization. The last recorded minute is
    # taken per match and summed, so the result depends only on `events`
    # rather than on a module-level match table.
    if 'match_id' in team_events.columns:
        match_minutes = team_events.groupby('match_id')['minute'].max().sum()
    else:
        # Without a match id the events are treated as a single match.
        match_minutes = team_events['minute'].max()
    # NaN (not a ZeroDivisionError) when the team has no events at all.
    nineties = match_minutes / 90 if match_minutes > 0 else np.nan

    style = {}

    # Possession characteristics
    style['possession_share'] = (
        len(team_events) / total_events * 100 if total_events else np.nan
    )

    passes = team_events[team_events['type'] == 'Pass']
    style['avg_pass_length'] = passes['pass_length'].mean()
    style['long_ball_pct'] = (passes['pass_length'] > 30).mean() * 100

    # Directness - a pass is progressive when it ends more than 10 units
    # further up the x axis than it started. Computed as a standalone mask so
    # no column is written onto a filtered slice.
    start_x = coordinate(passes['location'], 0, 0)
    end_x = coordinate(passes['pass_end_location'], 0, 0)
    is_progressive = (end_x - start_x) > 10
    style['progressive_pass_pct'] = is_progressive.mean() * 100

    # Pressing intensity
    pressures = team_events[team_events['type'] == 'Pressure']
    style['pressures_per_90'] = len(pressures) / nineties

    # High press (final third)
    pressure_x = coordinate(pressures['location'], 0, 0)
    style['high_press_pct'] = (pressure_x > 80).mean() * 100

    # Attacking approach
    shots = team_events[team_events['type'] == 'Shot']
    style['shots_per_90'] = len(shots) / nineties
    style['xg_per_shot'] = shots['shot_statsbomb_xg'].mean()

    # Width - spread of on-ball actions across the y axis
    possession_events = team_events[
        team_events['type'].isin(['Pass', 'Carry', 'Dribble'])
    ]
    style['avg_touch_width'] = coordinate(
        possession_events['location'], 1, 40
    ).std()

    # Crosses
    crosses = passes[passes['pass_cross'] == True]
    style['crosses_per_90'] = len(crosses) / nineties

    return pd.Series(style)

# Style profile for the opponent, computed from the events loaded above
barcelona_style = calculate_team_style(
    events=opponent_events,
    team_name='Barcelona',
)

# Create style radar
def create_style_radar(team_style, team_name, league_avg):
    """
    Create radar chart comparing team style to league average.

    Parameters
    ----------
    team_style : dict or pandas.Series
        Metric name -> value for the team.
    team_name : str
        Used for the legend entry and the chart title.
    league_avg : dict or pandas.Series
        Metric name -> league-average value; must contain every metric
        present in ``team_style``.

    Raises
    ------
    KeyError
        If ``league_avg`` lacks a metric that ``team_style`` has.
    """
    # Accept plain dicts as well as Series; the original read `.index`,
    # which a dict does not have.
    team_values = pd.Series(team_style, dtype=float)
    league_values = pd.Series(league_avg, dtype=float)

    missing = [m for m in team_values.index if m not in league_values.index]
    if missing:
        raise KeyError(f'league_avg is missing metrics: {missing}')

    categories = list(team_values.index)

    # Scale each metric so the larger of (team, league) sits at two thirds
    # of the radius.
    normalized_team = []
    normalized_avg = []
    for cat in categories:
        team_val = team_values[cat]
        avg_val = league_values[cat]
        max_val = max(team_val, avg_val) * 1.5

        if not max_val > 0:
            # Nothing to scale against (e.g. both values are 0); plot at the
            # centre instead of dividing by zero.
            normalized_team.append(0.0)
            normalized_avg.append(0.0)
            continue

        normalized_team.append(team_val / max_val * 100)
        normalized_avg.append(avg_val / max_val * 100)

    # One spoke per metric, then repeat the first point to close the polygon
    spoke_count = len(categories)
    angles = [i / float(spoke_count) * 2 * np.pi for i in range(spoke_count)]
    angles += angles[:1]
    normalized_team += normalized_team[:1]
    normalized_avg += normalized_avg[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

    ax.plot(angles, normalized_team, 'o-', linewidth=2, label=team_name, color='#e74c3c')
    ax.fill(angles, normalized_team, alpha=0.25, color='#e74c3c')

    ax.plot(angles, normalized_avg, 'o-', linewidth=2, label='League Average', color='gray')
    ax.fill(angles, normalized_avg, alpha=0.1, color='gray')

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories, size=10)
    ax.set_ylim(0, 100)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1))

    plt.title(f'{team_name} Playing Style Profile', size=14, y=1.08)
    plt.tight_layout()
    plt.show()

# Example radar inputs (illustrative numbers, not computed from the events).
# Kept under its own name so it does not overwrite the `barcelona_style`
# Series returned by calculate_team_style() above, whose metric names
# ('possession_share', 'crosses_per_90', ...) differ from these keys.
barcelona_radar_style = {
    'possession': 67.5,
    'progressive_passes': 12.3,
    'pressures': 145.2,
    'high_press_pct': 42.5,
    'shots': 15.8,
    'xg_per_shot': 0.12
}

# League average for comparison (same keys as the example above)
league_avg = {
    'possession': 50.0,
    'progressive_passes': 8.5,
    'pressures': 120.0,
    'high_press_pct': 32.0,
    'shots': 12.0,
    'xg_per_shot': 0.10
}

create_style_radar(barcelona_radar_style, 'Barcelona', league_avg)
library(tidyverse)
library(StatsBombR)

# Load opponent's recent matches
# NOTE(review): `Competitions` is not defined in this snippet; presumably it
# is the (filtered) result of FreeCompetitions() - confirm before running.
opponent_matches <- FreeMatches(Competitions) %>%
  filter(home_team.home_team_name == "Barcelona" |
         away_team.away_team_name == "Barcelona") %>%
  arrange(desc(match_date)) %>%  # newest first, so head() really is "last 10"
  head(10)

# free_allevents() fetches events for every row of a matches data frame;
# get.matchFree() expects a match row, not a bare match id. allclean() then
# adds the flattened coordinate columns (location.x, pass.end_location.x, ...)
# that the analysis functions below rely on.
opponent_events <- free_allevents(MatchesDF = opponent_matches) %>%
  allclean()

# Calculate team style metrics
#
# events:    event data frame with flattened StatsBomb columns (team.name,
#            type.name, match_id, minute, location.x/.y, pass.*, shot.*).
# team_name: value of team.name identifying the team to profile.
#
# Returns a one-row tibble with one column per style metric.
calculate_team_style <- function(events, team_name) {

  team_events <- events %>%
    filter(team.name == team_name)

  team_passes <- team_events %>%
    filter(type.name == "Pass")

  team_pressures <- team_events %>%
    filter(type.name == "Pressure")

  team_shots <- team_events %>%
    filter(type.name == "Shot")

  possession_events <- team_events %>%
    filter(type.name %in% c("Pass", "Carry", "Dribble"))

  # Playing time: last recorded minute of each match, summed over matches.
  # (Summing event durations does not measure match time, and the previous
  # `/ (seconds / 5400) * 90` scaled every per-90 figure by an extra 90.)
  minutes_played <- team_events %>%
    group_by(match_id) %>%
    summarise(minutes = max(minute, na.rm = TRUE), .groups = "drop") %>%
    pull(minutes) %>%
    sum()

  # NA rather than Inf/NaN when the team has no events
  nineties <- if (minutes_played > 0) minutes_played / 90 else NA_real_

  style_metrics <- list(
    # Build-up characteristics
    possession_share = nrow(team_events) / nrow(events) * 100,
    avg_pass_length = mean(team_passes$pass.length, na.rm = TRUE),
    long_ball_pct = mean(team_passes$pass.length > 30, na.rm = TRUE) * 100,

    # Directness: share of passes ending more than 10 units further up the
    # x axis than they started
    progressive_pass_pct = mean(
      team_passes$pass.end_location.x - team_passes$location.x > 10,
      na.rm = TRUE
    ) * 100,

    # Pressing intensity
    pressures_per_90 = nrow(team_pressures) / nineties,
    high_press_pct = mean(team_pressures$location.x > 80, na.rm = TRUE) * 100,

    # Attacking approach
    shots_per_90 = nrow(team_shots) / nineties,
    xg_per_shot = mean(team_shots$shot.statsbomb_xg, na.rm = TRUE),

    # Width: spread of on-ball actions across the y axis
    avg_touch_width = sd(possession_events$location.y, na.rm = TRUE),

    # Crosses
    crosses_per_90 = team_passes %>%
      filter(pass.cross == TRUE) %>%
      nrow() / nineties
  )

  as_tibble(style_metrics)
}

barcelona_style <- calculate_team_style(opponent_events, "Barcelona")

# Compare to league average
# NOTE(review): `all_teams` is not built in this snippet; presumably it holds
# one calculate_team_style() row per team - confirm against the caller.
# Only numeric columns are averaged (a team-name column would otherwise yield
# NA with a warning) and missing metric values are ignored.
league_avg <- all_teams %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))

style_comparison <- bind_rows(
  barcelona_style %>% mutate(team = "Barcelona"),
  league_avg %>% mutate(team = "League Avg")
) %>%
  pivot_longer(-team, names_to = "metric", values_to = "value") %>%
  pivot_wider(names_from = team, values_from = value) %>%
  mutate(
    difference = Barcelona - `League Avg`,
    # Relative gap to the league average, in percent
    pct_diff = difference / `League Avg` * 100
  )

print(style_comparison)

Identifying Weaknesses

Every team has exploitable weaknesses. Data analysis can reveal defensive vulnerabilities, pressing weaknesses, and situational problems that can be targeted.

weakness_analysis.py
def analyze_defensive_weaknesses(events, team_name):
    """
    Identify defensive vulnerabilities for opposition analysis.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows; the columns read here are 'team', 'type', 'location',
        'shot_statsbomb_xg', 'shot_outcome', 'play_pattern', 'duel_outcome',
        'player' and 'position'.
    team_name : str
        The team whose defending is being analysed.

    Returns
    -------
    dict
        'goals_by_zone' (shots/xG/goals conceded per zone and side),
        'transition_vulnerability', 'set_piece_vulnerability' (dicts with
        'shots', 'xg', 'goals'), 'duel_analysis' (duels lost/contested per
        pitch third) and 'player_weaknesses' (per-player counts of being
        dribbled past, fouls and errors).
    """
    def coordinate(locations, axis, default):
        # 'location' cells are [x, y] lists; anything else gets the default.
        return locations.apply(
            lambda loc: loc[axis] if isinstance(loc, list) else default
        )

    # Events where opponent is attacking
    defensive_events = events[events['team'] != team_name].copy()

    weaknesses = {}

    # Shots conceded by zone
    shots_conceded = defensive_events[defensive_events['type'] == 'Shot'].copy()
    shots_conceded['location_x'] = coordinate(shots_conceded['location'], 0, 60)
    shots_conceded['location_y'] = coordinate(shots_conceded['location'], 1, 40)

    # include_lowest=True keeps coordinates sitting exactly on 0 (e.g. the
    # touchline); with the default they fall outside every bin and vanish.
    # NOTE(review): x is in the shooting side's frame, so x > 80 is close to
    # the analysed team's own goal; check whether the 'Low Block' /
    # 'High Press' labels are the intended way round.
    shots_conceded['attack_zone'] = pd.cut(
        shots_conceded['location_x'],
        bins=[0, 40, 80, 120],
        labels=['Low Block', 'Mid Block', 'High Press'],
        include_lowest=True
    )
    shots_conceded['attack_side'] = pd.cut(
        shots_conceded['location_y'],
        bins=[0, 27, 53, 80],
        labels=['Left', 'Central', 'Right'],
        include_lowest=True
    )

    # observed=False is spelled out so every zone/side cell is present (the
    # heatmap needs the full grid) regardless of the pandas default.
    weaknesses['goals_by_zone'] = (
        shots_conceded
        .groupby(['attack_zone', 'attack_side'], observed=False)
        .agg({
            'shot_statsbomb_xg': ['sum', 'count'],
            'shot_outcome': lambda x: (x == 'Goal').sum()
        })
        .reset_index()
    )

    def shot_summary(patterns):
        # Shots, xG and goals conceded from the given play patterns
        subset = shots_conceded[shots_conceded['play_pattern'].isin(patterns)]
        return {
            'shots': len(subset),
            'xg': subset['shot_statsbomb_xg'].sum(),
            'goals': (subset['shot_outcome'] == 'Goal').sum()
        }

    # Transition vulnerability
    weaknesses['transition_vulnerability'] = shot_summary(
        ['From Counter', 'From Throw In']
    )

    # Set piece vulnerability
    weaknesses['set_piece_vulnerability'] = shot_summary(
        ['From Corner', 'From Free Kick']
    )

    # Defensive duels analysis
    team_events = events[events['team'] == team_name]
    duel_events = team_events[
        team_events['type'].isin(['Duel', 'Dribbled Past'])
    ].copy()

    duel_events['location_x'] = coordinate(duel_events['location'], 0, 60)
    duel_events['zone'] = pd.cut(
        duel_events['location_x'],
        bins=[0, 40, 80, 120],
        labels=['Defensive Third', 'Middle Third', 'Attacking Third'],
        include_lowest=True
    )

    # Being dribbled past always counts as a lost duel
    duel_events['duel_lost'] = (
        duel_events['duel_outcome'].isin(['Lost', 'Lost In Play', 'Lost Out']) |
        (duel_events['type'] == 'Dribbled Past')
    )

    weaknesses['duel_analysis'] = (
        duel_events
        .groupby('zone', observed=False)
        .agg({
            'duel_lost': ['sum', 'count']
        })
        .reset_index()
    )

    # Player-specific weaknesses
    weaknesses['player_weaknesses'] = (
        team_events
        .groupby(['player', 'position'])
        .agg({
            'type': [
                lambda x: (x == 'Dribbled Past').sum(),
                lambda x: (x == 'Foul Committed').sum(),
                lambda x: (x == 'Error').sum()
            ]
        })
        .reset_index()
    )

    return weaknesses

# Defensive vulnerability summary for the opponent
barcelona_weaknesses = analyze_defensive_weaknesses(
    events=opponent_events,
    team_name='Barcelona',
)

# Visualize vulnerability heatmap
def plot_vulnerability_zones(weaknesses):
    """
    Create heatmap of defensive vulnerabilities.

    Parameters
    ----------
    weaknesses : dict
        Result of analyze_defensive_weaknesses(); only 'goals_by_zone' is
        read, and it is expected to carry the two-level columns produced
        there (('attack_zone', ''), ('attack_side', ''),
        ('shot_statsbomb_xg', 'sum')).
    """
    zone_data = weaknesses['goals_by_zone']

    # Flatten the columns needed for the grid first: pivoting a frame with
    # MultiIndex columns by plain string keys is fragile.
    flat = pd.DataFrame({
        'attack_zone': zone_data[('attack_zone', '')],
        'attack_side': zone_data[('attack_side', '')],
        'xg': zone_data[('shot_statsbomb_xg', 'sum')],
    })

    # Reshape for heatmap; cells without any shot show as 0 xG
    pivot_data = flat.pivot(
        index='attack_zone',
        columns='attack_side',
        values='xg'
    ).fillna(0)

    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.imshow(pivot_data.values, cmap='YlOrRd')

    ax.set_xticks(range(len(pivot_data.columns)))
    ax.set_yticks(range(len(pivot_data.index)))
    ax.set_xticklabels(pivot_data.columns)
    ax.set_yticklabels(pivot_data.index)

    # Add text annotations
    for i in range(len(pivot_data.index)):
        for j in range(len(pivot_data.columns)):
            text = f'{pivot_data.values[i, j]:.2f}'
            ax.text(j, i, text, ha='center', va='center', color='white')

    ax.set_title('Defensive Vulnerability Zones (xG Conceded)')
    plt.colorbar(im)
    plt.tight_layout()
    plt.show()

# Render the xG-conceded grid for the opponent
plot_vulnerability_zones(weaknesses=barcelona_weaknesses)
# Analyze defensive weaknesses
#
# events:    event data frame with flattened StatsBomb columns.
# team_name: value of team.name for the team whose defending is analysed.
#
# Returns a list with goals_by_zone, transition_vulnerability, duel_analysis,
# set_piece_vulnerability and player_weaknesses.
analyze_defensive_weaknesses <- function(events, team_name) {

  # Get events where opponent is attacking
  defensive_events <- events %>%
    filter(team.name != team_name)

  shots_against <- defensive_events %>%
    filter(type.name == "Shot")

  # Goals conceded by zone
  # NOTE(review): x is in the shooting side's frame, so x >= 80 is close to
  # the analysed team's own goal; check whether the "Low Block" /
  # "High Press" labels are the intended way round.
  goals_conceded <- shots_against %>%
    filter(shot.outcome.name == "Goal") %>%
    mutate(
      attack_zone = case_when(
        is.na(location.x) ~ "Unknown",  # don't let NA fall into the last zone
        location.x < 40 ~ "Low Block",
        location.x < 80 ~ "Mid Block",
        TRUE ~ "High Press"
      ),
      attack_side = case_when(
        is.na(location.y) ~ "Unknown",
        location.y < 27 ~ "Left",
        location.y > 53 ~ "Right",
        TRUE ~ "Central"
      )
    ) %>%
    group_by(attack_zone, attack_side) %>%
    summarise(
      goals = n(),
      xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      .groups = "drop"
    )

  # Vulnerability to transitions
  transition_danger <- shots_against %>%
    filter(play_pattern.name %in% c("From Counter", "From Throw In")) %>%
    summarise(
      transition_shots = n(),
      transition_xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      transition_goals = sum(shot.outcome.name == "Goal", na.rm = TRUE)
    )

  # Defensive duels lost
  duels_lost <- events %>%
    filter(
      team.name == team_name,
      type.name %in% c("Duel", "Dribbled Past")
    ) %>%
    mutate(
      # %in% never yields NA, so no coalesce() is needed; being dribbled
      # past always counts as a lost duel
      duel_lost = duel.outcome.name %in% c("Lost", "Lost In Play", "Lost Out") |
                  type.name == "Dribbled Past"
    ) %>%
    group_by(
      zone = case_when(
        is.na(location.x) ~ "Unknown",
        location.x < 40 ~ "Defensive Third",
        location.x < 80 ~ "Middle Third",
        TRUE ~ "Attacking Third"
      )
    ) %>%
    summarise(
      total_duels = n(),
      duels_lost = sum(duel_lost),
      loss_rate = duels_lost / total_duels * 100
    )

  # Set piece vulnerability
  set_piece_conceded <- shots_against %>%
    filter(play_pattern.name %in% c("From Corner", "From Free Kick")) %>%
    summarise(
      set_piece_shots = n(),
      set_piece_xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      set_piece_goals = sum(shot.outcome.name == "Goal", na.rm = TRUE)
    )

  # Player-specific weaknesses (who gets beaten most)
  player_weaknesses <- events %>%
    filter(team.name == team_name) %>%
    group_by(player.name, position.name) %>%
    summarise(
      dribbled_past = sum(type.name == "Dribbled Past"),
      fouls = sum(type.name == "Foul Committed"),
      errors = sum(type.name == "Error"),
      .groups = "drop"
    ) %>%
    arrange(desc(dribbled_past + errors))

  list(
    goals_by_zone = goals_conceded,
    transition_vulnerability = transition_danger,
    duel_analysis = duels_lost,
    set_piece_vulnerability = set_piece_conceded,
    player_weaknesses = player_weaknesses
  )
}

barcelona_weaknesses <- analyze_defensive_weaknesses(
  events = opponent_events,
  team_name = "Barcelona"
)

# Tile plot of goals / xG conceded per attack side and zone
barcelona_weaknesses[["goals_by_zone"]] %>%
  ggplot(mapping = aes(x = attack_side, y = attack_zone, fill = xg)) +
  geom_tile() +
  geom_text(
    mapping = aes(label = paste0("Goals: ", goals, "\nxG: ", round(xg, 2))),
    color = "white"
  ) +
  scale_fill_gradient(low = "yellow", high = "red") +
  labs(
    title = "Barcelona Defensive Vulnerability Zones",
    x = "Attack Side",
    y = "Defensive Phase"
  ) +
  theme_minimal()

Key Player Analysis

Identifying and neutralizing opposition key players is essential. We analyze which players are most influential and how to limit their impact.

key_player_analysis.py
def analyze_key_players(events, team_name):
    """
    Identify and analyze opposition key players.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows; the columns read here are 'team', 'player', 'position',
        'id', 'type', 'minute', 'location', 'pass_end_location',
        'pass_outcome', 'pass_shot_assist', 'shot_statsbomb_xg' and
        'shot_outcome'; 'match_id' is used when present.
    team_name : str
        The team whose players are ranked.

    Returns
    -------
    dict
        'influence_ranking': one row per (player, position), sorted by
        'overall_influence' descending; 'top_threats': the five highest
        ranked rows with more than 45 minutes.
    """
    team_events = events[events['team'] == team_name].copy()

    def x_coordinate(column):
        # [x, y] list -> x; anything else -> NaN (never counted below)
        return team_events[column].apply(
            lambda loc: loc[0] if isinstance(loc, list) else np.nan
        ).astype(float)

    # Per-event flags, so all counts come out of a single groupby instead of
    # re-filtering the frame once per player.
    is_pass = team_events['type'] == 'Pass'
    team_events['is_pass'] = is_pass
    # A pass without a recorded outcome is treated as completed; restricting
    # to passes keeps non-pass events (whose outcome is always missing) from
    # inflating the completion rate.
    team_events['pass_completed'] = is_pass & (
        team_events['pass_outcome'].isna() |
        (team_events['pass_outcome'] == 'Complete')
    )
    # Progressive = ends more than 10 units further up the pitch than it
    # started (end minus start; the reverse order counts backward passes).
    team_events['is_progressive'] = is_pass & (
        (x_coordinate('pass_end_location') - x_coordinate('location')) > 10
    )
    team_events['is_key_pass'] = team_events['pass_shot_assist'] == True
    team_events['is_pressure'] = team_events['type'] == 'Pressure'
    team_events['is_goal'] = team_events['shot_outcome'] == 'Goal'

    group_keys = ['player', 'position']
    player_stats = (
        team_events
        .groupby(group_keys)
        .agg(
            min_minute=('minute', 'min'),
            max_minute=('minute', 'max'),
            touches=('id', 'count'),
            passes=('is_pass', 'sum'),
            completed_passes=('pass_completed', 'sum'),
            xg=('shot_statsbomb_xg', 'sum'),
            goals=('is_goal', 'sum'),
            progressive_passes=('is_progressive', 'sum'),
            key_passes=('is_key_pass', 'sum'),
            pressures=('is_pressure', 'sum'),
        )
        .reset_index()
    )

    # NaN (not 100%) for players who attempted no passes
    attempted = player_stats['passes'].where(player_stats['passes'] > 0)
    player_stats['pass_completion'] = (
        player_stats.pop('completed_passes') / attempted * 100
    )

    # Minutes: first-to-last involvement within each match, summed over
    # matches. A single max - min over concatenated matches caps everyone at
    # roughly one match and inflates every per-90 figure.
    if 'match_id' in team_events.columns:
        spans = (
            team_events
            .groupby(group_keys + ['match_id'])['minute']
            .agg(['min', 'max'])
            .reset_index()
        )
        spans['minutes'] = spans['max'] - spans['min']
        minutes = spans.groupby(group_keys, as_index=False)['minutes'].sum()
        player_stats = player_stats.merge(minutes, on=group_keys, how='left')
    else:
        player_stats['minutes'] = (
            player_stats['max_minute'] - player_stats['min_minute']
        )

    player_stats = player_stats[[
        'player', 'position', 'min_minute', 'max_minute',
        'touches', 'passes', 'pass_completion', 'xg', 'goals',
        'minutes', 'progressive_passes', 'key_passes', 'pressures'
    ]]

    # Calculate influence scores (NaN instead of inf for zero minutes)
    nineties = (player_stats['minutes'] / 90).where(player_stats['minutes'] > 0)
    player_stats['attacking_influence'] = (
        player_stats['xg'] * 10 +
        player_stats['key_passes'] * 2 +
        player_stats['progressive_passes'] * 0.5
    ) / nineties

    player_stats['overall_influence'] = player_stats['attacking_influence']

    # Top threats
    top_threats = (
        player_stats[player_stats['minutes'] > 45]
        .nlargest(5, 'overall_influence')
    )

    return {
        'influence_ranking': player_stats.sort_values('overall_influence', ascending=False),
        'top_threats': top_threats
    }

# Influence ranking and top threats for the opponent
key_players = analyze_key_players(
    events=opponent_events,
    team_name='Barcelona',
)

def create_player_heatmap(events, player_name):
    """
    Draw a kernel-density heatmap of one player's event locations.

    Rows of ``events`` whose 'player' equals ``player_name`` are used; rows
    whose 'location' is not an [x, y] list are discarded. The figure is
    shown with plt.show() and nothing is returned.
    """
    from mplsoccer import Pitch

    touches = events.loc[events['player'] == player_name].copy()

    def coordinate(axis):
        # Pull one component out of the [x, y] list, NaN when unavailable
        return touches['location'].apply(
            lambda loc: loc[axis] if isinstance(loc, list) else np.nan
        )

    touches['x'] = coordinate(0)
    touches['y'] = coordinate(1)
    touches = touches.dropna(subset=['x', 'y'])

    # Dark StatsBomb-dimension pitch
    pitch = Pitch(pitch_type='statsbomb', line_color='white', pitch_color='#1a1a1a')
    fig, ax = pitch.draw(figsize=(12, 8))

    # Density layer on top of the pitch
    pitch.kdeplot(
        touches['x'],
        touches['y'],
        ax=ax,
        cmap='hot',
        fill=True,
        levels=50,
        alpha=0.7,
    )

    ax.set_title(f'{player_name} - Touch Heatmap', fontsize=14, color='white')
    plt.tight_layout()
    plt.show()

# Create threat report
def generate_threat_report(key_players):
    """
    Generate tactical recommendations for neutralizing key players.

    Parameters
    ----------
    key_players : dict
        Must contain 'top_threats', a DataFrame with the columns 'player',
        'position', 'overall_influence' and 'key_passes'.

    Returns
    -------
    pandas.DataFrame
        One row per threat with 'player', 'position', 'threat_level',
        'key_strength' and 'recommendation'.
    """
    def recommendation_for(position):
        # Match on the role at the end of the position name so that sided
        # variants ('Right Center Forward', 'Center Attacking Midfield', ...)
        # get the same advice as the plain role. 'Left Wing Back' ends in
        # 'Back' and therefore still falls through to the default.
        role = position if isinstance(position, str) else ''
        if role.endswith(('Forward', 'Striker')):
            return "Man-mark in box, deny service, press when receiving back to goal"
        if role.endswith(('Attacking Midfield', 'Wing')):
            return "Double up when on ball, prevent turns, force to weaker foot"
        return "Press early, prevent easy progression, force long balls"

    report = []

    for _, player in key_players['top_threats'].iterrows():
        report.append({
            'player': player['player'],
            'position': player['position'],
            'threat_level': 'HIGH' if player['overall_influence'] > 5 else 'MEDIUM',
            'key_strength': 'Chance creation' if player['key_passes'] > 2 else 'Ball progression',
            'recommendation': recommendation_for(player['position'])
        })

    return pd.DataFrame(report)

# Build the per-player recommendations and show them
threat_report = generate_threat_report(key_players=key_players)
print(threat_report)
# Identify key players and their influence
#
# events:    event data frame with flattened StatsBomb columns.
# team_name: value of team.name for the team whose players are ranked.
#
# Returns a list with influence_ranking (one row per player/position, sorted
# by overall_influence), top_threats (top five with more than 45 minutes) and
# receiving_patterns (ball receipts of those players by zone).
analyze_key_players <- function(events, team_name) {

  team_events <- events %>%
    filter(team.name == team_name)

  # Minutes: first-to-last involvement within each match, summed over
  # matches. One max - min over all matches together caps everyone at about
  # a single match and inflates every per-90 figure.
  player_minutes <- team_events %>%
    group_by(player.name, position.name, match_id) %>%
    summarise(match_minutes = max(minute) - min(minute), .groups = "drop") %>%
    group_by(player.name, position.name) %>%
    summarise(minutes = sum(match_minutes), .groups = "drop")

  # Calculate influence metrics per player
  player_influence <- team_events %>%
    group_by(player.name, position.name) %>%
    summarise(
      touches = n(),
      passes = sum(type.name == "Pass"),
      # Completion over passes only: a pass with no recorded outcome counts
      # as completed, but non-pass events (outcome always NA) must not.
      completed_passes = sum(
        type.name == "Pass" &
          (is.na(pass.outcome.name) | pass.outcome.name == "Complete")
      ),
      pass_completion = if_else(
        passes > 0, completed_passes / passes * 100, NA_real_
      ),
      progressive_passes = sum(
        type.name == "Pass" & (pass.end_location.x - location.x) > 10,
        na.rm = TRUE
      ),
      key_passes = sum(pass.shot_assist == TRUE, na.rm = TRUE),
      assists = sum(pass.goal_assist == TRUE, na.rm = TRUE),
      shots = sum(type.name == "Shot"),
      xg = sum(shot.statsbomb_xg, na.rm = TRUE),
      goals = sum(type.name == "Shot" & shot.outcome.name == "Goal"),
      dribbles_completed = sum(type.name == "Dribble" &
                               dribble.outcome.name == "Complete"),
      carries = sum(type.name == "Carry"),
      pressures = sum(type.name == "Pressure"),
      tackles = sum(type.name == "Duel" & duel.type.name == "Tackle"),
      interceptions = sum(type.name == "Interception"),
      .groups = "drop"
    ) %>%
    select(-completed_passes) %>%
    left_join(player_minutes, by = c("player.name", "position.name")) %>%
    relocate(minutes, .after = position.name) %>%
    mutate(
      # Calculate composite influence score; zero minutes gives NA, not Inf
      nineties = na_if(minutes / 90, 0),
      attacking_influence =
        (xg * 10 + key_passes * 2 + progressive_passes * 0.5) / nineties,
      defensive_influence = (pressures + tackles + interceptions) / nineties,
      overall_influence = attacking_influence + defensive_influence * 0.5
    ) %>%
    select(-nineties) %>%
    arrange(desc(overall_influence))

  # Identify most dangerous players
  danger_players <- player_influence %>%
    filter(minutes > 45) %>%  # Minimum playing time
    slice_head(n = 5)

  # Analyze how key players receive the ball
  # StatsBomb's y axis starts at the attacking team's left touchline, so low
  # y is the left side and high y the right side.
  key_player_receiving <- team_events %>%
    filter(player.name %in% danger_players$player.name) %>%
    filter(type.name == "Ball Receipt*") %>%
    mutate(
      receive_zone = case_when(
        location.x > 80 & location.y < 27 ~ "Left Halfspace",
        location.x > 80 & location.y > 53 ~ "Right Halfspace",
        location.x > 80 ~ "Central",
        location.x > 60 ~ "Build-up Zone",
        TRUE ~ "Deep"
      )
    ) %>%
    group_by(player.name, receive_zone) %>%
    summarise(receipts = n(), .groups = "drop")

  list(
    influence_ranking = player_influence,
    top_threats = danger_players,
    receiving_patterns = key_player_receiving
  )
}

# Influence ranking, top threats and receiving zones for the opponent
key_players <- analyze_key_players(
  events = opponent_events,
  team_name = "Barcelona"
)

# Visualize key player zones
#
# events:      event data frame with player.name, location.x and location.y.
# player_name: value of player.name to plot.
#
# Returns a ggplot object: a 2D density of the player's event locations over
# a StatsBomb-dimension pitch.
create_player_heatmap <- function(events, player_name) {
  player_events <- events %>%
    filter(player.name == player_name) %>%
    filter(!is.na(location.x), !is.na(location.y))

  # The pitch helpers come from ggsoccer, which this script never attaches,
  # so they are namespaced explicitly.
  ggplot(player_events, aes(x = location.x, y = location.y)) +
    ggsoccer::annotate_pitch(
      dimensions = ggsoccer::pitch_statsbomb,
      colour = "grey80"
    ) +
    # after_stat() replaces the deprecated ..level.. notation
    stat_density_2d(
      aes(fill = after_stat(level)),
      geom = "polygon",
      alpha = 0.6
    ) +
    scale_fill_viridis_c(option = "plasma") +
    labs(
      title = paste(player_name, "- Touch Heatmap"),
      subtitle = "Opposition Analysis"
    ) +
    ggsoccer::theme_pitch() +
    coord_flip()
}

Set Piece Scouting

Detailed analysis of opposition set piece routines reveals patterns that can be defended or exploited.

set_piece_scouting.py
def analyze_set_pieces(events, team_name):
    """
    Analyze opposition set piece routines.

    Parameters
    ----------
    events : pandas.DataFrame
        Event rows; the columns read here are 'team', 'type', 'pass_type',
        'pass_technique', 'location' and 'pass_end_location'.
    team_name : str
        The team whose set pieces are analysed.

    Returns
    -------
    dict
        When the team took corners: 'corner_patterns' (counts per delivery
        type, target zone and side), 'preferred_delivery' and
        'most_targeted_zone'. When it took free kicks: 'free_kick_zones'
        (counts per pitch zone). Keys are absent when there is no data.
    """
    def coordinate(locations, axis, default):
        # [x, y] list -> one component; anything else -> default
        return locations.apply(
            lambda loc: loc[axis] if isinstance(loc, list) else default
        )

    team_events = events[events['team'] == team_name].copy()

    analysis = {}

    # Corner analysis
    corners = team_events[
        (team_events['type'] == 'Pass') &
        (team_events['pass_type'] == 'Corner')
    ].copy()

    if len(corners) > 0:
        # Categorize corner deliveries
        corners['corner_type'] = corners['pass_technique'].map({
            'Inswinging': 'Inswinger',
            'Outswinging': 'Outswinger'
        }).fillna('Driven')

        # StatsBomb's y axis starts at the attacking team's left touchline,
        # so a corner taken at low y comes from the left.
        corners['location_y'] = coordinate(corners['location'], 1, 40)
        from_left = corners['location_y'] < 40
        corners['corner_side'] = np.where(from_left, 'Left', 'Right')

        # Target zone
        corners['end_x'] = coordinate(corners['pass_end_location'], 0, 100)
        corners['end_y'] = coordinate(corners['pass_end_location'], 1, 40)

        # The goalposts sit at y = 36 and y = 44. Which of them is the near
        # post depends on the side the corner is taken from.
        below_posts = corners['end_y'] < 36
        above_posts = corners['end_y'] > 44
        near_post = np.where(from_left, below_posts, above_posts)
        far_post = np.where(from_left, above_posts, below_posts)
        corners['target_zone'] = np.select(
            [near_post, far_post],
            ['Near Post', 'Far Post'],
            default='Central'
        )

        analysis['corner_patterns'] = (
            corners
            .groupby(['corner_type', 'target_zone', 'corner_side'])
            .size()
            .reset_index(name='count')
        )

        analysis['preferred_delivery'] = corners['corner_type'].mode()[0]
        analysis['most_targeted_zone'] = corners['target_zone'].mode()[0]

    # Free kick analysis: only the free kick itself. Filtering on the
    # 'From Free Kick' play pattern would also count every later pass of the
    # same possession, at locations unrelated to where the kick was taken.
    free_kicks = team_events[
        (team_events['type'] == 'Pass') &
        (team_events['pass_type'] == 'Free Kick')
    ].copy()

    if len(free_kicks) > 0:
        free_kicks['fk_x'] = coordinate(free_kicks['location'], 0, 60)
        free_kicks['fk_zone'] = pd.cut(
            free_kicks['fk_x'],
            bins=[0, 80, 100, 120],
            labels=['Build-up', 'Crossing Position', 'Shooting Range'],
            include_lowest=True
        )

        # observed=False keeps all three zones in the table, even with 0 kicks
        analysis['free_kick_zones'] = (
            free_kicks
            .groupby('fk_zone', observed=False)
            .size()
            .reset_index(name='count')
        )

    return analysis

# Set piece tendencies for the opponent
set_piece_analysis = analyze_set_pieces(
    events=opponent_events,
    team_name='Barcelona',
)

def visualize_corner_patterns(corner_data):
    """
    Plot corner counts per target zone as grouped bars, one bar per
    delivery type.

    ``corner_data`` needs the columns 'target_zone', 'corner_type' and
    'count'. The figure is shown with plt.show(); nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Rows: target zone; columns: delivery type; cells: summed counts
    counts_by_zone = pd.pivot_table(
        corner_data,
        index='target_zone',
        columns='corner_type',
        values='count',
        aggfunc='sum',
        fill_value=0,
    )
    counts_by_zone.plot(kind='bar', ax=ax)

    ax.set_xlabel('Target Zone')
    ax.set_ylabel('Count')
    ax.set_title('Opposition Corner Delivery Patterns')
    ax.legend(title='Delivery Type')

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Only plot when the analysis actually found corners
corner_patterns = set_piece_analysis.get('corner_patterns')
if corner_patterns is not None:
    visualize_corner_patterns(corner_patterns)
# Analyze opposition set piece routines
#
# events:    event data frame with flattened StatsBomb columns (including
#            match_id and the per-match event `index`).
# team_name: value of team.name for the team being scouted.
#
# Returns a list with corner_delivery, free_kick_patterns,
# defensive_organization and a summary list (corners_taken,
# preferred_delivery, most_targeted_zone).
analyze_set_pieces <- function(events, team_name) {

  team_events <- events %>%
    filter(team.name == team_name)

  # Corner analysis
  corners <- team_events %>%
    filter(type.name == "Pass", pass.type.name == "Corner")

  corner_analysis <- corners %>%
    mutate(
      corner_type = case_when(
        pass.technique.name == "Inswinging" ~ "Inswinger",
        pass.technique.name == "Outswinging" ~ "Outswinger",
        pass.length < 15 ~ "Short",
        TRUE ~ "Driven"
      ),
      # StatsBomb's y axis starts at the attacking team's left touchline, so
      # a corner taken at low y comes from the left.
      corner_side = ifelse(location.y < 40, "Left", "Right"),
      # The goalposts sit at y = 36 and y = 44; which one is the near post
      # depends on the side the corner is taken from.
      to_near_post = (corner_side == "Left" & pass.end_location.y < 36) |
                     (corner_side == "Right" & pass.end_location.y > 44),
      to_far_post = (corner_side == "Left" & pass.end_location.y > 44) |
                    (corner_side == "Right" & pass.end_location.y < 36),
      target_zone = case_when(
        pass.end_location.x > 114 & to_near_post ~ "Near Post",
        pass.end_location.x > 114 & to_far_post ~ "Far Post",
        pass.end_location.x > 114 ~ "Central",
        TRUE ~ "Edge of Box"
      )
    ) %>%
    group_by(corner_type, target_zone, corner_side) %>%
    summarise(
      count = n(),
      shot_created = sum(!is.na(pass.shot_assist) | !is.na(pass.goal_assist)),
      .groups = "drop"
    )

  # Free kick analysis: only the free kick itself. Filtering on the
  # "From Free Kick" play pattern would also keep every later pass of the
  # same possession, at locations unrelated to where the kick was taken.
  free_kicks <- team_events %>%
    filter(
      type.name == "Pass",
      pass.type.name == "Free Kick"
    ) %>%
    mutate(
      fk_zone = case_when(
        location.x > 100 ~ "Shooting Range",
        location.x > 80 ~ "Crossing Position",
        TRUE ~ "Build-up"
      ),
      delivery_type = case_when(
        pass.cross == TRUE ~ "Cross",
        pass.length < 10 ~ "Short",
        TRUE ~ "Direct"
      )
    )

  # Defensive set piece organization
  defensive_corners <- events %>%
    filter(
      team.name != team_name,
      type.name == "Pass",
      pass.type.name == "Corner"
    )

  # The clearance's event index gets its own name: joining two tables that
  # both carry `index` renames them to index.x / index.y, and comparing
  # `index` with itself can never select anything meaningful.
  clearances <- events %>%
    filter(team.name == team_name, type.name == "Clearance") %>%
    select(match_id, clearance_index = index,
           clearance_location_x = location.x,
           clearance_location_y = location.y,
           clearing_player = player.name)

  defensive_system <- defensive_corners %>%
    # Analyze team positioning from clearing actions
    left_join(clearances, by = "match_id") %>%
    # Keep clearances within 10 events after the corner
    filter(clearance_index > index, clearance_index <= index + 10)

  list(
    corner_delivery = corner_analysis,
    free_kick_patterns = free_kicks,
    defensive_organization = defensive_system,

    # Key stats
    summary = list(
      corners_taken = nrow(corners),
      preferred_delivery = corner_analysis %>%
        group_by(corner_type) %>%
        summarise(n = sum(count)) %>%
        arrange(desc(n)) %>%
        slice(1) %>%
        pull(corner_type),
      most_targeted_zone = corner_analysis %>%
        group_by(target_zone) %>%
        summarise(n = sum(count)) %>%
        arrange(desc(n)) %>%
        slice(1) %>%
        pull(target_zone)
    )
  )
}

# Set piece tendencies for the opponent
set_piece_analysis <- analyze_set_pieces(
  events = opponent_events,
  team_name = "Barcelona"
)

# Visualize corner delivery patterns
#
# corners_data: data frame with target_zone, count, corner_type and
#               corner_side columns.
# Returns a ggplot object: dodged bars of count per target zone, coloured by
# delivery type, one facet per corner side.
visualize_corner_patterns <- function(corners_data) {
  corners_data %>%
    ggplot(mapping = aes(x = target_zone, y = count, fill = corner_type)) +
    geom_col(position = "dodge") +
    facet_wrap(vars(corner_side)) +
    labs(
      title = "Opposition Corner Delivery Patterns",
      x = "Target Zone",
      y = "Count",
      fill = "Delivery Type"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

Generating Tactical Recommendations

The final step is translating data insights into actionable tactical recommendations for coaches and players.

tactical_recommendations.py
def generate_opposition_report(team_name, style, weaknesses, key_players, set_pieces):
    """
    Generate comprehensive opposition analysis report.

    Args:
        team_name: Name of the opponent, used in the summary sentence.
        style: Mapping of style metrics. Reads 'possession_share',
            'avg_pass_length', 'long_ball_pct', 'pressures_per_90',
            'crosses_per_90', 'xg_per_shot' and 'high_press_pct'.
        weaknesses: Mapping; reads weaknesses['transition_vulnerability']['xg'].
        key_players: Mapping; key_players['top_threats'] is a sized collection
            whose first row (via .iloc[0]) exposes 'player' and 'position'.
        set_pieces: Mapping with 'preferred_delivery' and 'most_targeted_zone',
            either at the top level or nested under a 'summary' mapping.
            A falsy value skips the set piece section.

    Returns:
        dict with keys 'team', 'summary' (one-paragraph text) and
        'recommendations' (mapping of category -> dict or list of advice).
    """
    report = {
        'team': team_name,
        'summary': '',
        'recommendations': {}
    }
    recommendations = report['recommendations']

    # Determine style label (first matching rule wins)
    if style['possession_share'] > 55 and style['avg_pass_length'] < 18:
        style_label = 'possession-based'
    elif style['long_ball_pct'] > 15:
        style_label = 'direct'
    elif style['pressures_per_90'] > 150:
        style_label = 'high-pressing'
    else:
        style_label = 'balanced'

    # Identify main threat
    if style['crosses_per_90'] > 5:
        main_threat = 'wide areas and crosses'
    elif style['xg_per_shot'] > 0.12:
        main_threat = 'high-quality chances centrally'
    else:
        main_threat = 'patient build-up play'

    # Executive summary
    report['summary'] = (
        f"{team_name} play a {style_label} style with "
        f"{style['possession_share']:.0f}% possession. "
        f"They are most dangerous through {main_threat}."
    )

    # Pressing recommendations
    if style['high_press_pct'] > 40:
        recommendations['against_press'] = {
            'situation': 'Against their high press',
            'recommendation': 'Use long balls to bypass press, target channels behind fullbacks',
            'key_players': 'Goalkeeper and CBs need to be comfortable under pressure'
        }
    else:
        recommendations['in_possession'] = {
            'situation': 'Against their mid-block',
            'recommendation': 'Patient build-up, switches of play to create overloads',
            'key_players': 'Midfielders need to find pockets of space'
        }

    # Defensive recommendations
    recommendations['defensive'] = []

    if style['crosses_per_90'] > 5:
        recommendations['defensive'].append(
            'Expect crossing - ensure good box coverage'
        )

    if weaknesses['transition_vulnerability']['xg'] > 2:
        recommendations['offensive'] = {
            'transitions': 'Exploit on transitions - they concede from counters'
        }

    # Key player strategy (skipped when no threats were identified)
    if len(key_players['top_threats']) > 0:
        top_threat = key_players['top_threats'].iloc[0]
        recommendations['key_player'] = {
            'player': top_threat['player'],
            'position': top_threat['position'],
            'strategy': generate_player_strategy(top_threat)
        }

    # Set piece tactics
    if set_pieces:
        # The R analysis in this chapter nests these values under 'summary';
        # accept that layout as well as a flat mapping so real values are not
        # silently replaced by the defaults.
        # NOTE(review): confirm which layout the Python analyze_set_pieces returns.
        nested = set_pieces.get('summary')
        if not isinstance(nested, dict):
            nested = {}

        def set_piece_value(key, default):
            return nested.get(key, set_pieces.get(key, default))

        preferred_delivery = set_piece_value('preferred_delivery', 'varied')
        targeted_zone = set_piece_value('most_targeted_zone', 'central area')

        recommendations['set_pieces'] = {
            'corners': f"Most corners are {preferred_delivery} "
                       f"targeting the {targeted_zone}",
            'defensive_priority': 'Assign marker to their tallest player at corners'
        }

    return report

def generate_player_strategy(player):
    """
    Pick a neutralization plan for a key player from their position.

    Args:
        player: Mapping-like object; only player['position'] is read.

    Returns:
        str: Advice for forwards, for attacking midfielders/wingers, or a
        generic pressing plan for every other position.
    """
    playbook = (
        (('Center Forward', 'Striker'),
         "Man-mark in box, deny service, press when receiving back to goal"),
        (('Attacking Midfield', 'Right Wing', 'Left Wing'),
         "Double up when on ball, prevent turns, force to weaker foot"),
    )

    role = player['position']
    for roles, plan in playbook:
        if role in roles:
            return plan

    # Any position not listed above gets the generic plan
    return "Press early, prevent easy progression, force long balls"

# Build the report for the opponent from the analyses computed earlier
opposition_report = generate_opposition_report(
    'Barcelona',
    barcelona_style,
    barcelona_weaknesses,
    key_players,
    set_piece_analysis
)

# Header and executive summary
print("OPPOSITION ANALYSIS REPORT")
print("=" * 50)
print(f"\n{opposition_report['summary']}\n")

# One section per recommendation category; dict entries are shown as
# "key: value" bullets, list entries as plain bullets.
print("TACTICAL RECOMMENDATIONS:")
for category, rec in opposition_report['recommendations'].items():
    print(f"\n{category.upper()}:")
    if isinstance(rec, dict):
        bullets = [f"  - {k}: {v}" for k, v in rec.items()]
    elif isinstance(rec, list):
        bullets = [f"  - {item}" for item in rec]
    else:
        bullets = []
    for bullet in bullets:
        print(bullet)
# Generate comprehensive opposition report
#
# Builds a list with a one-paragraph `summary` and a `recommendations` list
# (pressing, defensive, offensive, key_player, set_pieces).
#
# team_name:   opponent name used in the summary sentence.
# style:       list/one-row data frame; reads possession_share,
#              high_press_pct and crosses_per_90 (plus whatever the helper
#              functions read).
# weaknesses:  list; reads weaknesses$transition_vulnerability$transition_xg.
# key_players: list; key_players$top_threats is a data frame whose first row
#              provides player.name and position.name.
# set_pieces:  list; reads set_pieces$summary$preferred_delivery and
#              set_pieces$summary$most_targeted_zone.
#
# NOTE(review): identify_main_weakness() and generate_player_strategy() are
# called here but are not defined next to the other helpers - confirm they
# exist elsewhere in the project.
generate_opposition_report <- function(team_name, style, weaknesses, key_players, set_pieces) {

  report <- list()

  # Executive summary
  report$summary <- paste0(
    team_name, " play a ", determine_style_label(style), " style ",
    "with ", round(style$possession_share, 0), "% possession. ",
    "They are most dangerous through ", identify_main_threat(style), ". ",
    "Key vulnerabilities include ", identify_main_weakness(weaknesses), "."
  )

  # Tactical recommendations
  report$recommendations <- list()

  # Pressing recommendations
  if (style$high_press_pct > 40) {
    report$recommendations$pressing <- list(
      situation = "Against their high press",
      recommendation = "Use long balls to bypass press, target channels behind fullbacks",
      key_players = "Goalkeeper and CBs need to be comfortable under pressure"
    )
  } else {
    report$recommendations$pressing <- list(
      situation = "Against their mid-block",
      recommendation = "Patient build-up, look for switches of play to create overloads",
      key_players = "Midfielders need to find pockets of space"
    )
  }

  # Defensive recommendations
  if (style$crosses_per_90 > 5) {
    report$recommendations$defensive$wide_areas <- "Expect crossing - ensure good box coverage, mark aerially strong players"
  }

  if (weaknesses$transition_vulnerability$transition_xg > 2) {
    report$recommendations$offensive$transitions <- "Exploit on transitions - they concede dangerous chances from counter-attacks"
  }

  # Key player neutralization.
  # Indexing row 1 of an empty data frame yields an all-NA row, so only add
  # this section when at least one threat was actually identified.
  top_threats <- key_players$top_threats
  if (!is.null(top_threats) && nrow(top_threats) > 0) {
    top_threat <- top_threats[1, ]
    report$recommendations$key_player <- list(
      player = top_threat$player.name,
      position = top_threat$position.name,
      strategy = generate_player_strategy(top_threat)
    )
  }

  # Set piece tactics
  report$recommendations$set_pieces <- list(
    corners = paste("Most corners are", set_pieces$summary$preferred_delivery,
                    "targeting the", set_pieces$summary$most_targeted_zone),
    defensive_priority = "Assign marker to their tallest player at corners"
  )

  report
}

# Helper functions

# Classify a team's playing style from its style metrics.
# Rules are checked in order and the first match wins:
#   possession share > 55 with average pass length < 18 -> "possession-based"
#   long ball share > 15                                -> "direct"
#   pressures per 90 > 150                              -> "high-pressing"
#   otherwise                                           -> "balanced"
determine_style_label <- function(style) {
  keeps_ball_short <- style$possession_share > 55 && style$avg_pass_length < 18
  if (keeps_ball_short) {
    return("possession-based")
  }

  if (style$long_ball_pct > 15) {
    return("direct")
  }

  if (style$pressures_per_90 > 150) {
    return("high-pressing")
  }

  "balanced"
}

# Describe the team's main attacking threat from its style metrics.
# More than 5 crosses per 90 takes priority over shot quality (xG per shot
# above 0.12); anything else is described as patient build-up play.
identify_main_threat <- function(style) {
  if (style$crosses_per_90 > 5) {
    return("wide areas and crosses")
  }

  if (style$xg_per_shot > 0.12) {
    return("high-quality chances centrally")
  }

  "patient build-up play"
}

# Build the tactical report from the analyses computed earlier, then print
# its executive summary.
opposition_report <- generate_opposition_report(
  team_name = "Barcelona",
  style = barcelona_style,
  weaknesses = barcelona_weaknesses,
  key_players = key_players,
  set_pieces = set_piece_analysis
)

cat(opposition_report[["summary"]])

Practice Exercises

Exercise 23.1: Complete Opposition Analysis Report Generator

Task: Build an automated opposition report generator that analyzes a team's last 5-10 matches and produces a comprehensive tactical report including playing style metrics, weakness identification, key player analysis, and actionable recommendations.

Requirements:

  • Calculate 15+ style metrics (possession, directness, pressing, width, etc.)
  • Compare opponent to league average
  • Identify top 3 exploitable weaknesses
  • Profile top 5 dangerous players
  • Generate text-based tactical recommendations

opposition_report_generator.py
import pandas as pd
import numpy as np
from collections import defaultdict

# Fix the RNG seed so the simulated event data is reproducible
np.random.seed(42)

# Teams that make up the simulated league
teams = [
    "Manchester City",
    "Liverpool",
    "Arsenal",
    "Chelsea",
    "Man United",
    "Tottenham",
    "Newcastle",
    "Brighton",
]

def simulate_match_events(team_name, n_matches=10):
    """Build a random event log for one team over several matches.

    Args:
        team_name: Value written to the 'team' column of every event.
        n_matches: Number of matches to simulate; match ids run 1..n_matches.

    Returns:
        pandas.DataFrame with one row per event (400-600 per match). Shot xG
        and shot outcome are only filled on 'Shot' rows, and dribble outcome
        only on 'Dribble' rows.

    Note:
        Draws come from NumPy's global RNG, so the order of the random calls
        below determines the data produced for a given seed.
    """
    kinds = ['Pass', 'Carry', 'Shot', 'Dribble', 'Pressure', 'Duel',
             'Clearance', 'Interception', 'Cross', 'Foul']
    kind_weights = [0.55, 0.12, 0.03, 0.05, 0.08, 0.07, 0.04, 0.03, 0.02, 0.01]

    def one_match(match_id):
        n_events = np.random.randint(400, 601)

        columns = {'match_id': match_id, 'team': team_name}
        columns['minute'] = sorted(np.random.randint(1, 96, n_events))
        columns['event_type'] = np.random.choice(kinds, n_events, p=kind_weights)
        columns['location_x'] = np.random.uniform(0, 120, n_events)
        columns['location_y'] = np.random.uniform(0, 80, n_events)
        columns['end_location_x'] = np.random.uniform(0, 120, n_events)
        columns['end_location_y'] = np.random.uniform(0, 80, n_events)
        columns['pass_length'] = np.random.uniform(2, 50, n_events)
        columns['pass_outcome'] = np.random.choice(
            ['Complete', 'Incomplete'], n_events, p=[0.8, 0.2])
        columns['player_name'] = np.random.choice(
            [f'Player_{i}' for i in range(1, 15)], n_events)

        # Shot-only columns: values are drawn for every row, then blanked
        # out on rows that are not shots
        is_shot = columns['event_type'] == 'Shot'
        columns['shot_xg'] = np.where(
            is_shot, np.random.uniform(0.02, 0.6, n_events), np.nan)
        columns['shot_outcome'] = np.where(
            is_shot,
            np.random.choice(['Goal', 'Saved', 'Off Target', 'Blocked'],
                             n_events, p=[0.1, 0.4, 0.3, 0.2]),
            None
        )

        # Dribble-only column, built the same way
        is_dribble = columns['event_type'] == 'Dribble'
        columns['dribble_outcome'] = np.where(
            is_dribble,
            np.random.choice(['Complete', 'Incomplete'], n_events, p=[0.55, 0.45]),
            None
        )

        return pd.DataFrame(columns)

    per_match = [one_match(match_id) for match_id in range(1, n_matches + 1)]
    return pd.concat(per_match, ignore_index=True)

# Simulate every team's matches and stack them into one league-wide event table
all_team_data = pd.concat(
    [simulate_match_events(team) for team in teams],
    ignore_index=True,
)

def calculate_style_metrics(team_events):
    """Calculate comprehensive style metrics for a team.

    Args:
        team_events: DataFrame of one team's events with columns 'match_id',
            'event_type', 'location_x', 'location_y', 'end_location_x',
            'pass_length', 'pass_outcome', 'shot_xg', 'shot_outcome' and
            'dribble_outcome'.

    Returns:
        dict mapping metric name to a number. Counts are averaged per match
        (one match is treated as 90 minutes). Any metric whose underlying
        subset is empty is reported as 0 instead of NaN, and an input with
        no events at all returns all-zero metrics rather than raising
        ZeroDivisionError.
    """
    def of_type(event_type):
        return team_events[team_events['event_type'] == event_type]

    passes = of_type('Pass')
    shots = of_type('Shot')
    pressures = of_type('Pressure')
    crosses = of_type('Cross')
    dribbles = of_type('Dribble')

    n_matches = team_events['match_id'].nunique()

    def per_match(count):
        # No matches means there is nothing to average over
        return count / n_matches if n_matches > 0 else 0

    def pct(mask):
        # Share of True values as a percentage; 0 when the subset is empty
        return mask.mean() * 100 if len(mask) > 0 else 0

    return {
        # Possession & Build-up
        'passes_per_90': per_match(len(passes)),
        'pass_completion': pct(passes['pass_outcome'] == 'Complete'),
        'avg_pass_length': passes['pass_length'].mean() if len(passes) > 0 else 0,
        'long_ball_pct': pct(passes['pass_length'] > 30),
        'progressive_pass_pct': pct((passes['end_location_x'] - passes['location_x']) > 10),

        # Possession zones (pitch length split into thirds at x = 40 and x = 80)
        'possession_own_third': pct(team_events['location_x'] < 40),
        'possession_mid_third': pct((team_events['location_x'] >= 40) & (team_events['location_x'] < 80)),
        'possession_final_third': pct(team_events['location_x'] >= 80),

        # Width (a standard deviation needs at least two events)
        'avg_width': team_events['location_y'].std() if len(team_events) > 1 else 0,
        'left_side_pct': pct(team_events['location_y'] < 30),
        'right_side_pct': pct(team_events['location_y'] > 50),

        # Pressing
        'pressures_per_90': per_match(len(pressures)),
        'high_press_pct': pct(pressures['location_x'] > 80),

        # Attacking
        'shots_per_90': per_match(len(shots)),
        'xg_per_shot': shots['shot_xg'].mean() if len(shots) > 0 else 0,
        'xg_per_90': per_match(shots['shot_xg'].sum()) if len(shots) > 0 else 0,
        'shot_accuracy': pct(shots['shot_outcome'].isin(['Goal', 'Saved'])),

        # Crosses & Dribbles
        'crosses_per_90': per_match(len(crosses)),
        'dribbles_per_90': per_match(len(dribbles)),
        'dribble_success': pct(dribbles['dribble_outcome'] == 'Complete'),

        # Directness: xG generated per 1000 passes
        'directness_index': (shots['shot_xg'].sum() / len(passes)) * 1000 if len(passes) > 0 else 0
    }

# Style metrics per team, keyed by team name
league_metrics = {
    team: calculate_style_metrics(all_team_data[all_team_data['team'] == team])
    for team in teams
}

# Same metrics as a table with one row per team
league_metrics_df = (
    pd.DataFrame(league_metrics)
    .T
    .reset_index()
    .rename(columns={'index': 'team'})
)

# League average of every metric (metric names are taken from the first team)
league_avg = {
    k: np.mean([m[k] for m in league_metrics.values()])
    for k in league_metrics[teams[0]].keys()
}

def generate_opposition_report(opponent_name, team_data, league_metrics_dict, league_avg):
    """Print a four-section scouting report on one team and return its findings.

    Sections: style profile versus league average, exploitable weaknesses,
    the five highest-scoring player threats, and tactical recommendations.

    Args:
        opponent_name: Team to report on; must be a key of league_metrics_dict.
        team_data: Event DataFrame for all teams ('team', 'player_name',
            'event_type', 'shot_xg', 'shot_outcome', 'end_location_x').
        league_metrics_dict: Mapping team name -> style metrics dict.
        league_avg: Mapping metric name -> league average value.

    Returns:
        dict with 'style' (the opponent's metrics), 'weaknesses' (list of
        messages) and 'threats' (DataFrame of the top five players).
    """
    heavy_rule = "=" * 72
    light_rule = "-" * 50

    opponent_events = team_data[team_data['team'] == opponent_name]
    opponent_style = league_metrics_dict[opponent_name]

    def above(metric, margin=0):
        # Opponent exceeds the league average by more than `margin`
        return opponent_style[metric] > league_avg[metric] + margin

    def below(metric, margin=0):
        # Opponent trails the league average by more than `margin`
        return opponent_style[metric] < league_avg[metric] - margin

    print(f"\n{heavy_rule}")
    print(f"         OPPOSITION ANALYSIS REPORT: {opponent_name}")
    print(f"{heavy_rule}\n")

    # 1. Style Profile
    print("1. PLAYING STYLE PROFILE")
    print(light_rule)

    # Rules are evaluated lazily, in order; the first one that applies wins
    style_rules = (
        ("Possession-Based",
         lambda: above('possession_final_third') and below('avg_pass_length')),
        ("Direct", lambda: above('long_ball_pct', 5)),
        ("High-Pressing", lambda: above('high_press_pct', 10)),
        ("Wing-Focused", lambda: above('crosses_per_90', 2)),
    )
    style_label = next(
        (label for label, applies in style_rules if applies()),
        "Balanced",
    )

    print(f"Style Classification: {style_label}\n")

    # Key metrics comparison
    print("Key Metrics vs League Average:")
    for metric in ('pass_completion', 'progressive_pass_pct', 'pressures_per_90',
                   'high_press_pct', 'shots_per_90', 'xg_per_shot', 'crosses_per_90'):
        opp_val, league_val = opponent_style[metric], league_avg[metric]
        direction = "ABOVE" if opp_val > league_val else "BELOW"
        print(f"  {metric.replace('_', ' '):<25}: {opp_val:.1f} (League: {league_val:.1f}) - {direction}")

    # 2. Weakness Analysis
    print("\n2. EXPLOITABLE WEAKNESSES")
    print(light_rule)

    weakness_checks = [
        (below('high_press_pct', 5),
         "LOW PRESS - Space available in midfield. Build from back confidently."),
        (below('pass_completion', 3),
         "TURNOVER PRONE - Press high to force errors and create transitions."),
        (opponent_style['left_side_pct'] < opponent_style['right_side_pct'] - 10,
         "WEAK LEFT SIDE - Attack down their left; overload with RW + RB."),
        (below('dribble_success', 5),
         "POOR DRIBBLE DEFENSE - 1v1 situations favourable."),
    ]
    weaknesses = [message for flagged, message in weakness_checks if flagged]

    if weaknesses:
        for rank, message in enumerate(weaknesses, 1):
            print(f"  {rank}. {message}")
    else:
        print("  No major weaknesses identified.")

    # 3. Key Player Threats
    print("\n3. KEY PLAYER THREATS")
    print(light_rule)

    player_stats = opponent_events.groupby('player_name').agg({
        'event_type': 'count',
        'shot_xg': 'sum',
        'shot_outcome': lambda x: (x == 'Goal').sum(),
    }).reset_index()
    player_stats.columns = ['player', 'events', 'xg', 'goals']

    # Key passes: passes that end beyond x = 100
    is_key_pass = (
        (opponent_events['event_type'] == 'Pass') &
        (opponent_events['end_location_x'] > 100)
    )
    key_passes = (
        opponent_events[is_key_pass]
        .groupby('player_name')
        .size()
        .reset_index(name='key_passes')
    )

    player_stats = player_stats.merge(
        key_passes, left_on='player', right_on='player_name', how='left')
    player_stats['key_passes'] = player_stats['key_passes'].fillna(0)
    player_stats['threat_score'] = player_stats['xg'] * 5 + player_stats['key_passes'] * 0.5

    top_threats = player_stats.nlargest(5, 'threat_score')

    for rank, (_, threat) in enumerate(top_threats.iterrows(), 1):
        print(f"  {rank}. {threat['player']} - Threat Score: {threat['threat_score']:.1f}")
        print(f"     xG: {threat['xg']:.2f} | Goals: {int(threat['goals'])} | Key Passes: {int(threat['key_passes'])}\n")

    # 4. Tactical Recommendations
    print("4. TACTICAL RECOMMENDATIONS")
    print(light_rule)

    if opponent_style['high_press_pct'] > league_avg['high_press_pct']:
        in_possession = [
            "    - Play through their press with quick combinations",
            "    - Use wide areas to escape pressure",
            "    - Goalkeeper comfortable playing with feet essential",
        ]
    else:
        in_possession = [
            "    - Build patiently; they don't press high",
            "    - Look for progressive passes into midfield pockets",
            "    - Switch play to exploit width",
        ]
    print("\n  IN POSSESSION:")
    for line in in_possession:
        print(line)

    if opponent_style['long_ball_pct'] > league_avg['long_ball_pct']:
        out_of_possession = [
            "    - Win second balls; strong aerial presence needed",
            "    - Compress midfield to limit space for knockdowns",
        ]
    else:
        out_of_possession = [
            "    - Press in middle third to disrupt build-up",
            "    - Man-mark their key playmaker",
        ]
    print("\n  OUT OF POSSESSION:")
    for line in out_of_possession:
        print(line)

    print(f"\n{heavy_rule}")

    return {
        'style': opponent_style,
        'weaknesses': weaknesses,
        'threats': top_threats
    }

# Generate report for Manchester City
# (the call prints the report; the returned dict holding the style metrics,
# weaknesses and top threats is kept in `report`)
report = generate_opposition_report("Manchester City", all_team_data, league_metrics, league_avg)
library(tidyverse)

# Fix the RNG seed so the simulated match event data is reproducible
set.seed(42)

# Teams that make up the simulated league
teams <- c(
  "Manchester City",
  "Liverpool",
  "Arsenal",
  "Chelsea",
  "Man United",
  "Tottenham",
  "Newcastle",
  "Brighton"
)

# Function to simulate match events
#
# Builds a random event log for one team over `n_matches` matches.
#
# team_name: value written to the `team` column of every event.
# n_matches: number of matches to simulate; match ids run 1..n_matches.
#            Zero matches yields an empty result instead of iterating over
#            c(1, 0), which is what `1:n_matches` would do.
#
# Returns a tibble with one row per event (400-600 per match). shot_xg and
# shot_outcome are only filled on "Shot" rows, dribble_outcome only on
# "Dribble" rows. The order of the random draws below determines the data
# produced for a given seed.
simulate_match_events <- function(team_name, n_matches = 10) {
  event_types <- c("Pass", "Carry", "Shot", "Dribble", "Pressure", "Duel",
                   "Clearance", "Interception", "Cross", "Foul")
  event_probs <- c(0.55, 0.12, 0.03, 0.05, 0.08, 0.07, 0.04, 0.03, 0.02, 0.01)

  simulate_one_match <- function(m) {
    n_events <- sample(400:600, 1)

    tibble(
      match_id = m,
      team = team_name,
      minute = sort(sample(1:95, n_events, replace = TRUE)),
      event_type = sample(event_types, n_events, replace = TRUE, prob = event_probs),
      location_x = runif(n_events, 0, 120),
      location_y = runif(n_events, 0, 80),
      end_location_x = runif(n_events, 0, 120),
      end_location_y = runif(n_events, 0, 80),
      pass_length = runif(n_events, 2, 50),
      pass_outcome = sample(c("Complete", "Incomplete"), n_events, replace = TRUE, prob = c(0.8, 0.2)),
      shot_xg = ifelse(event_type == "Shot", runif(n_events, 0.02, 0.6), NA),
      shot_outcome = ifelse(event_type == "Shot",
                           sample(c("Goal", "Saved", "Off Target", "Blocked"),
                                  n_events, replace = TRUE, prob = c(0.1, 0.4, 0.3, 0.2)), NA),
      dribble_outcome = ifelse(event_type == "Dribble",
                               sample(c("Complete", "Incomplete"), n_events, replace = TRUE, prob = c(0.55, 0.45)), NA),
      player_name = sample(paste0("Player_", 1:14), n_events, replace = TRUE)
    )
  }

  map_dfr(seq_len(n_matches), simulate_one_match)
}

# Simulate every team's matches (default match count) and stack the results
# into one league-wide event table
all_team_data <- map_dfr(teams, simulate_match_events)

# Comprehensive style metrics calculation
#
# team_events: one team's events with columns match_id, event_type,
#              location_x, location_y, end_location_x, pass_length,
#              pass_outcome, shot_xg, shot_outcome and dribble_outcome.
#
# Returns a one-row tibble of style metrics. Counts are averaged per match
# (one match is treated as 90 minutes). A metric whose underlying subset is
# empty is reported as 0 rather than NaN, so later scalar comparisons such as
# `if (x$high_press_pct > ...)` do not fail on missing values.
calculate_style_metrics <- function(team_events) {
  passes <- team_events %>% filter(event_type == "Pass")
  shots <- team_events %>% filter(event_type == "Shot")
  pressures <- team_events %>% filter(event_type == "Pressure")
  crosses <- team_events %>% filter(event_type == "Cross")
  dribbles <- team_events %>% filter(event_type == "Dribble")

  n_matches <- n_distinct(team_events$match_id)

  # Average a count over matches; 0 when there are no matches at all
  per_match <- function(count) {
    if (n_matches > 0) count / n_matches else 0
  }

  # Percentage of TRUE values; 0 when there is nothing to average
  pct <- function(flags, na.rm = FALSE) {
    if (length(flags) == 0) 0 else mean(flags, na.rm = na.rm) * 100
  }

  # Mean that falls back to 0 on empty input
  mean_or_zero <- function(values, na.rm = FALSE) {
    if (length(values) == 0) 0 else mean(values, na.rm = na.rm)
  }

  pass_advance <- passes$end_location_x - passes$location_x

  tibble(
    # Possession & Build-up
    passes_per_90 = per_match(nrow(passes)),
    pass_completion = pct(passes$pass_outcome == "Complete"),
    avg_pass_length = mean_or_zero(passes$pass_length),
    long_ball_pct = pct(passes$pass_length > 30),
    progressive_pass_pct = pct(pass_advance > 10),
    backward_pass_pct = pct(pass_advance < -5),

    # Possession zones (pitch length split into thirds at x = 40 and x = 80)
    possession_own_third = pct(team_events$location_x < 40),
    possession_mid_third = pct(team_events$location_x >= 40 & team_events$location_x < 80),
    possession_final_third = pct(team_events$location_x >= 80),

    # Width (a standard deviation needs at least two events)
    avg_width = if (nrow(team_events) > 1) sd(team_events$location_y) else 0,
    left_side_pct = pct(team_events$location_y < 30),
    right_side_pct = pct(team_events$location_y > 50),

    # Pressing
    pressures_per_90 = per_match(nrow(pressures)),
    high_press_pct = pct(pressures$location_x > 80),
    mid_press_pct = pct(pressures$location_x >= 40 & pressures$location_x <= 80),

    # Attacking
    shots_per_90 = per_match(nrow(shots)),
    xg_per_shot = mean_or_zero(shots$shot_xg, na.rm = TRUE),
    xg_per_90 = per_match(sum(shots$shot_xg, na.rm = TRUE)),
    shot_accuracy = pct(shots$shot_outcome %in% c("Goal", "Saved"), na.rm = TRUE),

    # Crosses & Dribbles
    crosses_per_90 = per_match(nrow(crosses)),
    dribbles_per_90 = per_match(nrow(dribbles)),
    dribble_success = pct(dribbles$dribble_outcome == "Complete", na.rm = TRUE),

    # Directness: xG generated per 1000 passes
    directness_index = if (nrow(passes) > 0) {
      (sum(shots$shot_xg, na.rm = TRUE) / nrow(passes)) * 1000
    } else {
      0
    }
  )
}

# One row of style metrics per team, with the team name appended as the
# last column
league_metrics <- map_dfr(teams, function(team_name) {
  all_team_data %>%
    filter(team == team_name) %>%
    calculate_style_metrics() %>%
    mutate(team = team_name)
})

# League average of every numeric metric, labelled as its own "team" row.
# The mean is wrapped in a lambda because passing extra arguments such as
# `na.rm` through `across(...)` is deprecated since dplyr 1.1.0.
league_avg <- league_metrics %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE))) %>%
  mutate(team = "League Average")

# Opposition report generator
#
# Prints a scouting report (style profile, weaknesses, key player threats,
# tactical recommendations) for one team and invisibly returns its findings.
#
# opponent_name:  team to report on; must appear in league_metrics$team.
# team_data:      event table for all teams (team, player_name, event_type,
#                 location_x, end_location_x, shot_xg, shot_outcome).
# league_metrics: one row of style metrics per team.
# league_avg:     one-row table of league-average metrics.
#
# Returns (invisibly) a list with `style` (the opponent's metrics row),
# `weaknesses` (named list of messages) and `threats` (top five players).
# Stops with an error when the opponent has no row in league_metrics.
generate_opposition_report <- function(opponent_name, team_data, league_metrics, league_avg) {

  opponent_events <- team_data %>% filter(team == opponent_name)
  opponent_style <- league_metrics %>% filter(team == opponent_name)

  # Fail early with a clear message; otherwise the scalar comparisons below
  # would stop later with "argument is of length zero".
  if (nrow(opponent_style) == 0) {
    stop("No league metrics found for team: ", opponent_name, call. = FALSE)
  }

  banner <- function(trailer) {
    cat(rep("=", 71), trailer, sep = "")
  }
  divider <- function() {
    cat(rep("-", 50), "\n", sep = "")
  }

  cat("\n")
  banner("\n")
  cat("         OPPOSITION ANALYSIS REPORT: ", opponent_name, "\n", sep = "")
  banner("\n\n")

  # 1. Style Profile
  cat("1. PLAYING STYLE PROFILE\n")
  divider()

  style_label <- case_when(
    opponent_style$possession_final_third > league_avg$possession_final_third &
      opponent_style$avg_pass_length < league_avg$avg_pass_length ~ "Possession-Based",
    opponent_style$long_ball_pct > league_avg$long_ball_pct + 5 ~ "Direct",
    opponent_style$high_press_pct > league_avg$high_press_pct + 10 ~ "High-Pressing",
    opponent_style$crosses_per_90 > league_avg$crosses_per_90 + 2 ~ "Wing-Focused",
    TRUE ~ "Balanced"
  )

  cat("Style Classification:", style_label, "\n\n")

  # Key metrics vs league
  key_metrics <- c("pass_completion", "progressive_pass_pct", "pressures_per_90",
                   "high_press_pct", "shots_per_90", "xg_per_shot", "crosses_per_90")

  cat("Key Metrics vs League Average:\n")
  for (metric in key_metrics) {
    opp_val <- opponent_style[[metric]]
    league_val <- league_avg[[metric]]
    # Scalar comparison, so plain if/else rather than vectorised ifelse()
    direction <- if (opp_val - league_val > 0) "ABOVE" else "BELOW"

    cat(sprintf("  %-25s: %.1f (League: %.1f) - %s\n",
                gsub("_", " ", metric), opp_val, league_val, direction))
  }

  # 2. Weakness Analysis
  cat("\n2. EXPLOITABLE WEAKNESSES\n")
  divider()

  weaknesses <- list()

  # Check for weaknesses
  if (opponent_style$high_press_pct < league_avg$high_press_pct - 5) {
    weaknesses$press <- "LOW PRESS - Space available in midfield. Build from back confidently."
  }

  if (opponent_style$pass_completion < league_avg$pass_completion - 3) {
    weaknesses$turnovers <- "TURNOVER PRONE - Press high to force errors and create transitions."
  }

  if (opponent_style$left_side_pct < opponent_style$right_side_pct - 10) {
    weaknesses$left_side <- "WEAK LEFT SIDE - Attack down their left; overload with RW + RB."
  }

  if (opponent_style$possession_own_third > league_avg$possession_own_third + 5) {
    weaknesses$deep_play <- "DEEP BUILD-UP - High press can force long balls and turnovers."
  }

  if (opponent_style$dribble_success < league_avg$dribble_success - 5) {
    weaknesses$dribble_def <- "POOR DRIBBLE DEFENSE - 1v1 situations favourable; encourage attackers to take on players."
  }

  if (length(weaknesses) == 0) {
    cat("  No major weaknesses identified.\n")
  } else {
    for (i in seq_along(weaknesses)) {
      cat(sprintf("  %d. %s\n", i, weaknesses[[i]]))
    }
  }

  # 3. Key Player Analysis
  cat("\n3. KEY PLAYER THREATS\n")
  divider()

  player_threats <- opponent_events %>%
    group_by(player_name) %>%
    summarise(
      events = n(),
      shots = sum(event_type == "Shot"),
      xg = sum(shot_xg, na.rm = TRUE),
      goals = sum(shot_outcome == "Goal", na.rm = TRUE),
      key_passes = sum(event_type == "Pass" & end_location_x > 100, na.rm = TRUE),
      progressive_carries = sum(event_type == "Carry" & (end_location_x - location_x) > 15),
      pressures = sum(event_type == "Pressure"),
      .groups = "drop"
    ) %>%
    mutate(
      threat_score = xg * 5 + key_passes * 0.5 + progressive_carries * 0.3
    ) %>%
    arrange(desc(threat_score)) %>%
    head(5)

  # seq_len() keeps the loop empty when no players were found, whereas
  # 1:nrow() would iterate over c(1, 0).
  for (i in seq_len(nrow(player_threats))) {
    p <- player_threats[i, ]
    cat(sprintf("  %d. %s - Threat Score: %.1f\n", i, p$player_name, p$threat_score))
    cat(sprintf("     xG: %.2f | Shots: %d | Key Passes: %d\n\n", p$xg, p$shots, p$key_passes))
  }

  # 4. Tactical Recommendations
  cat("4. TACTICAL RECOMMENDATIONS\n")
  divider()

  cat("\n  IN POSSESSION:\n")
  if (opponent_style$high_press_pct > league_avg$high_press_pct) {
    cat("    - Play through their press with quick combinations\n")
    cat("    - Use wide areas to escape pressure\n")
    cat("    - Goalkeeper comfortable playing with feet essential\n")
  } else {
    cat("    - Build patiently; they don't press high\n")
    cat("    - Look for progressive passes into midfield pockets\n")
    cat("    - Switch play to exploit width\n")
  }

  cat("\n  OUT OF POSSESSION:\n")
  if (opponent_style$long_ball_pct > league_avg$long_ball_pct) {
    cat("    - Win second balls; strong aerial presence needed\n")
    cat("    - Compress midfield to limit space for knockdowns\n")
    cat("    - Fullbacks stay tight to wingers\n")
  } else {
    cat("    - Press in middle third to disrupt build-up\n")
    cat("    - Man-mark their key playmaker\n")
    cat("    - Force play into wide areas\n")
  }

  cat("\n  SET PIECES:\n")
  cat("    - Assign dedicated markers for aerial threats\n")
  cat("    - Watch for short corner routines\n")

  cat("\n")
  banner("\n")

  invisible(list(
    style = opponent_style,
    weaknesses = weaknesses,
    threats = player_threats
  ))
}

# Generate report for Manchester City
# (the call prints the report; its invisibly returned list of style,
# weaknesses and threats is kept in `report`)
report <- generate_opposition_report("Manchester City", all_team_data, league_metrics, league_avg)

# Radar chart comparison
#
# Builds (and prints) the data behind a team-vs-league radar chart.
#
# team_name:      team to compare; matched against league_metrics$team.
# league_metrics: one row of style metrics per team.
# league_avg:     one-row table of league-average metrics.
#
# Returns the printed table: one row per radar metric with the team value,
# the league average and both rescaled so the larger of the two is 100.
create_style_radar <- function(team_name, league_metrics, league_avg) {
  # Select key metrics for radar
  radar_metrics <- c("pass_completion", "progressive_pass_pct", "pressures_per_90",
                     "high_press_pct", "xg_per_90", "directness_index")

  team_vals <- league_metrics %>%
    filter(team == team_name) %>%
    select(all_of(radar_metrics)) %>%
    pivot_longer(everything(), names_to = "metric", values_to = "team_value")

  avg_vals <- league_avg %>%
    select(all_of(radar_metrics)) %>%
    pivot_longer(everything(), names_to = "metric", values_to = "avg_value")

  radar_data <- team_vals %>%
    left_join(avg_vals, by = "metric") %>%
    mutate(
      # Normalize to 0-100 scale, metric by metric. pmax() is the row-wise
      # maximum; max() would take one maximum across all metrics, so the
      # metric with the largest raw values would flatten every other axis.
      metric_max = pmax(team_value, avg_value),
      team_norm = (team_value / metric_max) * 100,
      avg_norm = (avg_value / metric_max) * 100
    ) %>%
    select(-metric_max)

  print(radar_data)
}
Exercise 23.2: Set Piece Analysis & Pattern Detection

Task: Build a specialized set piece analysis system that identifies opponent corner and free kick patterns, categorizes delivery types, maps target zones, and identifies their most dangerous aerial threats.

Requirements:

  • Categorize corner deliveries (inswing, outswing, short, driven)
  • Map target zones (near post, far post, edge of box, etc.)
  • Calculate xG from set pieces vs open play
  • Identify aerial threat players by position
  • Generate defensive set piece recommendations

set_piece_analysis.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulate set piece data
# The seed is fixed and every column is drawn from NumPy's global RNG, so the
# order of the statements below determines the generated data.
np.random.seed(42)
n_setpieces = 150

# One row per set piece; the categorical columns are sampled independently
# of each other using the probabilities given in `p`.
set_piece_data = pd.DataFrame({
    'match_id': np.random.randint(1, 16, n_setpieces),
    'set_piece_type': np.random.choice(['Corner', 'Free Kick'], n_setpieces, p=[0.6, 0.4]),
    'side': np.random.choice(['Left', 'Right'], n_setpieces),
    'taker': np.random.choice([f'Taker_{i}' for i in range(1, 5)], n_setpieces),
    'delivery_type': np.random.choice(
        ['Inswinger', 'Outswinger', 'Short', 'Driven', 'Floated'],
        n_setpieces, p=[0.35, 0.25, 0.15, 0.15, 0.1]
    ),
    'target_zone': np.random.choice(
        ['Near Post', 'Central 6-yard', 'Far Post', 'Penalty Spot', 'Edge of Box'],
        n_setpieces, p=[0.2, 0.3, 0.25, 0.15, 0.1]
    ),
    'outcome': np.random.choice(
        ['Cleared', 'Shot Created', 'Goal', 'Keeper Claim', 'Out of Play', 'Second Ball'],
        n_setpieces, p=[0.35, 0.2, 0.05, 0.15, 0.15, 0.1]
    ),
    'first_contact': np.random.choice(
        ['Defender_1', 'Defender_2', 'Midfielder_1', 'Forward_1', 'Forward_2', 'Own Player'],
        n_setpieces, p=[0.25, 0.2, 0.15, 0.2, 0.15, 0.05]
    )
})

# Add xG for shots
# (only rows whose outcome is 'Shot Created' or 'Goal' get a value; the rest are NaN)
set_piece_data['shot_xg'] = np.where(
    set_piece_data['outcome'].isin(['Shot Created', 'Goal']),
    np.random.uniform(0.05, 0.35, n_setpieces),
    np.nan
)

# Free kick specific
# (distance and angle are only filled on 'Free Kick' rows; corners get NaN / None)
set_piece_data['fk_distance'] = np.where(
    set_piece_data['set_piece_type'] == 'Free Kick',
    np.random.uniform(20, 35, n_setpieces),
    np.nan
)
set_piece_data['fk_angle'] = np.where(
    set_piece_data['set_piece_type'] == 'Free Kick',
    np.random.choice(['Central', 'Wide Left', 'Wide Right'], n_setpieces),
    None
)

def analyze_set_pieces(data):
    """Print a set piece scouting report and return its summary tables.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per set piece. Columns read: ``match_id``, ``set_piece_type``
        ('Corner' / 'Free Kick'), ``side``, ``delivery_type``,
        ``target_zone``, ``outcome``, ``first_contact``, ``shot_xg`` (NaN when
        no shot was created) and ``fk_angle``.

    Returns
    -------
    dict
        ``corner_patterns``, ``target_zones`` and ``aerial_threats``
        DataFrames. When the sample contains no corners (or no shots) the
        corresponding tables are empty but keep their usual column names, and
        the report says so instead of raising.
    """
    shot_outcomes = ['Shot Created', 'Goal']
    rule = "-" * 40

    print("=" * 62)
    print("SET PIECE ANALYSIS REPORT")
    print("=" * 62 + "\n")

    # 1. Overview
    print("1. SET PIECE OVERVIEW")
    print(rule)

    total = len(data)
    corners = (data['set_piece_type'] == 'Corner').sum()
    free_kicks = (data['set_piece_type'] == 'Free Kick').sum()
    shots = data['outcome'].isin(shot_outcomes).sum()
    goals = (data['outcome'] == 'Goal').sum()
    total_xg = data['shot_xg'].sum()
    # Guard the percentage so an empty sample reports 0.0% rather than NaN.
    shot_rate = shots / total * 100 if total else 0.0

    print(f"  Total Set Pieces: {total}")
    print(f"  Corners: {corners} | Free Kicks: {free_kicks}")
    print(f"  Shots Created: {shots} ({shot_rate:.1f}%)")
    print(f"  Goals: {goals} | Total xG: {total_xg:.2f}\n")

    # 2. Corner Delivery Patterns
    print("2. CORNER DELIVERY PATTERNS")
    print(rule)

    corner_data = data[data['set_piece_type'] == 'Corner']
    has_corners = not corner_data.empty

    if has_corners:
        corner_patterns = corner_data.groupby(['side', 'delivery_type']).agg({
            'match_id': 'count',
            'shot_xg': 'sum'
        }).reset_index()
        corner_patterns.columns = ['Side', 'Delivery', 'Count', 'xG']
        corner_patterns = corner_patterns.sort_values(
            ['Side', 'Count'], ascending=[True, False]
        )
        print(corner_patterns.to_string(index=False))

        # Count deliveries once and read both the label and its frequency.
        delivery_counts = corner_data['delivery_type'].value_counts()
        most_common = delivery_counts.idxmax()
        most_common_count = delivery_counts.max()
        print(f"\n  Most Common Delivery: {most_common} ({most_common_count} corners)\n")
    else:
        corner_patterns = pd.DataFrame(columns=['Side', 'Delivery', 'Count', 'xG'])
        print("  No corners in the sample\n")

    # 3. Target Zone Analysis
    print("3. TARGET ZONE ANALYSIS")
    print(rule)

    if has_corners:
        target_analysis = corner_data.groupby('target_zone').agg({
            'match_id': 'count',
            'shot_xg': 'sum',
            'outcome': lambda x: (x == 'Goal').sum()
        }).reset_index()
        target_analysis.columns = ['Target Zone', 'Count', 'xG', 'Goals']
        target_analysis['Pct'] = (
            target_analysis['Count'] / target_analysis['Count'].sum() * 100
        )
        target_analysis = target_analysis.sort_values('Count', ascending=False)
        print(target_analysis.to_string(index=False))

        preferred = target_analysis.iloc[0]
        print(f"\n  Primary Target: {preferred['Target Zone']} ({preferred['Pct']:.1f}% of corners)\n")
    else:
        target_analysis = pd.DataFrame(
            columns=['Target Zone', 'Count', 'xG', 'Goals', 'Pct']
        )
        preferred = None
        print("  No corners in the sample\n")

    # 4. Aerial Threats
    print("4. AERIAL THREATS")
    print(rule)

    shots_created = data[data['outcome'].isin(shot_outcomes)]
    if not shots_created.empty:
        aerial_threats = shots_created.groupby('first_contact').agg({
            'match_id': 'count',
            'shot_xg': 'sum',
            'outcome': lambda x: (x == 'Goal').sum()
        }).reset_index()
        aerial_threats.columns = ['Player', 'Aerial Wins', 'xG', 'Goals']
        aerial_threats = aerial_threats.sort_values('xG', ascending=False)
        print(aerial_threats.to_string(index=False))

        top_threat = aerial_threats.iloc[0]
        print(f"\n  Primary Aerial Threat: {top_threat['Player']} ({top_threat['xG']:.2f} xG)")
        print("  RECOMMENDATION: Assign dedicated marker to this player\n")
    else:
        aerial_threats = pd.DataFrame(columns=['Player', 'Aerial Wins', 'xG', 'Goals'])
        top_threat = None
        print("  No shots from set pieces in the sample\n")

    # 5. Free Kick Analysis
    print("5. FREE KICK ANALYSIS")
    print(rule)

    fk_data = data[data['set_piece_type'] == 'Free Kick']
    if not fk_data.empty:
        fk_analysis = fk_data.groupby(['fk_angle', 'delivery_type']).agg({
            'match_id': 'count',
            'shot_xg': 'sum'
        }).reset_index()
        fk_analysis.columns = ['Angle', 'Delivery', 'Count', 'xG']
        fk_analysis = fk_analysis.sort_values('xG', ascending=False)
        print(fk_analysis.to_string(index=False))
    else:
        print("  No free kicks in the sample")

    # 6. Recommendations
    print("\n6. DEFENSIVE RECOMMENDATIONS")
    print(rule)

    print("  CORNERS:")
    # Without corner data there is no preferred zone; fall through to the
    # generic mixed-marking advice.
    primary_zone = preferred['Target Zone'] if preferred is not None else None
    if primary_zone == 'Near Post':
        print("    - Strong man at near post; attack ball aggressively")
        print("    - Second defender 2 yards behind near post marker")
    elif primary_zone == 'Far Post':
        print("    - Stack defenders at far post; zonal coverage")
        print("    - Keeper to command 6-yard box")
    else:
        print("    - Mixed zonal-man marking; key players man-marked")
        print("    - Two at posts for short corner threat")

    if top_threat is not None:
        print(f"    - Primary marker on: {top_threat['Player']}")

    print("\n  FREE KICKS:")
    print("    - Wall: 4-5 players depending on distance")
    print("    - Watch for dummy runs behind wall")

    return {
        'corner_patterns': corner_patterns,
        'target_zones': target_analysis,
        'aerial_threats': aerial_threats
    }

# Run analysis
analysis = analyze_set_pieces(set_piece_data)

# Visualization: delivery mix per target zone (left) and xG per outcome (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Corner target zones
corner_data = set_piece_data[set_piece_data['set_piece_type'] == 'Corner']
target_counts = corner_data.groupby(['target_zone', 'delivery_type']).size().unstack(fill_value=0)

target_counts.plot(kind='bar', ax=axes[0], colormap='Set2')
axes[0].set_title('Corner Delivery by Target Zone')
axes[0].set_xlabel('Target Zone')
axes[0].set_ylabel('Count')
axes[0].tick_params(axis='x', rotation=45)
axes[0].legend(title='Delivery Type', bbox_to_anchor=(1.02, 1))

# xG by outcome.
# A plain sum() turns an all-NaN group into 0.0, so dropna() would never remove
# the outcomes that produced no shot. min_count=1 keeps those groups as NaN,
# letting dropna() leave only outcomes that actually carry xG.
outcome_xg = (
    set_piece_data.groupby('outcome')['shot_xg']
    .sum(min_count=1)
    .dropna()
    .sort_values(ascending=True)
)
if not outcome_xg.empty:
    outcome_xg.plot(kind='barh', ax=axes[1], color='coral')
axes[1].set_title('Set Piece xG by Outcome')
axes[1].set_xlabel('Total xG')

plt.tight_layout()
plt.show()
library(tidyverse)

# Simulate set piece data (the fixed seed keeps the sample reproducible)
set.seed(42)
n_setpieces <- 150

set_piece_data <- local({
  # Draw n_setpieces values with replacement; prob = NULL means uniform.
  draw <- function(levels, prob = NULL) {
    sample(levels, n_setpieces, replace = TRUE, prob = prob)
  }
  shot_outcomes <- c("Shot Created", "Goal")

  # Columns are evaluated top to bottom, so this order is also the order in
  # which random numbers are consumed.
  tibble(
    match_id = draw(1:15),
    set_piece_type = draw(c("Corner", "Free Kick"), prob = c(0.6, 0.4)),
    side = draw(c("Left", "Right")),
    taker = draw(paste0("Taker_", 1:4)),

    # Delivery characteristics
    delivery_type = draw(
      c("Inswinger", "Outswinger", "Short", "Driven", "Floated"),
      prob = c(0.35, 0.25, 0.15, 0.15, 0.1)
    ),

    # Target zone (6-yard box: near post, central, far post; edge of box)
    target_zone = draw(
      c("Near Post", "Central 6-yard", "Far Post", "Penalty Spot", "Edge of Box"),
      prob = c(0.2, 0.3, 0.25, 0.15, 0.1)
    ),

    # Outcome
    outcome = draw(
      c("Cleared", "Shot Created", "Goal", "Keeper Claim",
        "Out of Play", "Second Ball"),
      prob = c(0.35, 0.2, 0.05, 0.15, 0.15, 0.1)
    ),

    # First contact player
    first_contact = draw(
      c("Defender_1", "Defender_2", "Midfielder_1",
        "Forward_1", "Forward_2", "Own Player"),
      prob = c(0.25, 0.2, 0.15, 0.2, 0.15, 0.05)
    ),

    # xG only exists when a shot (or goal) came from the set piece
    shot_xg = ifelse(outcome %in% shot_outcomes,
                     runif(n_setpieces, 0.05, 0.35), NA),

    # Free kick distance and angle (NA for corners)
    fk_distance = ifelse(set_piece_type == "Free Kick",
                         runif(n_setpieces, 20, 35), NA),
    fk_angle = ifelse(set_piece_type == "Free Kick",
                      draw(c("Central", "Wide Left", "Wide Right")), NA)
  )
})

# Analyze set piece patterns
# Print a set piece scouting report for `data` and invisibly return the
# summary tables.
#
# Args:
#   data: data frame with one row per set piece. Columns read:
#     set_piece_type, side, delivery_type, target_zone, outcome,
#     first_contact, shot_xg (NA when no shot) and fk_angle.
#
# Returns (invisibly):
#   list(corner_patterns, target_zones, aerial_threats). The tables have zero
#   rows when the sample has no corners / no shots; the report then prints
#   "n/a" lines instead of failing on a zero-length `if` condition.
analyze_set_pieces <- function(data) {
  shot_outcomes <- c("Shot Created", "Goal")
  banner <- strrep("=", 61)
  rule <- strrep("-", 40)

  # Filter the corners once; every corner section below reuses this subset.
  corner_data <- data %>% filter(set_piece_type == "Corner")
  n_corners <- nrow(corner_data)

  cat(banner, "\n", sep = "")
  cat("SET PIECE ANALYSIS REPORT\n")
  cat(banner, "\n\n", sep = "")

  # Overall summary
  cat("1. SET PIECE OVERVIEW\n")
  cat(rule, "\n", sep = "")

  summary_stats <- data %>%
    summarise(
      total_set_pieces = n(),
      corners = sum(set_piece_type == "Corner"),
      free_kicks = sum(set_piece_type == "Free Kick"),
      shots_created = sum(outcome %in% shot_outcomes),
      goals = sum(outcome == "Goal"),
      total_xg = sum(shot_xg, na.rm = TRUE)
    )
  shot_rate <- if (summary_stats$total_set_pieces > 0) {
    summary_stats$shots_created / summary_stats$total_set_pieces * 100
  } else {
    0
  }

  cat(sprintf("  Total Set Pieces: %d\n", summary_stats$total_set_pieces))
  cat(sprintf("  Corners: %d | Free Kicks: %d\n",
              summary_stats$corners, summary_stats$free_kicks))
  cat(sprintf("  Shots Created: %d (%.1f%%)\n",
              summary_stats$shots_created, shot_rate))
  cat(sprintf("  Goals: %d | Total xG: %.2f\n\n",
              summary_stats$goals, summary_stats$total_xg))

  # Corner delivery analysis
  cat("2. CORNER DELIVERY PATTERNS\n")
  cat(rule, "\n", sep = "")

  corner_patterns <- corner_data %>%
    group_by(side, delivery_type) %>%
    summarise(
      count = n(),
      shots = sum(outcome %in% shot_outcomes),
      xg = sum(shot_xg, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(side, desc(count))

  print(corner_patterns)

  # Most common delivery
  most_common <- corner_patterns %>%
    group_by(delivery_type) %>%
    summarise(total = sum(count), .groups = "drop") %>%
    arrange(desc(total)) %>%
    slice(1)

  if (nrow(most_common) > 0) {
    cat(sprintf("\n  Most Common Delivery: %s (%d corners)\n\n",
                most_common$delivery_type, most_common$total))
  } else {
    cat("\n  Most Common Delivery: n/a (no corners in sample)\n\n")
  }

  # Target zone analysis
  cat("3. TARGET ZONE ANALYSIS\n")
  cat(rule, "\n", sep = "")

  target_analysis <- corner_data %>%
    group_by(target_zone) %>%
    summarise(
      count = n(),
      pct = n() / n_corners * 100,
      xg = sum(shot_xg, na.rm = TRUE),
      goals = sum(outcome == "Goal"),
      .groups = "drop"
    ) %>%
    arrange(desc(count))

  print(target_analysis)

  # Identify preferred zone
  preferred_zone <- target_analysis %>% slice(1)
  has_preferred <- nrow(preferred_zone) > 0
  if (has_preferred) {
    cat(sprintf("\n  Primary Target: %s (%.1f%% of corners)\n\n",
                preferred_zone$target_zone, preferred_zone$pct))
  } else {
    cat("\n  Primary Target: n/a (no corners in sample)\n\n")
  }

  # Aerial threat analysis
  cat("4. AERIAL THREATS\n")
  cat(rule, "\n", sep = "")

  aerial_threats <- data %>%
    filter(outcome %in% shot_outcomes) %>%
    group_by(first_contact) %>%
    summarise(
      aerial_wins = n(),
      xg = sum(shot_xg, na.rm = TRUE),
      goals = sum(outcome == "Goal"),
      .groups = "drop"
    ) %>%
    arrange(desc(xg))

  print(aerial_threats)

  # Top threat recommendation
  top_threat <- aerial_threats %>% slice(1)
  has_threat <- nrow(top_threat) > 0
  if (has_threat) {
    cat(sprintf("\n  Primary Aerial Threat: %s (%.2f xG from set pieces)\n",
                top_threat$first_contact, top_threat$xg))
    cat("  RECOMMENDATION: Assign dedicated marker to this player at all corners\n\n")
  } else {
    cat("\n  Primary Aerial Threat: n/a (no shots in sample)\n\n")
  }

  # Free kick analysis
  cat("5. FREE KICK ANALYSIS\n")
  cat(rule, "\n", sep = "")

  fk_analysis <- data %>%
    filter(set_piece_type == "Free Kick") %>%
    group_by(fk_angle, delivery_type) %>%
    summarise(
      count = n(),
      xg = sum(shot_xg, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    arrange(desc(xg))

  print(fk_analysis)

  # Defensive recommendations
  cat("\n6. DEFENSIVE RECOMMENDATIONS\n")
  cat(rule, "\n", sep = "")

  # Based on analysis
  cat("  CORNERS:\n")

  # isTRUE() keeps the branch scalar-safe when there is no preferred zone;
  # in that case the generic mixed-marking advice is printed.
  primary_zone <- if (has_preferred) preferred_zone$target_zone else NA_character_
  if (isTRUE(primary_zone == "Near Post")) {
    cat("    - Strong man at near post; attack ball aggressively\n")
    cat("    - Second defender 2 yards behind near post marker\n")
  } else if (isTRUE(primary_zone == "Far Post")) {
    cat("    - Stack defenders at far post; zonal coverage\n")
    cat("    - Keeper to command 6-yard box\n")
  } else {
    cat("    - Mixed zonal-man marking; key players man-marked\n")
    cat("    - Two at posts for short corner threat\n")
  }

  if (has_threat) {
    cat(sprintf("    - Primary marker on: %s\n\n", top_threat$first_contact))
  } else {
    cat("    - Primary marker on: n/a (no shots in sample)\n\n")
  }

  cat("  FREE KICKS:\n")
  cat("    - Wall: 4-5 players depending on distance\n")
  cat("    - Watch for dummy runs behind wall\n")
  cat("    - One player on post for near post deliveries\n")

  invisible(list(
    corner_patterns = corner_patterns,
    target_zones = target_analysis,
    aerial_threats = aerial_threats
  ))
}

# Run the report on the simulated sample and keep the returned tables
analysis <- analyze_set_pieces(set_piece_data)

# Visualization: how often each delivery type is aimed at each target zone
# (corners only)
corner_viz <- count(
  filter(set_piece_data, set_piece_type == "Corner"),
  target_zone, delivery_type
)

ggplot(corner_viz, aes(x = target_zone, y = n, fill = delivery_type)) +
  geom_col(position = "dodge") +
  labs(title = "Corner Delivery Patterns by Target Zone") +
  labs(x = "Target Zone", y = "Count", fill = "Delivery Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Exercise 23.3: Dynamic Game Model Simulator

Task: Build a tactical game simulator that models how an opponent is likely to adjust their tactics based on match state (winning, losing, drawing) and creates scenario-based game plans.

Requirements:

  • Analyze how opponent's style changes by match state
  • Model formation/personnel changes when losing
  • Create scenario plans (if ahead, if behind, etc.)
  • Identify key substitution patterns
  • Generate time-based tactical recommendations

dynamic_game_model.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulate match state data with tactical changes (seeded for reproducibility)
np.random.seed(42)
n_matches = 15

def generate_match_state_data(n_matches):
    """Simulate per-period tactical metrics that react to the scoreline.

    Each match is split into six 15-minute periods. In every period a goal
    for and a goal against are drawn, the running score decides the match
    state, and the formation and tactical metrics are then sampled from
    state- and time-dependent ranges.

    Parameters
    ----------
    n_matches : int
        Number of matches to simulate; match ids run from 1 to ``n_matches``.

    Returns
    -------
    pandas.DataFrame
        One row per (match, period) with the period bounds, ``match_state``,
        ``formation`` and the six tactical metric columns.
    """
    records = []

    for match_id in range(1, n_matches + 1):
        scored_total = 0
        conceded_total = 0

        for start in range(0, 90, 15):
            end = start + 15

            # Draw order (scored, then conceded) is part of the seeded stream.
            scored_total += np.random.binomial(1, 0.15)
            conceded_total += np.random.binomial(1, 0.12)

            margin = scored_total - conceded_total
            if margin > 0:
                match_state = "Winning"
            elif margin < 0:
                match_state = "Losing"
            else:
                match_state = "Drawing"

            losing = match_state == "Losing"
            winning = match_state == "Winning"
            after_60 = start >= 60
            after_75 = start >= 75

            # Formation: chase the game from 60', protect a lead from 75'
            if losing and after_60:
                formation = np.random.choice(["4-3-3", "3-4-3", "4-2-4"], p=[0.3, 0.4, 0.3])
            elif winning and after_75:
                formation = np.random.choice(["5-4-1", "4-5-1", "5-3-2"], p=[0.4, 0.4, 0.2])
            else:
                formation = "4-3-3"

            # Tactical metrics; each branch draws in the same fixed order:
            # pressing, directness, defensive line, width, crosses, long balls
            if losing:
                pressing = np.random.uniform(70, 95)
                directness = np.random.uniform(*((60, 85) if after_60 else (45, 65)))
                def_line = np.random.uniform(55, 70)
                width = np.random.uniform(65, 85)
                crosses = np.random.poisson(6 if after_60 else 3)
                long_balls = np.random.poisson(8 if after_75 else 4)
            elif winning:
                pressing = np.random.uniform(*((40, 60) if after_60 else (55, 75)))
                directness = np.random.uniform(35, 55)
                def_line = np.random.uniform(35, 50)
                width = np.random.uniform(50, 70)
                crosses = np.random.poisson(3)
                long_balls = np.random.poisson(4)
            else:
                pressing = np.random.uniform(55, 75)
                directness = np.random.uniform(45, 65)
                def_line = np.random.uniform(45, 60)
                width = np.random.uniform(50, 70)
                crosses = np.random.poisson(3)
                long_balls = np.random.poisson(4)

            records.append(dict(
                match_id=match_id,
                period=f"{start}-{end}",
                minute_start=start,
                minute_end=end,
                match_state=match_state,
                formation=formation,
                pressing_intensity=pressing,
                directness=directness,
                defensive_line=def_line,
                attacking_width=width,
                crosses=crosses,
                long_balls=long_balls,
            ))

    return pd.DataFrame(records)

match_state_data = generate_match_state_data(n_matches)

# Substitution data: three substitutions per match.
# The columns are filled one at a time in the order the random draws must
# happen: minutes first, then match state, player off, player on.
substitution_data = pd.DataFrame({
    'match_id': np.repeat(np.arange(1, n_matches + 1), 3),
    'sub_number': np.tile(np.arange(1, 4), n_matches),
})
# Three distinct minutes between 55 and 90 per match, in ascending order
substitution_data['minute'] = np.concatenate([
    sorted(np.random.choice(range(55, 91), 3, replace=False))
    for _ in range(n_matches)
])
substitution_data['match_state_at_sub'] = np.random.choice(
    ['Drawing', 'Winning', 'Losing'], n_matches * 3, p=[0.4, 0.3, 0.3]
)
substitution_data['player_off_type'] = np.random.choice(
    ['Forward', 'Midfielder', 'Defender', 'Goalkeeper'], n_matches * 3,
    p=[0.3, 0.35, 0.3, 0.05]
)
substitution_data['player_on_type'] = np.random.choice(
    ['Forward', 'Midfielder', 'Defender'], n_matches * 3, p=[0.4, 0.35, 0.25]
)

def analyze_state_tactics(state_data, sub_data):
    """Print a match-state tactical report and return the per-state means.

    Parameters
    ----------
    state_data : pandas.DataFrame
        One row per match period. Columns read: ``match_state``, ``period``,
        ``minute_start``, ``formation``, ``pressing_intensity``,
        ``directness``, ``defensive_line``, ``attacking_width``, ``crosses``
        and ``long_balls``. Both 'Losing' and 'Winning' rows must be present,
        because the report compares the two (a missing state raises KeyError).
    sub_data : pandas.DataFrame
        One row per substitution. Columns read: ``match_state_at_sub``,
        ``player_on_type`` and ``minute``.

    Returns
    -------
    pandas.DataFrame
        Mean of each tactical metric per match state, rounded to one decimal
        and indexed by ``match_state``.
    """
    rule = "-" * 50

    print("=" * 67)
    print("DYNAMIC GAME MODEL - TACTICAL STATE ANALYSIS")
    print("=" * 67 + "\n")

    # 1. Style by Match State
    print("1. TACTICAL PROFILE BY MATCH STATE")
    print(rule)

    state_metrics = state_data.groupby('match_state').agg({
        'pressing_intensity': 'mean',
        'directness': 'mean',
        'defensive_line': 'mean',
        'attacking_width': 'mean',
        'crosses': 'mean',
        'long_balls': 'mean'
    }).round(1)

    print(state_metrics.to_string())

    losing = state_metrics.loc['Losing']
    winning = state_metrics.loc['Winning']

    # Differences are formatted with an explicit sign ("+5" / "-5"). A literal
    # "+" in front of the number would render a negative gap as "+-5".
    press_gap = losing['pressing_intensity'] - winning['pressing_intensity']
    line_gap = losing['defensive_line'] - winning['defensive_line']
    cross_gap = losing['crosses'] - state_metrics['crosses'].mean()

    print("\nKey Observations:")
    print(f"  When LOSING: Press intensity {losing['pressing_intensity']:.0f}% ({press_gap:+.0f}% vs winning)")
    print(f"  When WINNING: Def line at {winning['defensive_line']:.0f}m ({line_gap:.0f}m deeper than losing)")

    # 2. Formation Changes
    print("\n2. FORMATION CHANGES BY STATE & TIME")
    print(rule)

    late_game = state_data[state_data['minute_start'] >= 60]
    formation_changes = late_game.groupby(['match_state', 'formation']).size().reset_index(name='count')
    formation_changes = formation_changes.sort_values(['match_state', 'count'], ascending=[True, False])
    print(formation_changes.to_string(index=False))

    # 3. Substitution Patterns
    print("\n3. SUBSTITUTION PATTERNS")
    print(rule)

    sub_patterns = sub_data.groupby(['match_state_at_sub', 'player_on_type']).agg({
        'minute': ['count', 'mean']
    }).reset_index()
    sub_patterns.columns = ['State', 'Player Type', 'Count', 'Avg Minute']
    sub_patterns = sub_patterns.sort_values(['State', 'Count'], ascending=[True, False])
    print(sub_patterns.to_string(index=False))

    losing_subs = sub_data[sub_data['match_state_at_sub'] == 'Losing']['player_on_type'].value_counts()
    if losing_subs.empty:
        # idxmax() raises on an empty Series, so report the gap explicitly.
        print("\n  When losing, most common sub: n/a (no substitutions while losing)")
    else:
        top_sub = losing_subs.idxmax()
        top_pct = losing_subs.max() / losing_subs.sum() * 100
        print(f"\n  When losing, most common sub: {top_sub} ({top_pct:.0f}% of subs)")

    # 4. Time-Based Phases
    print("\n4. TACTICAL PHASES BY TIME PERIOD")
    print(rule)

    time_phases = state_data.groupby('period').agg({
        'pressing_intensity': 'mean',
        'directness': 'mean',
        'crosses': 'mean'
    }).round(1)
    print(time_phases.to_string())

    # 5. Scenario Plans
    print("\n5. SCENARIO-BASED GAME PLANS")
    print(rule)

    print("\n  SCENARIO A: We are WINNING")
    print(f"    - Expect them to push higher (def line {line_gap:+.0f}m)")
    print("    - Counter-attacks become more viable")
    print(f"    - Watch for increased crosses ({cross_gap:+.1f} per 15 min)")
    print("    - Likely formation change to 3-4-3 or 4-2-4 after 60'")

    print("\n  SCENARIO B: We are LOSING")
    print("    - They will drop deeper and reduce pressing")
    print("    - Expect 5-4-1 or 5-3-2 formation after 75'")
    print("    - Counter-attacks become their primary threat")

    print("\n  SCENARIO C: DRAWING late (75'+)")
    print("    - Both teams likely to take more risks")
    print("    - Space opens up in transitions")

    # 6. Key Triggers
    print("\n6. KEY TACTICAL TRIGGERS TO WATCH")
    print(rule)
    print("  Minute 60: Formation likely changes if losing")
    print("  Minute 65-70: Attacking substitution if losing")
    print("  Minute 75: Defensive switch if winning (5-back)")

    # Average long balls in the final period versus the whole-match average
    late_long = state_data[state_data['minute_start'] >= 75]['long_balls'].mean()
    overall_long = state_data['long_balls'].mean()
    print(f"  Minute 80+: Long balls increase ({late_long - overall_long:+.1f} per period)")

    return state_metrics

game_model = analyze_state_tactics(match_state_data, substitution_data)

# Visualization: metric trends across states (left), late formations (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Metrics by state: one line per metric, states ordered from losing to winning
metrics = ['pressing_intensity', 'directness', 'defensive_line']
state_order = ['Losing', 'Drawing', 'Winning']

for metric in metrics:
    means = []
    for state in state_order:
        in_state = match_state_data['match_state'] == state
        means.append(match_state_data[in_state][metric].mean())
    axes[0].plot(state_order, means, marker='o', label=metric)

axes[0].set(
    xlabel='Match State',
    ylabel='Value',
    title='Tactical Metrics by Match State',
)
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Formation distribution when losing late (from the 60th minute on)
is_losing = match_state_data['match_state'] == 'Losing'
is_late = match_state_data['minute_start'] >= 60
late_losing = match_state_data[is_losing & is_late]
formation_counts = late_losing['formation'].value_counts()
axes[1].pie(
    formation_counts,
    labels=formation_counts.index,
    autopct='%1.0f%%',
    startangle=90,
)
axes[1].set_title('Formation When Losing (60+ min)')

plt.tight_layout()
plt.show()
library(tidyverse)

# Simulate match state data with tactical changes
set.seed(42)
n_matches <- 15

# Generate one row per 15-minute period for `n_matches` simulated matches.
#
# For every period a goal for and a goal against are drawn; the running score
# decides the match state ("Winning", "Losing", "Drawing"), and the formation
# and tactical metrics are then sampled from state- and time-dependent ranges.
#
# Args:
#   n_matches: number of matches to simulate (match ids run 1..n_matches).
#
# Returns:
#   A tibble with match_id, period, minute_start, minute_end, goal_scored,
#   goal_conceded, match_state, formation and the six tactical metrics.
#
# Every random draw inside case_when() uses n() values. A length-1 draw such
# as runif(1, ...) would be recycled, giving every matching period of a match
# the same "random" value.
generate_match_state_data <- function(n_matches) {
  simulate_match <- function(match_id) {
    tibble(
      match_id = match_id,
      period = c("0-15", "15-30", "30-45", "45-60", "60-75", "75-90"),
      minute_start = c(0, 15, 30, 45, 60, 75),
      minute_end = c(15, 30, 45, 60, 75, 90)
    ) %>%
      mutate(
        # Goals per period; the cumulative score sets the state at period end
        goal_scored = rbinom(n(), 1, 0.15),
        goal_conceded = rbinom(n(), 1, 0.12),
        match_state = case_when(
          cumsum(goal_scored) > cumsum(goal_conceded) ~ "Winning",
          cumsum(goal_scored) < cumsum(goal_conceded) ~ "Losing",
          TRUE ~ "Drawing"
        ),

        # Formation shifts when losing late or protecting a lead
        formation = case_when(
          match_state == "Losing" & minute_start >= 60 ~
            sample(c("4-3-3", "3-4-3", "4-2-4"), n(),
                   replace = TRUE, prob = c(0.3, 0.4, 0.3)),
          match_state == "Winning" & minute_start >= 75 ~
            sample(c("5-4-1", "4-5-1", "5-3-2"), n(),
                   replace = TRUE, prob = c(0.4, 0.4, 0.2)),
          TRUE ~ "4-3-3"
        ),

        # Pressing intensity by state
        pressing_intensity = case_when(
          match_state == "Losing" ~ runif(n(), 70, 95),
          match_state == "Winning" & minute_start >= 60 ~ runif(n(), 40, 60),
          TRUE ~ runif(n(), 55, 75)
        ),

        # Directness by state
        directness = case_when(
          match_state == "Losing" & minute_start >= 60 ~ runif(n(), 60, 85),
          match_state == "Winning" ~ runif(n(), 35, 55),
          TRUE ~ runif(n(), 45, 65)
        ),

        # Defensive line height
        defensive_line = case_when(
          match_state == "Winning" & minute_start >= 60 ~ runif(n(), 35, 50),
          match_state == "Losing" ~ runif(n(), 55, 70),
          TRUE ~ runif(n(), 45, 60)
        ),

        # Width in attack
        attacking_width = case_when(
          match_state == "Losing" ~ runif(n(), 65, 85),
          TRUE ~ runif(n(), 50, 70)
        ),

        # Crosses per period
        crosses = case_when(
          match_state == "Losing" & minute_start >= 60 ~ rpois(n(), 6),
          TRUE ~ rpois(n(), 3)
        ),

        # Long balls per period
        long_balls = case_when(
          match_state == "Losing" & minute_start >= 75 ~ rpois(n(), 8),
          TRUE ~ rpois(n(), 4)
        )
      )
  }

  # seq_len() yields no matches for n_matches = 0 (1:0 would yield two)
  map_dfr(seq_len(n_matches), simulate_match)
}

match_state_data <- generate_match_state_data(n_matches)

# Substitution patterns: three substitutions per match
substitution_data <- local({
  n_subs <- n_matches * 3

  # Candidate minutes 55-90; later minutes carry more weight (1x, 2x, 3x)
  minute_pool <- 55:90
  minute_weights <- rep(c(1, 2, 3), times = c(10, 15, 11))

  # Draw n_subs values with replacement using the given weights.
  draw <- function(levels, prob) {
    sample(levels, n_subs, replace = TRUE, prob = prob)
  }

  # Columns are evaluated top to bottom, which fixes the order of the draws.
  tibble(
    match_id = rep(seq_len(n_matches), each = 3),
    sub_number = rep(1:3, n_matches),
    minute = as.vector(replicate(
      n_matches,
      sort(sample(minute_pool, 3, prob = minute_weights))
    )),
    match_state_at_sub = draw(c("Drawing", "Winning", "Losing"),
                              prob = c(0.4, 0.3, 0.3)),
    player_off_type = draw(c("Forward", "Midfielder", "Defender", "Goalkeeper"),
                           prob = c(0.3, 0.35, 0.3, 0.05)),
    player_on_type = draw(c("Forward", "Midfielder", "Defender"),
                          prob = c(0.4, 0.35, 0.25))
  )
})

# Analyze tactical changes by match state
# Print a match-state tactical report and invisibly return the summary tables.
#
# Args:
#   state_data: one row per match period. Columns read: match_state, period,
#     minute_start, formation, pressing_intensity, directness, defensive_line,
#     attacking_width, crosses, long_balls.
#   sub_data: one row per substitution. Columns read: match_state_at_sub,
#     player_on_type, minute.
#
# Returns (invisibly):
#   list(state_metrics, formation_changes, sub_patterns).
#
# Differences are printed through sprintf() with an explicit sign, so a
# negative gap reads "-5" rather than "+-5" and no stray spaces are inserted
# around the number.
analyze_state_tactics <- function(state_data, sub_data) {
  banner <- strrep("=", 66)
  rule <- strrep("-", 50)

  cat(banner, "\n", sep = "")
  cat("DYNAMIC GAME MODEL - TACTICAL STATE ANALYSIS\n")
  cat(banner, "\n\n", sep = "")

  # 1. Style by Match State
  cat("1. TACTICAL PROFILE BY MATCH STATE\n")
  cat(rule, "\n", sep = "")

  state_metrics <- state_data %>%
    group_by(match_state) %>%
    summarise(
      avg_pressing = mean(pressing_intensity),
      avg_directness = mean(directness),
      avg_def_line = mean(defensive_line),
      avg_width = mean(attacking_width),
      avg_crosses = mean(crosses),
      avg_long_balls = mean(long_balls),
      .groups = "drop"
    )

  print(state_metrics)

  cat("\nKey Observations:\n")
  losing_data <- state_metrics %>% filter(match_state == "Losing")
  winning_data <- state_metrics %>% filter(match_state == "Winning")

  press_gap <- losing_data$avg_pressing - winning_data$avg_pressing
  line_gap <- losing_data$avg_def_line - winning_data$avg_def_line
  cross_gap <- losing_data$avg_crosses - mean(state_metrics$avg_crosses)

  cat(sprintf("  When LOSING: Press intensity %.0f%% (%+.0f%% vs winning)\n",
              losing_data$avg_pressing, press_gap))
  cat(sprintf("  When WINNING: Def line at %.0fm (%.0fm deeper than losing)\n",
              winning_data$avg_def_line, line_gap))

  # 2. Formation Changes
  cat("\n2. FORMATION CHANGES BY STATE & TIME\n")
  cat(rule, "\n", sep = "")

  formation_changes <- state_data %>%
    filter(minute_start >= 60) %>%
    group_by(match_state, formation) %>%
    summarise(count = n(), .groups = "drop") %>%
    arrange(match_state, desc(count))

  print(formation_changes)

  # 3. Substitution Patterns
  cat("\n3. SUBSTITUTION PATTERNS\n")
  cat(rule, "\n", sep = "")

  sub_patterns <- sub_data %>%
    group_by(match_state_at_sub, player_on_type) %>%
    summarise(count = n(), avg_minute = mean(minute), .groups = "drop") %>%
    arrange(match_state_at_sub, desc(count))

  print(sub_patterns)

  # Key sub pattern
  losing_subs <- sub_data %>%
    filter(match_state_at_sub == "Losing") %>%
    count(player_on_type) %>%
    arrange(desc(n)) %>%
    slice(1)

  if (nrow(losing_subs) > 0) {
    n_losing_subs <- sum(sub_data$match_state_at_sub == "Losing")
    cat(sprintf("\n  When losing, most common sub: %s (%.0f%% of subs)\n",
                losing_subs$player_on_type,
                losing_subs$n / n_losing_subs * 100))
  } else {
    cat("\n  When losing, most common sub: n/a (no substitutions while losing)\n")
  }

  # 4. Time-Based Tactical Phases
  cat("\n4. TACTICAL PHASES BY TIME PERIOD\n")
  cat(rule, "\n", sep = "")

  time_phases <- state_data %>%
    group_by(period) %>%
    summarise(
      pressing = mean(pressing_intensity),
      directness = mean(directness),
      crosses = mean(crosses),
      .groups = "drop"
    )

  print(time_phases)

  # 5. Generate Scenario-Based Game Plans
  cat("\n5. SCENARIO-BASED GAME PLANS\n")
  cat(rule, "\n", sep = "")

  cat("\n  SCENARIO A: We are WINNING\n")
  cat(sprintf("    - Expect them to push higher (def line %+.0fm)\n", line_gap))
  cat("    - Counter-attacks become more viable\n")
  cat(sprintf("    - Watch for increased crosses (%.1f more per 15 min)\n",
              cross_gap))
  cat("    - Likely formation change to 3-4-3 or 4-2-4 after 60'\n")
  cat("    - Expect striker brought on around 65-70'\n")

  cat("\n  SCENARIO B: We are LOSING\n")
  cat("    - They will drop deeper and reduce pressing\n")
  cat("    - Expect 5-4-1 or 5-3-2 formation after 75'\n")
  cat("    - Counter-attacks become their primary threat\n")
  cat("    - Watch for time-wasting substitutions late\n")

  cat("\n  SCENARIO C: DRAWING late (75'+)\n")
  cat("    - Both teams likely to take more risks\n")
  cat("    - Space opens up in transitions\n")
  cat("    - Set pieces become crucial\n")

  # 6. Key Tactical Triggers
  cat("\n6. KEY TACTICAL TRIGGERS TO WATCH\n")
  cat(rule, "\n", sep = "")

  # Compare the AVERAGE late-game long-ball count with the overall average.
  # Taking max() of the late periods would set a single extreme period against
  # a mean and overstate the typical per-period increase.
  late_long <- mean(state_data$long_balls[state_data$minute_start >= 75])
  overall_long <- mean(state_data$long_balls)

  cat("  Minute 60: Formation likely changes if losing\n")
  cat("  Minute 65-70: Attacking substitution if losing\n")
  cat("  Minute 75: Defensive switch if winning (5-back)\n")
  cat(sprintf(
    "  Minute 80+: Long balls increase significantly (%+.1f per period)\n",
    late_long - overall_long
  ))

  invisible(list(
    state_metrics = state_metrics,
    formation_changes = formation_changes,
    sub_patterns = sub_patterns
  ))
}

# Run the report on the simulated data and keep the returned tables
game_model <- analyze_state_tactics(match_state_data, substitution_data)

# Visualization: reshape the three headline metrics to long form so each one
# gets its own facet
state_summary <- pivot_longer(
  match_state_data,
  cols = c(pressing_intensity, directness, defensive_line),
  names_to = "metric",
  values_to = "value"
)

ggplot(state_summary, aes(x = match_state, y = value, fill = metric)) +
  geom_boxplot() +
  facet_wrap(~metric, scales = "free_y") +
  labs(title = "Tactical Metrics by Match State") +
  labs(x = "Match State", y = "Value") +
  theme_minimal() +
  theme(legend.position = "none")

Summary

Key Takeaways
  • Team style analysis quantifies how opponents build attacks, press, and defend through metrics like possession share, pass length, and pressing intensity
  • Weakness identification reveals exploitable vulnerabilities in defensive organization, transitions, and set pieces
  • Key player analysis identifies the most influential opponents and generates strategies for neutralization
  • Set piece scouting uncovers delivery patterns, target zones, and defensive organization
  • Tactical recommendations translate data insights into actionable strategies for coaches and players