Chapter 60

Capstone - Complete Analytics System

Intermediate 30 min read 5 sections 10 code examples
0 of 60 chapters completed (0%)

Analytics for Broadcasters & Media

Modern football broadcasting has been transformed by analytics. From live xG graphics to in-depth tactical analysis shows, data enhances storytelling and viewer engagement. This chapter explores how media professionals can effectively use analytics in their coverage.

Analytics in Broadcast

Live broadcasts increasingly feature analytics graphics to enhance viewer understanding. The challenge is presenting complex data clearly enough to be understood in the few seconds a graphic stays on screen.

Live Graphics
  • xG accumulation charts
  • Pass maps and networks
  • Heat maps
  • Sprint and distance counters
  • Formation displays
Post-Match
  • Shot maps with xG
  • Key pass visualizations
  • Pressing metrics
  • Player radar comparisons
  • Tactical breakdowns
Studio Analysis
  • Season trend analysis
  • Player comparison tools
  • Expected points tables
  • Form and fixture analysis
  • Transfer market data
broadcast_xg_timeline
# Python: Create broadcast-quality xG timeline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def create_xg_timeline(match_events, home_team, away_team,
                       home_color="#E31937", away_color="#132257"):
    """Create a broadcast-quality cumulative xG timeline graphic.

    Args:
        match_events: pandas DataFrame of match events. Only rows with
            ``type == "Shot"`` are used; they must provide the columns
            ``minute``, ``second``, ``team``, ``xg`` and ``outcome``.
        home_team: Value of the ``team`` column identifying the home side.
        away_team: Value of the ``team`` column identifying the away side.
        home_color: Line/marker colour for the home side.
        away_color: Line/marker colour for the away side.

    Returns:
        The matplotlib Figure (16x9 inches, dark theme). The caller is
        responsible for saving and closing it.
    """
    # Keep only shots and express the match clock as decimal minutes.
    shots = match_events[match_events["type"] == "Shot"].copy()
    shots["time"] = shots["minute"] + shots["second"] / 60

    # Dark canvas shared by figure and axes.
    fig, ax = plt.subplots(figsize=(16, 9), facecolor="#0D1117")
    ax.set_facecolor("#0D1117")

    # Both sides go through the same drawing code so neither can miss a
    # layer (previously goal labels were only drawn for the home side).
    final_totals = []
    for team, color in ((home_team, home_color), (away_team, away_color)):
        team_shots = shots[shots["team"] == team].sort_values("time")
        team_shots = team_shots.assign(cum_xg=team_shots["xg"].cumsum())

        # Prepend the kick-off point so every line starts at (0, 0).
        times = [0] + team_shots["time"].tolist()
        totals = [0] + team_shots["cum_xg"].tolist()
        final_totals.append(totals[-1])

        ax.step(times, totals, where="post", linewidth=3,
                color=color, label=team)

        # Marker area scales with the xG of the individual shot.
        ax.scatter(team_shots["time"], team_shots["cum_xg"],
                   s=team_shots["xg"] * 500, color=color,
                   alpha=0.7, edgecolors="white", linewidths=2)

        goals = team_shots[team_shots["outcome"] == "Goal"]
        for _, goal in goals.iterrows():
            ax.annotate("GOAL", (goal["time"], goal["cum_xg"]),
                        textcoords="offset points", xytext=(0, 15),
                        ha="center", fontsize=10, color="white",
                        fontweight="bold")

    # Axis range: 0-95 for a regular match, widened only when a shot was
    # taken later (long stoppage time, extra time) so it is not clipped.
    last_shot = shots["time"].max() if len(shots) else 0
    x_max = max(95, np.ceil(last_shot))  # NaN compares False -> stays 95
    ax.set_xlim(0, x_max)
    ax.set_xticks(list(range(0, int(x_max) + 1, 15)))

    # Styling
    ax.set_xlabel("Minutes", color="white", fontsize=14)
    ax.set_ylabel("Expected Goals (xG)", color="white", fontsize=14)
    ax.tick_params(colors="white")
    for side in ("bottom", "left"):
        ax.spines[side].set_color("#30363D")
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.grid(True, alpha=0.3, color="#30363D")

    # Final xG totals in the top-left corner.
    final_home, final_away = final_totals
    ax.text(0.02, 0.98, f"{home_team}: {final_home:.2f} xG",
            transform=ax.transAxes, fontsize=16, color=home_color,
            fontweight="bold", va="top")
    ax.text(0.02, 0.92, f"{away_team}: {final_away:.2f} xG",
            transform=ax.transAxes, fontsize=16, color=away_color,
            fontweight="bold", va="top")

    ax.legend(loc="lower right", facecolor="#0D1117",
              edgecolor="#30363D", labelcolor="white")

    plt.tight_layout()
    return fig

# Create sample visualization
# fig = create_xg_timeline(events, "Liverpool", "Man City")
# R: Create broadcast-quality xG timeline
library(tidyverse)
library(ggplot2)

# Build a broadcast-style cumulative xG step chart for one match.
#
# match_events: data frame of events with the columns type, minute, second,
#   team, xg, home_team and away_team. home_team / away_team must each hold
#   a single distinct value (one match), and those values are expected to be
#   the ones used in the team column.
# Returns a ggplot object; nothing is written to disk.
create_xg_timeline <- function(match_events) {
  home_team <- unique(match_events$home_team)
  away_team <- unique(match_events$away_team)
  stopifnot(length(home_team) == 1, length(away_team) == 1)

  # Running xG total per team, in match-clock order
  shots <- match_events %>%
    filter(type == "Shot") %>%
    arrange(minute, second) %>%
    group_by(team) %>%
    mutate(
      cumulative_xg = cumsum(xg),
      time = minute + second / 60
    ) %>%
    ungroup()

  # Every line starts at (0, 0) ...
  start_points <- tibble(
    team = c(home_team, away_team),
    time = 0,
    cumulative_xg = 0
  )

  # ... and is extended flat to the time of the last shot of the match.
  # Only team/time/cumulative_xg are kept so these helper rows carry no
  # `type` and can never be mistaken for (and drawn as) real shots. Starting
  # from start_points + shots also gives a team without shots a flat line.
  end_time <- if (nrow(shots) > 0) max(shots$time, na.rm = TRUE) else 0
  end_points <- bind_rows(start_points, shots) %>%
    group_by(team) %>%
    slice_max(cumulative_xg, n = 1, with_ties = FALSE) %>%
    ungroup() %>%
    transmute(team, time = end_time, cumulative_xg)

  xg_timeline <- bind_rows(start_points, shots, end_points)

  # Name the palette so home is always red and away always navy, instead of
  # depending on the alphabetical order of the team names.
  team_colors <- setNames(c("#E31937", "#132257"), c(home_team, away_team))

  # Widen the axis only when a shot came after minute 95 (extra time).
  x_max <- max(95, ceiling(end_time))

  # Broadcast-quality visualization
  ggplot(xg_timeline, aes(x = time, y = cumulative_xg, color = team)) +
    geom_step(linewidth = 2) +
    geom_point(data = shots, aes(size = xg), alpha = 0.7) +
    scale_color_manual(values = team_colors) +
    scale_x_continuous(breaks = seq(0, x_max, by = 15),
                       limits = c(0, x_max)) +
    labs(
      title = NULL,  # Broadcasters add their own
      x = "Minutes",
      y = "Expected Goals (xG)",
      color = NULL,
      size = "Shot xG"
    ) +
    theme_minimal(base_size = 18) +
    theme(
      panel.grid.minor = element_blank(),
      legend.position = "bottom",
      plot.background = element_rect(fill = "#0D1117", color = NA),
      panel.background = element_rect(fill = "#0D1117", color = NA),
      text = element_text(color = "white"),
      axis.text = element_text(color = "white"),
      panel.grid.major = element_line(color = "#30363D")
    )
}

Real-Time Graphics Pipeline

realtime_graphics
# Python: Real-time graphics pipeline
import asyncio
from datetime import datetime
from typing import Dict, Any
import matplotlib.pyplot as plt
from io import BytesIO
import base64

class BroadcastGraphicsEngine:
    """
    Real-time graphics generation for broadcast.

    Events are handled as a list of dicts (keys used here: ``type``,
    ``team``, ``outcome``, ``xg``, ``x``, ``y``). Every ``generate_*``
    method returns the rendered graphic as PNG bytes.
    """

    def __init__(self, match_id: str, output_dir: str = "./broadcast",
                 home_team: str = "Home", away_team: str = "Away"):
        """Store match settings; no I/O happens here.

        Args:
            match_id: Identifier of the match being covered.
            output_dir: Directory that ``run_live_updates`` writes PNGs to.
            home_team: Value of the events' ``team`` field for the home side.
            away_team: Value of the events' ``team`` field for the away side.
        """
        self.match_id = match_id
        self.output_dir = output_dir
        self.home_team = home_team
        self.away_team = away_team
        self.cache = {}
        self.last_update = None

    async def connect_to_feed(self, feed_url: str):
        """Connect to live data feed."""
        # In production, connect to StatsBomb/Opta live feed
        pass

    async def fetch_events(self) -> list:
        """Return all match events received so far.

        This is the integration point with the data provider and has no
        generic implementation; subclasses must override it.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            "fetch_events() must be implemented for the live data feed in use"
        )

    @staticmethod
    def _figure_to_png(fig) -> bytes:
        """Render a matplotlib figure to PNG bytes and always close it."""
        buf = BytesIO()
        try:
            fig.savefig(buf, format="png", dpi=100,
                        facecolor="#0D1117", bbox_inches="tight")
        finally:
            # Close even when saving fails so a long-running loop does not
            # accumulate open figures.
            plt.close(fig)
        return buf.getvalue()

    def generate_xg_graphic(self, events: list) -> bytes:
        """Generate xG timeline for broadcast."""
        import pandas as pd

        # create_xg_timeline filters with boolean masks, which needs a
        # DataFrame rather than the raw list of event dicts.
        frame = events if isinstance(events, pd.DataFrame) \
            else pd.DataFrame(events)
        fig = create_xg_timeline(frame, self.home_team, self.away_team)
        return self._figure_to_png(fig)

    def generate_shot_map(self, events: list, team: str) -> bytes:
        """Generate shot map with xG coloring."""
        from mplsoccer import VerticalPitch

        pitch = VerticalPitch(half=True, pitch_color="#0D1117",
                             line_color="white")
        fig, ax = pitch.draw(figsize=(8, 8))

        shots = [e for e in events if e["type"] == "Shot"
                 and e["team"] == team]

        if shots:
            # pitch.scatter (not ax.scatter) so the x/y swap of the
            # vertical pitch orientation is applied to the coordinates.
            pitch.scatter(
                [shot["x"] for shot in shots],
                [shot["y"] for shot in shots],
                s=[shot["xg"] * 1000 for shot in shots],
                c=["#E31937" if shot["outcome"] == "Goal" else "#666666"
                   for shot in shots],
                alpha=0.7, edgecolors="white", linewidths=2, ax=ax,
            )

        return self._figure_to_png(fig)

    def generate_pass_network(self, events: list, team: str) -> bytes:
        """Generate pass network visualization.

        NOTE: the network construction and drawing are still placeholders,
        so the returned PNG currently shows an empty pitch.
        """
        from mplsoccer import Pitch
        import networkx as nx

        # Build network from passes
        passes = [e for e in events if e["type"] == "Pass"
                  and e["team"] == team and e["outcome"] == "Complete"]

        # Calculate average positions and pass counts
        # ... (network building logic)

        pitch = Pitch(pitch_color="#0D1117", line_color="white")
        fig, ax = pitch.draw(figsize=(12, 8))

        # Draw network
        # ... (drawing logic)

        return self._figure_to_png(fig)

    async def run_live_updates(self, interval_seconds: int = 30):
        """Continuously update graphics during match.

        Runs until cancelled. Each cycle fetches the events, renders the xG
        timeline and the home shot map, and writes both as timestamped PNGs
        into ``output_dir``. Errors in a cycle are reported and the loop
        carries on, except for a missing ``fetch_events`` implementation,
        which is a setup error and is raised immediately.
        """
        import os

        os.makedirs(self.output_dir, exist_ok=True)

        while True:
            try:
                # Fetch latest events
                events = await self.fetch_events()

                # Generate all graphics
                rendered = {
                    "xg": self.generate_xg_graphic(events),
                    "shot_map": self.generate_shot_map(events, self.home_team),
                }

                # Save to output directory
                timestamp = datetime.now().strftime("%H%M%S")
                for name, png in rendered.items():
                    path = os.path.join(self.output_dir,
                                        f"{name}_{timestamp}.png")
                    with open(path, "wb") as f:
                        f.write(png)

                self.last_update = datetime.now()

            except NotImplementedError:
                # Retrying cannot fix a missing feed implementation.
                raise
            except Exception as e:
                print(f"Error updating graphics: {e}")

            await asyncio.sleep(interval_seconds)
# R: Real-time graphics pipeline concept
library(R6)

# Skeleton of a live broadcast graphics generator (R6, reference semantics).
# The generate_* methods are placeholders that outline the intended steps;
# only export_for_broadcast() is implemented.
BroadcastGraphics <- R6Class("BroadcastGraphics",
  public = list(
    match_id = NULL,
    data_feed = NULL,

    # match_id: identifier of the match being covered.
    # feed_url: location of the live data feed; stored in `data_feed` so the
    #   generate_* methods can read from it once they are implemented.
    initialize = function(match_id, feed_url) {
      self$match_id <- match_id
      # Connect to live data feed
      if (!missing(feed_url)) {
        self$data_feed <- feed_url
      }
    },

    generate_xg_graphic = function() {
      # Fetch latest data
      # Generate visualization
      # Export in broadcast format (PNG/SVG)
    },

    generate_formation_graphic = function() {
      # Current formations with player positions
    },

    generate_momentum_graphic = function(window_minutes = 10) {
      # Calculate rolling metrics for momentum display
    },

    # Save `graphic` (a ggplot) as broadcast_<timestamp>.<format> in the
    # working directory. `resolution` is c(width, height) in pixels; at
    # dpi = 100 that is resolution / 100 inches.
    export_for_broadcast = function(graphic, format = "png",
                                    resolution = c(1920, 1080)) {
      stopifnot(length(resolution) == 2)

      # A compact timestamp keeps the name free of the spaces and colons
      # that print(Sys.time()) contains and that Windows rejects in file
      # names.
      stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")

      # Export in broadcast-ready format
      ggsave(
        filename = paste0("broadcast_", stamp, ".", format),
        plot = graphic,
        width = resolution[1] / 100,
        height = resolution[2] / 100,
        dpi = 100
      )
    }
  )
)

Social Media Content

Analytics content thrives on social media platforms. Creating shareable visualizations requires understanding platform constraints and audience expectations.

social_media_graphics
# Python: Social media optimized visualizations
import matplotlib.pyplot as plt
from mplsoccer import Pitch, VerticalPitch
from PIL import Image
import numpy as np

class SocialMediaGraphics:
    """Create platform-optimized football graphics.

    Every ``create_*`` method returns a matplotlib Figure sized for the
    requested platform; saving it at ``dpi=100`` yields the pixel
    dimensions listed in ``DIMENSIONS``.
    """

    # Target image size in pixels: (width, height).
    DIMENSIONS = {
        "twitter": (1200, 675),
        "instagram_square": (1080, 1080),
        "instagram_story": (1080, 1920),
        "linkedin": (1200, 627)
    }

    def __init__(self, brand_colors=None):
        """Args:
            brand_colors: Optional dict with the keys ``primary``,
                ``secondary``, ``background`` and ``text``; replaces the
                default palette as a whole.
        """
        self.brand_colors = brand_colors or {
            "primary": "#1B5E20",
            "secondary": "#FFD700",
            "background": "#0D1117",
            "text": "#FFFFFF"
        }

    def _figsize(self, platform):
        """Return the figure size in inches (at 100 dpi) for ``platform``.

        Raises:
            KeyError: if ``platform`` is not a key of ``DIMENSIONS``.
        """
        if platform not in self.DIMENSIONS:
            raise KeyError(
                f"Unknown platform {platform!r}; "
                f"expected one of {sorted(self.DIMENSIONS)}"
            )
        width, height = self.DIMENSIONS[platform]
        return width / 100, height / 100

    def create_shot_map(self, shots_df, team_name, platform="twitter"):
        """Create shot map optimized for social media.

        Args:
            shots_df: DataFrame of one team's shots with the columns
                ``x``, ``y``, ``xg`` and ``outcome``.
            team_name: Name used in the title.
            platform: Key of ``DIMENSIONS``.
        """
        figsize = self._figsize(platform)

        # Create pitch
        pitch = VerticalPitch(
            half=True,
            pitch_color=self.brand_colors["background"],
            line_color="#30363D",
            linewidth=1
        )

        fig, ax = pitch.draw(figsize=figsize)
        fig.patch.set_facecolor(self.brand_colors["background"])

        # Plot all shots in one call. pitch.scatter (not ax.scatter) applies
        # the x/y swap that the vertical pitch orientation requires.
        if len(shots_df):
            is_goal = shots_df["outcome"] == "Goal"
            pitch.scatter(
                shots_df["x"], shots_df["y"],
                s=shots_df["xg"] * 800 + 50,
                c=np.where(is_goal, self.brand_colors["secondary"],
                           "#FFFFFF").tolist(),
                alpha=0.8, edgecolors="white", linewidths=2, zorder=5,
                ax=ax,
            )

        # Add title and stats
        total_xg = shots_df["xg"].sum()
        goals = int((shots_df["outcome"] == "Goal").sum())

        fig.text(0.5, 0.95, f"{team_name} Shot Map",
                fontsize=20, color="white", ha="center",
                fontweight="bold")
        fig.text(0.5, 0.90, f"{goals} Goals | {total_xg:.2f} xG",
                fontsize=14, color="#8b949e", ha="center")
        fig.text(0.5, 0.02, "@YourHandle | Data: StatsBomb",
                fontsize=10, color="#8b949e", ha="center")

        plt.tight_layout()
        return fig

    def create_comparison_graphic(self, player1_data, player2_data,
                                  platform="twitter"):
        """Create player comparison graphic.

        Each player mapping must provide ``name`` plus the keys ``goals``,
        ``xg``, ``assists``, ``xa``, ``shots`` and ``key_passes``.
        """
        fig, axes = plt.subplots(1, 2, figsize=self._figsize(platform))
        fig.patch.set_facecolor(self.brand_colors["background"])

        metrics = ["Goals", "xG", "Assists", "xA", "Shots", "Key Passes"]
        keys = [m.lower().replace(" ", "_") for m in metrics]
        players = [player1_data, player2_data]
        all_values = [[player[k] for k in keys] for player in players]

        # One scale for both panels: equal bar length must mean equal value,
        # otherwise the mirrored bars cannot be compared visually.
        x_limit = max(max(values) for values in all_values) * 1.05

        y_pos = np.arange(len(metrics))
        for ax, player_data, values, side in zip(axes, players, all_values,
                                                 ["left", "right"]):
            ax.set_facecolor(self.brand_colors["background"])

            # Horizontal bar chart
            ax.barh(y_pos, values, color=self.brand_colors["primary"],
                   alpha=0.8)
            if x_limit > 0:
                ax.set_xlim(0, x_limit)

            ax.set_yticks(y_pos)
            ax.set_yticklabels(metrics if side == "left" else [],
                              color="white")
            ax.set_xlabel(player_data["name"], color="white", fontsize=12)

            if side == "right":
                ax.invert_xaxis()

            ax.tick_params(colors="white")
            for spine in ax.spines.values():
                spine.set_visible(False)

        fig.suptitle("Player Comparison", color="white",
                    fontsize=18, fontweight="bold")

        plt.tight_layout()
        return fig

    def create_matchday_graphic(self, fixtures, platform="instagram_story"):
        """Create matchday fixtures graphic.

        Args:
            fixtures: Iterable of mappings with the keys ``home``, ``away``
                and ``time``, listed top to bottom.
            platform: Key of ``DIMENSIONS``.
        """
        fig, ax = plt.subplots(figsize=self._figsize(platform))
        fig.patch.set_facecolor(self.brand_colors["background"])
        ax.set_facecolor(self.brand_colors["background"])

        # Title
        ax.text(0.5, 0.95, "MATCHDAY", transform=ax.transAxes,
               fontsize=32, color="white", ha="center",
               fontweight="bold")

        # Fixtures: one entry every 12% of the axes height, from 85% down.
        for i, fixture in enumerate(fixtures):
            y_pos = 0.85 - (i * 0.12)

            ax.text(0.5, y_pos,
                   f"{fixture['home']} vs {fixture['away']}",
                   transform=ax.transAxes, fontsize=16,
                   color="white", ha="center")
            ax.text(0.5, y_pos - 0.03,
                   fixture["time"],
                   transform=ax.transAxes, fontsize=12,
                   color="#8b949e", ha="center")

        ax.axis("off")
        return fig

# Usage
graphics = SocialMediaGraphics()
# shot_map = graphics.create_shot_map(shots_df, "Liverpool", "twitter")
# shot_map.savefig("shot_map_twitter.png", dpi=100, facecolor="#0D1117")
# R: Social media optimized visualizations
library(tidyverse)
library(ggplot2)

# Platform-specific dimensions
# Target image size in pixels for each platform, stored as a named numeric
# vector c(width = , height = ) per entry (twitter is 16:9).
social_dimensions <- lapply(
  list(
    twitter = c(1200, 675),
    instagram_square = c(1080, 1080),
    instagram_story = c(1080, 1920),
    linkedin = c(1200, 627)
  ),
  setNames,
  nm = c("width", "height")
)

# Draw a social-media shot map and (optionally) save it at the platform's
# pixel size.
#
# shots_data: data frame of one team's shots with columns x, y, xg, outcome.
# team_name:  name used in the plot title.
# platform:   name of an entry in `social_dimensions`.
# filename:   where to save the PNG; the default keeps the historic
#             "shot_map.png". Pass a distinct name per call when creating
#             several maps, or NULL to skip saving.
# Returns the ggplot object.
create_social_shot_map <- function(shots_data, team_name,
                                   platform = "twitter",
                                   filename = "shot_map.png") {
  if (!platform %in% names(social_dimensions)) {
    stop(
      "Unknown platform '", platform, "'; expected one of: ",
      paste(names(social_dimensions), collapse = ", "),
      call. = FALSE
    )
  }
  dims <- social_dimensions[[platform]]

  p <- ggplot() +
    # Pitch background
    annotate("rect", xmin = 0, xmax = 120, ymin = 0, ymax = 80,
             fill = "#1a472a") +
    # Shots
    geom_point(data = shots_data,
               aes(x = x, y = y, size = xg,
                   color = ifelse(outcome == "Goal", "Goal", "No Goal")),
               alpha = 0.8) +
    scale_color_manual(values = c("Goal" = "#FFD700", "No Goal" = "#FFFFFF")) +
    scale_size_continuous(range = c(3, 15)) +
    # Team branding. Explicit legend titles: without them the colour legend
    # is headed by the raw ifelse(...) expression.
    labs(
      title = paste(team_name, "Shot Map"),
      subtitle = paste0("Total xG: ", round(sum(shots_data$xg), 2)),
      caption = "@YourHandle | Data: StatsBomb",
      color = NULL,
      size = "xG"
    ) +
    theme_void() +
    theme(
      plot.background = element_rect(fill = "#0D1117", color = NA),
      plot.title = element_text(color = "white", size = 24,
                                face = "bold", hjust = 0.5),
      plot.subtitle = element_text(color = "#8b949e", size = 16,
                                   hjust = 0.5),
      plot.caption = element_text(color = "#8b949e", size = 10),
      legend.position = "bottom",
      legend.text = element_text(color = "white"),
      # The default black title is unreadable on the dark background
      legend.title = element_text(color = "white")
    )

  if (!is.null(filename)) {
    # dpi = 100, so pixels / 100 is the size in inches
    ggsave(filename, p,
           width = dims[["width"]] / 100, height = dims[["height"]] / 100,
           dpi = 100)
  }

  p
}

# Thread-ready multiple graphics
# Assemble the graphics for a multi-post match analysis thread.
#
# match_data: list describing one match. Fields read here: shots (data
#   frame whose team column is coded "home" / "away"), home_team, away_team
#   and top_performer; the whole list is also handed to
#   create_match_summary() and create_pass_network().
# Returns a named list of plots: summary, home_shots, away_shots,
#   pass_network, player_radar.
create_analysis_thread <- function(match_data) {
  graphics <- list()

  # 1. Score and xG summary
  graphics$summary <- create_match_summary(match_data)

  # 2. Shot maps for both teams
  graphics$home_shots <- create_social_shot_map(
    match_data$shots %>% filter(team == "home"),
    match_data$home_team
  )
  # NOTE(review): assumes the away side is coded "away" in shots$team and
  # that match_data$away_team exists, mirroring the home fields - confirm.
  graphics$away_shots <- create_social_shot_map(
    match_data$shots %>% filter(team == "away"),
    match_data$away_team
  )

  # 3. Pass network
  graphics$pass_network <- create_pass_network(match_data)

  # 4. Key player radar
  graphics$player_radar <- create_player_radar(match_data$top_performer)

  graphics
}

Data-Driven Writing

Effective analytics journalism combines statistical rigor with compelling narrative. The key is making data accessible without oversimplifying.

Writing Best Practices
  1. Lead with the story, not the stat: "Liverpool's attack is broken" not "Liverpool's xG is down 0.4 per game"
  2. Explain metrics in context: Always provide comparison points and benchmarks
  3. Use visuals to support, not replace, narrative: Every chart needs explanation
  4. Acknowledge uncertainty: Sample sizes, luck, and limitations matter
  5. Make it actionable: What does this mean for the team/player?
article_generation
# Python: Generate article data points and templates
import pandas as pd
import numpy as np
from scipy import stats

class ArticleGenerator:
    """Generate data points and text for analytics articles.

    ``player_data`` is a mapping (dict or pandas Series) with the keys
    ``name``, ``position``, ``minutes``, ``goals``, ``xg``, ``xg_per90``,
    ``xa_per90``, ``recent_xg_per90`` and ``season_xg_per90``.
    ``league_data`` is a DataFrame with the columns ``position``,
    ``minutes``, ``xg_per90`` and ``xa_per90``.
    """

    def __init__(self, player_data, league_data):
        self.player = player_data
        self.league = league_data
        self.stats = self._calculate_stats()

    @staticmethod
    def _percentile(peer_values, value):
        """Percentile rank of ``value`` within ``peer_values`` (NaN if empty)."""
        if len(peer_values) == 0:
            return float("nan")
        return float(stats.percentileofscore(peer_values, value))

    @staticmethod
    def _ordinal(number):
        """Format an integer with its English ordinal suffix (1st, 22nd, 13th)."""
        if 10 <= number % 100 <= 20:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
        return f"{number}{suffix}"

    def _calculate_stats(self):
        """Calculate all relevant statistics for the article.

        Returns a dict with the keys ``xg_percentile``, ``xa_percentile``,
        ``sample_size``, ``sample_reliability``, ``xg_trend``,
        ``trend_direction``, ``goals_vs_xg`` and ``performance_label``.
        """
        # Peers: same position with at least 900 minutes played.
        position_peers = self.league[
            (self.league["position"] == self.player["position"]) &
            (self.league["minutes"] >= 900)
        ]

        # The dict must not be called `stats`: that would shadow the
        # scipy.stats module used for the percentile calculation.
        summary = {}

        # Percentile rankings
        summary["xg_percentile"] = self._percentile(
            position_peers["xg_per90"], self.player["xg_per90"]
        )
        summary["xa_percentile"] = self._percentile(
            position_peers["xa_per90"], self.player["xa_per90"]
        )

        # Sample size context, measured in full 90-minute matches
        full_matches = self.player["minutes"] / 90
        summary["sample_size"] = full_matches
        if full_matches > 15:
            summary["sample_reliability"] = "robust"
        elif full_matches > 8:
            summary["sample_reliability"] = "emerging"
        else:
            summary["sample_reliability"] = "limited"

        # Trend analysis: recent form against the season average
        trend = self.player["recent_xg_per90"] - self.player["season_xg_per90"]
        summary["xg_trend"] = trend
        if trend > 0.05:
            summary["trend_direction"] = "improving"
        elif trend < -0.05:
            summary["trend_direction"] = "declining"
        else:
            summary["trend_direction"] = "stable"

        # Over/underperformance: more than two goals either side of xG
        goal_delta = self.player["goals"] - self.player["xg"]
        summary["goals_vs_xg"] = goal_delta
        if goal_delta > 2:
            summary["performance_label"] = "overperforming"
        elif goal_delta < -2:
            summary["performance_label"] = "underperforming"
        else:
            summary["performance_label"] = "performing as expected"

        return summary

    def generate_lede(self):
        """Generate article opening paragraph."""

        templates = {
            "overperforming": f"{self.player['name']} is riding a hot streak. "
                             f"With {self.player['goals']} goals from just "
                             f"{self.player['xg']:.1f} xG, the {self.player['position']} "
                             f"is currently {self.stats['performance_label']} expectations.",

            "underperforming": f"The numbers suggest {self.player['name']} is due for a "
                              f"breakthrough. Despite registering {self.player['xg']:.1f} xG, "
                              f"the {self.player['position']} has only {self.player['goals']} "
                              f"goals to show for it.",

            "performing as expected": f"{self.player['name']}'s output this season tells a "
                                      f"story of consistency. The {self.player['position']}'s "
                                      f"{self.player['goals']} goals align almost perfectly "
                                      f"with their {self.player['xg']:.1f} xG."
        }

        return templates[self.stats["performance_label"]]

    def generate_context_paragraph(self):
        """Generate contextual comparison paragraph."""

        raw_percentile = self.stats["xg_percentile"]
        if np.isnan(raw_percentile):
            # No qualifying peers: a percentile would be meaningless.
            return (f"There are no {self.player['position']}s with 900+ minutes "
                    f"to compare {self.player['name']} against.")

        percentile = int(round(raw_percentile))

        if percentile >= 90:
            tier = "elite"
            comparison = "among the league's best"
        elif percentile >= 75:
            tier = "above average"
            comparison = "better than most peers"
        elif percentile >= 50:
            tier = "average"
            comparison = "roughly average for their position"
        else:
            tier = "below average"
            comparison = "below the typical output"

        return (f"In context, {self.player['name']}'s {self.player['xg_per90']:.2f} xG "
               f"per 90 ranks in the {self._ordinal(percentile)} percentile among "
               f"{self.player['position']}s with 900+ minutes. That places them "
               f"{comparison} - {tier} production for their role.")

    def generate_sample_size_caveat(self):
        """Generate appropriate sample size caveat."""

        minutes = self.player["minutes"]

        if minutes < 450:
            return (f"With only {minutes} minutes played, these numbers should be "
                   f"treated with significant caution. Small samples can be misleading.")
        elif minutes < 900:
            return (f"At {minutes} minutes, we're seeing an emerging picture, "
                   f"though more playing time would increase confidence in these trends.")
        else:
            return ""  # No caveat needed

# Usage
# generator = ArticleGenerator(player_data, league_data)
# print(generator.generate_lede())
# print(generator.generate_context_paragraph())
# R: Generate article data points
library(tidyverse)

# Compute talking points and ready-made sentences for a player article.
#
# player_data: list (or one-row data frame) with name, position, xg_per90,
#   xa_per90, recent_xg and season_xg.
# league_data: data frame with player, position, minutes, xg_per90, xa_per90.
# Returns a list with xg_percentile, xa_percentile, xg_trend,
#   form_description, comparisons (up to 5 player names) and sentences
#   (glue strings: performance, trend, comparison).
generate_article_stats <- function(player_data, league_data) {
  # English ordinal suffix: 1st, 22nd, 13th, ...
  ordinal <- function(n) {
    if (is.na(n)) {
      return("NA")
    }
    suffix <- if (n %% 100 %in% 11:13) {
      "th"
    } else {
      switch(as.character(n %% 10), "1" = "st", "2" = "nd", "3" = "rd", "th")
    }
    paste0(n, suffix)
  }

  stats <- list()

  # Same-position players with a meaningful sample of minutes
  position_peers <- league_data %>%
    filter(position == player_data$position,
           minutes >= 900)

  # Percentile rankings. The sentence below says "among <position>s", so
  # rank against those peers; fall back to the whole league only when no
  # peer qualifies.
  reference <- position_peers
  if (nrow(position_peers) == 0) {
    warning(
      "No ", player_data$position, " peers with 900+ minutes; ",
      "percentiles are computed against the whole league.",
      call. = FALSE
    )
    reference <- league_data
  }
  stats$xg_percentile <- ecdf(reference$xg_per90)(player_data$xg_per90) * 100
  stats$xa_percentile <- ecdf(reference$xa_per90)(player_data$xa_per90) * 100

  # Trend analysis. The labels are phrased to complete the sentence
  # "Their attacking output has ...".
  stats$xg_trend <- player_data$recent_xg - player_data$season_xg
  stats$form_description <- case_when(
    stats$xg_trend > 0.2 ~ "dramatically improved",
    stats$xg_trend > 0.1 ~ "trended upward",
    stats$xg_trend < -0.2 ~ "significantly declined",
    stats$xg_trend < -0.1 ~ "trended downward",
    TRUE ~ "remained consistent"
  )

  # Comparison to peers: closest xG/xA per-90 profiles, leaving out the
  # player being written about (distance 0 to themselves).
  similar_players <- position_peers %>%
    filter(is.na(player) | player != player_data$name) %>%
    mutate(similarity = abs(xg_per90 - player_data$xg_per90) +
                        abs(xa_per90 - player_data$xa_per90)) %>%
    arrange(similarity) %>%
    head(5)

  stats$comparisons <- similar_players$player

  # Generate sentence templates
  stats$sentences <- list(
    performance = glue::glue(
      "{player_data$name} ranks in the {ordinal(round(stats$xg_percentile))} percentile ",
      "for xG per 90 among {player_data$position}s in the Premier League."
    ),
    trend = glue::glue(
      "Their attacking output has {stats$form_description} over the past 10 matches, ",
      "with xG per 90 {ifelse(stats$xg_trend >= 0, 'up', 'down')} ",
      "{abs(round(stats$xg_trend, 2))} from their season average."
    ),
    comparison = glue::glue(
      "Statistically similar players include ",
      "{paste(head(stats$comparisons, 3), collapse = ', ')}."
    )
  )

  stats
}

Interactive Web Visualizations

Modern media outlets increasingly feature interactive graphics that let readers explore data themselves. These require web development skills alongside data analysis.

interactive_viz
# Python: Create interactive visualizations
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, callback, Input, Output
import pandas as pd

class InteractiveVisualizations:
    """Create interactive web-based football visualizations."""

    @staticmethod
    def create_interactive_shot_map(shots_df):
        """Create interactive shot map with Plotly.

        Args:
            shots_df: DataFrame of shots with the columns ``player``,
                ``xg``, ``minute``, ``x``, ``y`` and ``outcome``. It is
                not modified.

        Returns:
            A plotly Figure showing the attacking half (x from 60 to 120)
            with one trace for goals and one for all other shots.
        """
        # Work on a copy: adding the hover column to the caller's frame
        # would be a surprising side effect of drawing a chart.
        shots = shots_df.copy()

        # Add hover text (built column-wise so an empty frame also works)
        shots["hover_text"] = [
            f"Player: {player}<br>xG: {xg:.2f}<br>Minute: {minute}"
            for player, xg, minute in zip(shots["player"], shots["xg"],
                                          shots["minute"])
        ]

        fig = go.Figure()

        # Draw pitch (simplified): half pitch, penalty area, six-yard box
        fig.add_shape(type="rect", x0=60, x1=120, y0=0, y1=80,
                     fillcolor="#1a472a", line_color="white")
        fig.add_shape(type="rect", x0=102, x1=120, y0=18, y1=62,
                     line_color="white", fillcolor="rgba(0,0,0,0)")
        fig.add_shape(type="rect", x0=114, x1=120, y0=30, y1=50,
                     line_color="white", fillcolor="rgba(0,0,0,0)")

        # Add shots; non-goals first so goals are drawn on top of them.
        is_goal = shots["outcome"] == "Goal"
        layers = [
            (shots[~is_goal], "No Goal", "white", 0.6, 1),
            (shots[is_goal], "Goal", "#FFD700", 0.9, 2),
        ]
        for subset, label, fill, opacity, outline in layers:
            fig.add_trace(go.Scatter(
                x=subset["x"], y=subset["y"],
                mode="markers",
                marker=dict(
                    size=subset["xg"] * 50 + 5,
                    color=fill,
                    opacity=opacity,
                    line=dict(color="white", width=outline)
                ),
                text=subset["hover_text"],
                hoverinfo="text",
                name=label
            ))

        fig.update_layout(
            plot_bgcolor="#0D1117",
            paper_bgcolor="#0D1117",
            font_color="white",
            showlegend=True,
            xaxis=dict(visible=False, range=[55, 125]),
            yaxis=dict(visible=False, range=[-5, 85]),
            height=600
        )

        return fig

    @staticmethod
    def create_xg_timeline_interactive(events_df, home_team, away_team):
        """Create interactive xG timeline.

        Args:
            events_df: DataFrame of match events; rows with
                ``type == "Shot"`` are used and need the columns
                ``minute``, ``second``, ``team``, ``xg`` and ``outcome``.
            home_team: ``team`` value of the home side (drawn in red).
            away_team: ``team`` value of the away side (drawn in navy).

        Returns:
            A plotly Figure with one cumulative-xG step line per team and
            a "GOAL" annotation on every scored shot.
        """
        shots = events_df[events_df["type"] == "Shot"].copy()
        shots["time"] = shots["minute"] + shots["second"] / 60

        fig = go.Figure()

        for team, color in [(home_team, "#E31937"), (away_team, "#132257")]:
            team_shots = shots[shots["team"] == team].sort_values("time")
            team_shots = team_shots.assign(cum_xg=team_shots["xg"].cumsum())

            # Add start point so each line begins at (0, 0)
            times = [0] + team_shots["time"].tolist()
            cum_xg = [0] + team_shots["cum_xg"].tolist()

            fig.add_trace(go.Scatter(
                x=times, y=cum_xg,
                mode="lines+markers",
                name=team,
                # "hv": hold the value, then step up at the next shot
                line=dict(color=color, width=3, shape="hv"),
                marker=dict(size=8)
            ))

            # Add goal annotations
            goals = team_shots[team_shots["outcome"] == "Goal"]
            for _, goal in goals.iterrows():
                fig.add_annotation(
                    x=goal["time"], y=goal["cum_xg"],
                    text="GOAL",
                    showarrow=True,
                    arrowhead=2,
                    arrowcolor=color,
                    font=dict(color="white", size=10)
                )

        fig.update_layout(
            title="Expected Goals Timeline",
            xaxis_title="Minutes",
            yaxis_title="Cumulative xG",
            plot_bgcolor="#0D1117",
            paper_bgcolor="#0D1117",
            font_color="white",
            xaxis=dict(range=[0, 95], tickvals=[0, 15, 30, 45, 60, 75, 90]),
            height=500
        )

        return fig

# Dash app for interactive dashboard
def create_dash_app():
    """Assemble the Dash dashboard: title, two dropdowns, and three graphs.

    Only the shot map is wired to a callback here; it redraws whenever the
    team dropdown changes. Returns the configured Dash application.
    """

    team_names = ("Arsenal", "Chelsea", "Liverpool")
    metric_choices = (
        ("Expected Goals (xG)", "xg"),
        ("Expected Assists (xA)", "xa"),
        ("Passes", "passes"),
    )

    # --- Header ---
    header = html.H1("Football Analytics Dashboard",
                     style={"color": "white", "textAlign": "center"})

    # --- Filter controls ---
    team_picker = dcc.Dropdown(
        id="team-dropdown",
        options=[{"label": name, "value": name} for name in team_names],
        value="Arsenal",
        style={"width": "200px"},
    )
    metric_picker = dcc.Dropdown(
        id="metric-dropdown",
        options=[{"label": label, "value": key}
                 for label, key in metric_choices],
        value="xg",
        style={"width": "200px"},
    )
    controls = html.Div(
        [
            html.Label("Select Team:", style={"color": "white"}),
            team_picker,
            html.Label("Select Metric:",
                       style={"color": "white", "marginLeft": "20px"}),
            metric_picker,
        ],
        style={"display": "flex", "marginBottom": "20px"},
    )

    # --- Graph panels ---
    match_charts = html.Div(
        [
            dcc.Graph(id="shot-map", style={"width": "50%"}),
            dcc.Graph(id="xg-timeline", style={"width": "50%"}),
        ],
        style={"display": "flex"},
    )
    player_panel = html.Div([dcc.Graph(id="player-stats")])

    app = Dash(__name__)
    app.layout = html.Div(
        [header, controls, match_charts, player_panel],
        style={"backgroundColor": "#0D1117", "padding": "20px"},
    )

    @callback(
        Output("shot-map", "figure"),
        Input("team-dropdown", "value"),
    )
    def update_shot_map(team):
        # Load the selected team's shots and hand them to the plotting helper.
        team_shots = get_team_shots(team)
        return InteractiveVisualizations.create_interactive_shot_map(team_shots)

    return app

# Run with: create_dash_app().run(debug=True)  (older Dash releases: .run_server(debug=True))
# R: Create interactive visualizations with Shiny
library(shiny)
library(plotly)
library(tidyverse)

# Interactive shot map app
# Build the interactive shot-map Shiny app.
#
# Relies on two objects defined elsewhere: `get_players_for_team()` (player
# choices for a team) and `shots_data` (shot table with team, player, xg,
# outcome, x, y and minute columns). Returns a shiny app object.
shot_map_app <- function() {
    ui <- fluidPage(
        theme = bslib::bs_theme(bootswatch = "darkly"),

        titlePanel("Interactive Shot Map"),

        sidebarLayout(
            sidebarPanel(
                selectInput("team", "Select Team",
                           choices = c("Arsenal", "Chelsea", "Liverpool")),
                # Choices are filled in by the server once a team is known
                selectInput("player", "Select Player",
                           choices = NULL),
                sliderInput("xg_min", "Minimum xG",
                           min = 0, max = 1, value = 0, step = 0.05),
                checkboxGroupInput("outcome", "Shot Outcome",
                                  choices = c("Goal", "Saved", "Blocked", "Off Target"),
                                  selected = c("Goal", "Saved", "Blocked", "Off Target"))
            ),

            mainPanel(
                plotlyOutput("shot_map", height = "600px"),
                tableOutput("shot_stats")
            )
        )
    )

    server <- function(input, output, session) {
        # Update player choices based on team
        observe({
            players <- get_players_for_team(input$team)
            updateSelectInput(session, "player", choices = players)
        })

        filtered_shots <- reactive({
            # The player input starts empty (choices = NULL); wait until both
            # selectors hold a value instead of filtering against nothing.
            req(input$team, input$player)

            shots_data %>%
                filter(team == input$team,
                       player == input$player,
                       xg >= input$xg_min,
                       outcome %in% input$outcome)
        })

        output$shot_map <- renderPlotly({
            shots <- filtered_shots()

            p <- ggplot(shots, aes(x = x, y = y, size = xg,
                                   color = outcome,
                                   text = paste("xG:", round(xg, 2),
                                              "<br>Min:", minute))) +
                # Pitch drawing
                annotate("rect", xmin = 60, xmax = 120, ymin = 0, ymax = 80,
                        fill = "#1a472a") +
                geom_point(alpha = 0.7) +
                scale_color_manual(values = c("Goal" = "#FFD700",
                                             "Saved" = "#FFFFFF",
                                             "Blocked" = "#888888",
                                             "Off Target" = "#444444")) +
                theme_void() +
                theme(plot.background = element_rect(fill = "#0D1117"))

            # Show only the custom `text` aesthetic in the hover tooltip
            ggplotly(p, tooltip = "text")
        })

        output$shot_stats <- renderTable({
            shots <- filtered_shots()
            goals <- sum(shots$outcome == "Goal")
            total_xg <- sum(shots$xg)

            tibble(
                `Total Shots` = nrow(shots),
                `Total xG` = total_xg,
                Goals = goals,
                `Goals - xG` = goals - total_xg
            )
        })
    }

    shinyApp(ui, server)
}

Podcast and Video Content

Audio and video content allows for deeper exploration of analytics topics. Successful content combines entertainment with education.

Video Content Tips
  • Hook in 5 seconds: Start with the insight, not the methodology
  • Show, don't tell: Use animations and visual examples
  • Keep graphics on screen: Viewers need time to absorb
  • Use concrete examples: Reference real matches and players
  • Include timestamps: Let viewers jump to sections
  • Call to action: Ask questions, encourage engagement
Podcast Tips
  • Prepare talking points: Have stats ready but conversation natural
  • Explain context: Listeners can't see your graphs
  • Use round numbers: "About 0.5 xG" not "0.487 xG"
  • Tell stories: Data supports narrative, doesn't replace it
  • Invite guests: Different perspectives add value
  • Reference show notes: "Check the link for the full analysis"
video_content
# Python: Video content planning and automation
import pandas as pd
from dataclasses import dataclass
from typing import List, Dict

@dataclass
class VideoSection:
    """One timed segment of a video script outline."""
    title: str
    duration_seconds: int
    content: str
    visuals: List[str]
    data_points: List[str]

class VideoContentPlanner:
    """Plan and structure analytics video content."""

    def __init__(self, topic: str, target_duration_minutes: int = 10):
        self.topic = topic
        # Stored in seconds so it can be compared with section durations.
        self.target_duration = target_duration_minutes * 60

    def create_script_outline(self, data_analysis: Dict) -> List[VideoSection]:
        """Generate the seven-section video outline from an analysis dict.

        Required keys in ``data_analysis``: ``headline``, ``hook_stat``,
        ``context``, ``context_stats``, ``main_points``, ``key_insight`` and
        ``insight_stat``. ``predictions`` is optional and defaults to ``[]``.

        Raises:
            KeyError: if a required key is missing.
        """

        sections = [
            VideoSection(
                title="Hook",
                duration_seconds=15,
                content=f"Did you know that {data_analysis['headline']}?",
                visuals=["Dramatic stat reveal"],
                data_points=[data_analysis["hook_stat"]]
            ),
            VideoSection(
                title="Introduction",
                duration_seconds=30,
                content="Welcome back to the channel. Today we're diving into...",
                visuals=["Channel branding", "Topic title card"],
                data_points=[]
            ),
            VideoSection(
                title="Context",
                duration_seconds=60,
                content=f"Before we get into the numbers, let's understand why "
                       f"this matters: {data_analysis['context']}",
                visuals=["Season timeline", "League table position"],
                data_points=[data_analysis["context_stats"]]
            ),
            VideoSection(
                title="The Data",
                duration_seconds=180,
                content="Let's look at what the data actually shows us...",
                visuals=["Shot map", "xG timeline", "Comparison charts"],
                data_points=data_analysis["main_points"]
            ),
            VideoSection(
                title="Key Insight",
                duration_seconds=120,
                content=f"Here's the most interesting finding: "
                       f"{data_analysis['key_insight']}",
                visuals=["Animated highlight", "Statistical breakdown"],
                data_points=[data_analysis["insight_stat"]]
            ),
            VideoSection(
                title="Implications",
                duration_seconds=90,
                content="So what does this mean going forward?",
                visuals=["Future projections", "Recommendations"],
                data_points=data_analysis.get("predictions", [])
            ),
            VideoSection(
                title="Conclusion",
                duration_seconds=30,
                content="That's all for today. If you found this useful...",
                visuals=["Subscribe button", "Related video suggestions"],
                data_points=[]
            )
        ]

        return sections

    def calculate_duration(self, sections: List[VideoSection]) -> Dict:
        """Calculate total duration and pacing metrics.

        Returns a dict with ``total_seconds``, ``total_minutes`` (one
        decimal), ``data_coverage_pct`` (share of running time spent in
        sections that have at least one data point) and ``over_target_by``
        (seconds over the target; negative when under).
        """

        total = sum(s.duration_seconds for s in sections)
        data_heavy = sum(s.duration_seconds for s in sections
                        if len(s.data_points) > 0)

        # An empty outline (or all-zero durations) has no coverage to report;
        # avoid dividing by zero.
        coverage = round(data_heavy / total * 100, 1) if total else 0.0

        return {
            "total_seconds": total,
            "total_minutes": round(total / 60, 1),
            "data_coverage_pct": coverage,
            "over_target_by": total - self.target_duration
        }

    def generate_visual_list(self, sections: List[VideoSection]) -> List[Dict]:
        """List every visual needed, one dict per (section, visual) pair.

        Each dict has ``section`` (section title), ``visual`` and
        ``duration`` (the owning section's duration in seconds).
        """

        return [
            {
                "section": section.title,
                "visual": visual,
                "duration": section.duration_seconds
            }
            for section in sections
            for visual in section.visuals
        ]

class PodcastEpisodePlanner:
    """Plan analytics podcast episodes."""

    def __init__(self, episode_title: str, target_minutes: int = 45):
        self.title = episode_title
        self.target_minutes = target_minutes

    def create_episode_structure(self, topics: List[Dict]) -> List[Dict]:
        """Create episode structure with talking points.

        Each topic dict must provide ``title``, ``description`` and
        ``key_stats``; ``questions`` is optional. The opening (3 min) and
        closing (5 min) are fixed; the remaining time is split evenly across
        topics using whole minutes. With no topics, only the opening and
        closing segments are returned.
        """

        opening = {
            "segment": "Opening",
            "duration": 3,
            "content": "Welcome, preview of episode, housekeeping"
        }
        closing = {
            "segment": "Closing",
            "duration": 5,
            "content": "Summary, preview next episode, call to action"
        }

        # Nothing to distribute time across; also avoids dividing by zero.
        if not topics:
            return [opening, closing]

        # Reserve intro/outro; never hand out negative minutes when the
        # target is shorter than the reserved 8 minutes.
        content_time = max(self.target_minutes - 8, 0)
        per_topic = content_time // len(topics)

        structure = [opening]
        for topic in topics:
            structure.append({
                "segment": topic["title"],
                "duration": per_topic,
                "content": topic["description"],
                "talking_points": topic["key_stats"],
                "questions_to_ask": topic.get("questions", [])
            })
        structure.append(closing)

        return structure

    def prepare_stat_cards(self, topics: List[Dict]) -> List[Dict]:
        """Prepare stat cards for quick reference during recording.

        The first entry of ``key_stats`` becomes the headline and the next
        two the supporting stats. A topic with no stats gets a ``None``
        headline instead of raising.
        """

        cards = []
        for topic in topics:
            stats = topic["key_stats"]
            cards.append({
                "topic": topic["title"],
                "headline_stat": stats[0] if stats else None,
                "supporting_stats": stats[1:3],
                "comparison": topic.get("comparison"),
                "caveat": topic.get("sample_size_note")
            })

        return cards

# Usage
# Plan a 10-minute video from a hand-written analysis summary.
planner = VideoContentPlanner("Liverpool's Defensive Transformation", 10)
analysis = dict(
    headline="Liverpool have conceded 40% fewer xG since switching to a back 3",
    hook_stat="0.8 xG against per game vs 1.4 last season",
    context="After a slow start, Slot made tactical changes",
    context_stats="First 10 games vs last 10 games comparison",
    main_points=["xG against dropped", "Press success up", "Line height changed"],
    key_insight="The back 3 allows Robertson to push higher",
    insight_stat="Robertson progressive passes up 35%",
)

# Build the outline, then report how long it runs.
sections = planner.create_script_outline(analysis)
duration = planner.calculate_duration(sections)
print(f"Video Duration: {duration['total_minutes']} minutes")
# R: Generate video script outline
library(tidyverse)
library(glue)

# Build a seven-part script outline for an analytics video.
#
# `data_points` is a list providing `headline`, `context` and `sections`.
# `topic` and `duration_minutes` are accepted but not currently used by the
# outline. Prints the summed duration and returns the outline as a named list
# of segments, each with `duration` (seconds) and `content`.
generate_video_script <- function(topic, data_points, duration_minutes = 10) {
    # Structure for analytics video
    script <- list(
        hook = list(
            duration = 15,
            content = glue("Open with the key insight: {data_points$headline}")
        ),

        intro = list(
            duration = 30,
            content = "Brief intro, what viewers will learn"
        ),

        context = list(
            duration = 60,
            content = glue("Why this matters: {data_points$context}"),
            visuals = c("Team badge", "Season overview graphic")
        ),

        methodology = list(
            duration = 45,
            content = "Quick explanation of metrics used",
            visuals = c("xG explainer", "Data source logos")
        ),

        analysis = list(
            duration = 300,
            content = "Main analysis with multiple data points",
            sections = data_points$sections,
            visuals = c("Shot map", "Timeline", "Comparison charts")
        ),

        implications = list(
            duration = 90,
            content = "What this means for the team/player",
            visuals = c("Future fixtures", "Key matchups")
        ),

        conclusion = list(
            duration = 30,
            content = "Summary and call to action",
            visuals = c("Subscribe reminder", "Next video preview")
        )
    )

    # Calculate total duration; vapply guarantees a numeric vector
    total <- sum(vapply(script, function(x) x$duration, numeric(1)))

    # glue() trims a trailing newline from its template, so the line break is
    # passed to cat() separately.
    cat(
        glue("Total script duration: {total} seconds ({round(total/60, 1)} minutes)"),
        "\n",
        sep = ""
    )

    script
}

Monetization and Content Strategy

Sustainable analytics content requires a business model. Understanding monetization options helps you invest in quality content creation.

Revenue Stream Audience Required Typical Earnings Considerations
Substack/Newsletter 1,000+ subscribers £5-15/month per paid sub Need consistent quality content
YouTube AdSense 1,000 subs + 4,000 watch hours £1-5 per 1,000 views Niche content has lower CPM
Patreon/Ko-fi 500+ engaged followers £3-20/month per patron Need exclusive content tiers
Freelance Writing Strong portfolio £100-500 per article Builds on public reputation
Consulting Expertise + network £50-200/hour Time-intensive but high value
Courses/Products 10,000+ audience Variable (£50-500 per sale) High upfront creation cost
content_strategy
# Python: Content strategy and analytics
import pandas as pd
from datetime import datetime, timedelta
from typing import List, Dict

class ContentStrategyManager:
    """Manage and optimize analytics content strategy."""

    def __init__(self):
        self.content_log = []
        self.revenue_log = []

    def log_content(self, platform: str, content_type: str,
                   topic: str, views: int, engagement: int,
                   time_hours: float, revenue: float = 0):
        """Log one piece of content together with derived efficiency metrics.

        Per-hour metrics are recorded as 0.0 when ``time_hours`` is not
        positive, and ``engagement_rate`` (a percentage of views) is 0 when
        there are no views, so a zero never raises ZeroDivisionError.
        """

        has_time = time_hours > 0

        self.content_log.append({
            "date": datetime.now(),
            "platform": platform,
            "content_type": content_type,
            "topic": topic,
            "views": views,
            "engagement": engagement,
            "time_hours": time_hours,
            "revenue": revenue,
            "views_per_hour": views / time_hours if has_time else 0.0,
            "engagement_rate": engagement / views * 100 if views > 0 else 0,
            "revenue_per_hour": revenue / time_hours if has_time else 0.0
        })

    def analyze_performance(self) -> pd.DataFrame:
        """Analyze content performance by platform and type.

        Returns one row per (platform, content_type) with summed views,
        engagement, hours and revenue, mean views-per-hour and engagement
        rate, and ``total_roi`` (total views / total hours), sorted by
        ``total_roi`` descending. Returns an empty frame with those columns
        when nothing has been logged.
        """

        if not self.content_log:
            # No rows means no "platform"/"content_type" columns to group on.
            return pd.DataFrame(columns=[
                "views", "engagement", "time_hours", "revenue",
                "views_per_hour", "engagement_rate", "total_roi"
            ])

        df = pd.DataFrame(self.content_log)

        summary = df.groupby(["platform", "content_type"]).agg({
            "views": "sum",
            "engagement": "sum",
            "time_hours": "sum",
            "revenue": "sum",
            "views_per_hour": "mean",
            "engagement_rate": "mean"
        }).round(2)

        summary["total_roi"] = summary["views"] / summary["time_hours"]

        return summary.sort_values("total_roi", ascending=False)

    def recommend_focus(self) -> Dict:
        """Recommend content focus based on performance.

        Needs at least five logged items; otherwise returns only a
        ``recommendation`` message. Each category is summarised from its
        top three entries.
        """

        df = pd.DataFrame(self.content_log)

        if len(df) < 5:
            return {"recommendation": "Not enough data yet"}

        best_roi = df.nlargest(3, "views_per_hour")
        best_engagement = df.nlargest(3, "engagement_rate")
        best_revenue = df.nlargest(3, "revenue_per_hour")

        return {
            "highest_reach": {
                "platform": best_roi["platform"].mode()[0],
                "content_type": best_roi["content_type"].mode()[0],
                "avg_views_per_hour": best_roi["views_per_hour"].mean()
            },
            "highest_engagement": {
                "platform": best_engagement["platform"].mode()[0],
                "content_type": best_engagement["content_type"].mode()[0],
                "avg_engagement_rate": best_engagement["engagement_rate"].mean()
            },
            "highest_revenue": {
                "platform": best_revenue["platform"].mode()[0],
                "content_type": best_revenue["content_type"].mode()[0],
                "avg_revenue_per_hour": best_revenue["revenue_per_hour"].mean()
            },
            "recommendation": self._generate_recommendation(df)
        }

    def _generate_recommendation(self, df: pd.DataFrame) -> str:
        """Name the platform with the best mean views/hour and the content
        type with the best mean engagement rate."""

        best_platform = df.groupby("platform")["views_per_hour"].mean().idxmax()
        best_type = df.groupby("content_type")["engagement_rate"].mean().idxmax()

        return (f"Focus on {best_type} content on {best_platform} "
               f"for optimal reach and engagement balance.")

    def content_calendar(self, weeks: int = 4) -> List[Dict]:
        """Generate a weekly posting calendar.

        The schedule is a fixed sample template repeated for ``weeks``
        weeks; it does not depend on the logged performance data.
        """

        return [
            {
                "week": week + 1,
                "monday": {"type": "Thread", "platform": "Twitter"},
                "wednesday": {"type": "Article", "platform": "Substack"},
                "friday": {"type": "Visualization", "platform": "Twitter"},
                "saturday": {"type": "Match Review", "platform": "YouTube"}
            }
            for week in range(weeks)
        ]

# Newsletter growth tracking
class NewsletterGrowth:
    """Keep a running subscriber count plus a week-by-week history."""

    def __init__(self, start_subscribers: int = 0):
        self.history = []
        self.subscribers = start_subscribers

    def add_week(self, new_subs: int, churned: int, open_rate: float):
        """Record one week: update the running total and append a snapshot."""

        self.subscribers = self.subscribers + new_subs - churned

        snapshot = {
            "date": datetime.now(),
            "total_subscribers": self.subscribers,
            "new": new_subs,
            "churned": churned,
            "net_growth": new_subs - churned,
            "open_rate": open_rate,
        }
        self.history.append(snapshot)

    def project_growth(self, weeks: int = 12) -> pd.DataFrame:
        """Extrapolate subscribers linearly from the mean weekly net growth.

        Returns None until at least four weeks of history exist; otherwise a
        frame with ``week`` and ``projected_subscribers`` columns.
        """

        if len(self.history) < 4:
            return None

        weekly_gain = pd.DataFrame(self.history)["net_growth"].mean()

        rows = []
        running = self.subscribers
        week = 0
        while week < weeks:
            week += 1
            running += weekly_gain
            # int() truncates the fractional part of the running estimate
            rows.append({"week": week, "projected_subscribers": int(running)})

        return pd.DataFrame(rows)

    def monetization_potential(self, paid_conversion_rate: float = 0.05,
                               monthly_price: float = 5) -> Dict:
        """Estimate paid-tier revenue from the current subscriber count."""

        paying = int(self.subscribers * paid_conversion_rate)
        per_month = paying * monthly_price

        return {
            "total_subscribers": self.subscribers,
            "potential_paid_subscribers": paying,
            "conversion_rate": paid_conversion_rate,
            "monthly_price": monthly_price,
            "estimated_monthly_revenue": per_month,
            "estimated_annual_revenue": per_month * 12,
        }

# Example usage
strategy = ContentStrategyManager()

# (platform, content_type, topic, views, engagement, time_hours[, revenue])
for entry in (
    ("Twitter", "Thread", "xG Explainer", 45000, 1200, 2),
    ("Substack", "Article", "Liverpool Analysis", 3500, 150, 8),
    ("YouTube", "Video", "Match Review", 12000, 800, 15, 25),
):
    strategy.log_content(*entry)

print("Content Performance:")
print(strategy.analyze_performance())
# R: Content performance tracking
library(tidyverse)

# Track content performance
# Sample content log, one row per published piece (column-wise definition)
content_tracker <- tibble(
    date = c("2024-01-15", "2024-01-16", "2024-01-18", "2024-01-20"),
    platform = c("Twitter", "Substack", "YouTube", "Twitter"),
    content_type = c("Thread", "Article", "Video", "Visualization"),
    topic = c("xG Explainer", "Liverpool Analysis", "Match Review", "Shot Map"),
    views = c(45000, 3500, 12000, 28000),
    engagement = c(1200, 150, 800, 650),
    time_spent_hours = c(2, 8, 15, 1)
)

# Calculate ROI metrics
# Summarise content performance per platform and content type.
#
# `data` needs `platform`, `content_type`, `views`, `engagement` and
# `time_spent_hours` columns. Returns an ungrouped tibble sorted by
# `roi_score` (total views per hour invested), highest first.
calculate_content_roi <- function(data) {
    data %>%
        mutate(
            views_per_hour = views / time_spent_hours,
            engagement_rate = engagement / views * 100
        ) %>%
        group_by(platform, content_type) %>%
        summarise(
            total_views = sum(views),
            avg_engagement_rate = mean(engagement_rate),
            avg_time_investment = mean(time_spent_hours),
            roi_score = total_views / sum(time_spent_hours),
            # Drop grouping so the result is a plain tibble and summarise()
            # does not emit its regrouping message.
            .groups = "drop"
        ) %>%
        arrange(desc(roi_score))
}

# Rank the sample content log by return on time invested and show it
roi_analysis <- content_tracker %>%
    calculate_content_roi()
print(roi_analysis)

Case Study: Building a Media Analytics Brand

Let's examine how successful analytics content creators have built their platforms.

Success Patterns from Analytics Creators
Common Success Factors:
  1. Consistency: Regular posting schedule (minimum 2-3x per week)
  2. Visual identity: Recognizable style and color scheme
  3. Niche focus: Deep expertise in specific area (team, league, metric)
  4. Engagement: Responding to comments, asking questions
  5. Cross-platform: Repurposing content across Twitter, Substack, YouTube
  6. Collaboration: Guest posts, podcast appearances, co-analysis
Growth Timeline (Typical):
  • Months 1-3: Build portfolio, find voice, minimal engagement
  • Months 4-6: First viral post, growing followers, early recognition
  • Months 7-12: Consistent audience, networking opportunities
  • Year 2: Monetization options, freelance opportunities, industry connections
  • Year 3+: Established brand, multiple revenue streams, potential full-time

Practice Exercises

Exercise 42.1: Broadcast Graphic

Create a broadcast-quality xG timeline graphic for a recent match. Use a dark theme, clear typography, and team colors. Export at 1920x1080 resolution.

Exercise 42.2: Social Thread

Create a 5-part Twitter thread analyzing a team's season so far. Include: summary graphic, shot map, defensive metrics, trend line, and conclusion.

Exercise 42.3: Written Analysis

Write a 500-word analysis piece about a player using the article generation framework. Include at least 3 data points with proper context.

Exercise 42.4: Interactive Dashboard

Build a Shiny or Dash dashboard for match analysis with the following features:

  • Shot map with filters (team, period, shot type)
  • Pass network visualization with minimum pass threshold slider
  • Player stats comparison selector
  • Exportable summary report

Deploy the dashboard to a free hosting platform (shinyapps.io or Render).

Exercise 42.5: Podcast Episode Planning

Plan a 30-minute analytics podcast episode:

  • Write an outline with timing for each segment
  • Prepare 5 data visualizations to discuss
  • Create a companion Twitter thread with key graphics
  • Draft show notes with links and data sources
Exercise 42.6: Real-Time Match Commentary

Simulate real-time match coverage:

  • Set up a data refresh pipeline (use historical match data with timestamps)
  • Create auto-updating graphics every 5 minutes
  • Write 10 "live" social posts with increasing tension/narrative
  • Generate a post-match summary within 5 minutes of "final whistle"
Exercise 42.7: Newsletter Launch

Create a launch plan for a football analytics newsletter:

  • Design a visual template using Canva or similar tool
  • Write your first 3 newsletter editions (draft format)
  • Create a growth tracking spreadsheet with KPIs
  • Plan your first 8 weeks of content
  • Set up on Substack, Buttondown, or similar platform
Exercise 42.8: YouTube Analytics Video

Script and produce a 10-minute analytics video:

  • Write a complete script with visual cues
  • Create all necessary graphics and animations
  • Record a voiceover or screen recording walkthrough
  • Design a thumbnail following YouTube best practices
  • Write an SEO-optimized title and description

Summary

Essential Tools and Libraries

Category R Libraries Python Libraries Purpose
Static Visualization ggplot2, ggsoccer matplotlib, mplsoccer Creating pitch graphics and charts
Animation gganimate matplotlib.animation, Manim Moving graphics for video content
Interactive Dashboards Shiny, flexdashboard Dash, Streamlit, Panel Web-based interactive analysis
Image Export ragg, Cairo Pillow, cairosvg High-resolution export for broadcast
Video Creation av (FFmpeg bindings) moviepy, ffmpeg-python Programmatic video generation
Report Generation rmarkdown, Quarto Jupyter, nbconvert Automated match reports
Newsletter/Email blastula, mailR yagmail, emails Automated email delivery
Social Media APIs rtweet, httr2 tweepy, instagrapi Automated posting and analytics

Platform Specifications Reference

Platform Image Dimensions Video Specs Best Posting Times (UK)
Twitter/X 1200x675 (16:9) or 1080x1080 (1:1) 1920x1080, 2:20 max, MP4 12-1pm, 5-6pm weekdays; Match days
Instagram Feed 1080x1080 (1:1) or 1080x1350 (4:5) 1080x1920 (Stories/Reels), 60s 11am-1pm, 7-9pm weekdays
LinkedIn 1200x627 or 1080x1080 1920x1080, 10min max 8-10am, 12pm weekdays
YouTube Thumbnail: 1280x720 (16:9) 1920x1080 or 4K, no limit 2-4pm weekdays; 9-11am weekends
TikTok 1080x1920 (9:16) 1080x1920, 10min max 7-9am, 12-3pm, 7-11pm
Broadcast (TV) 1920x1080 (16:9) 1080p/50fps or 4K Pre-match, half-time, post-match

Content Strategy Framework

The PACE Framework for Analytics Content:
  • P - Platform-native: Each platform has its own language and format expectations
  • A - Accessible: Explain concepts simply without sacrificing accuracy
  • C - Consistent: Same style, same schedule, same quality every time
  • E - Engaging: Ask questions, respond to comments, build community
Content Mix Recommendation:
  • 60% Reactive: Match analysis, breaking news, current topics
  • 30% Evergreen: Explainers, tutorials, historical analysis
  • 10% Personal: Behind-the-scenes, methodology discussions, opinions

Monetization Milestones

Follower Count Revenue Opportunities Expected Monthly Revenue
0-1,000 None (focus on content quality) £0
1,000-5,000 Small freelance gigs, guest posts £0-100
5,000-10,000 Substack paid tier, consulting inquiries £100-500
10,000-25,000 Sponsorships, courses, regular freelance £500-2,000
25,000-50,000 Multiple revenue streams, media partnerships £2,000-5,000
50,000+ Full-time potential, brand deals, speaking £5,000+
complete_media_workflow
# Python: Complete media analytics workflow
import matplotlib.pyplot as plt
from mplsoccer import Pitch, VerticalPitch
import pandas as pd
from dataclasses import dataclass
from typing import Dict, List, Optional
from pathlib import Path
import json

@dataclass
class MediaPackage:
    """Complete media package for match analysis."""
    # NOTE(review): the fields below presumably hold the outputs of the
    # matching MatchMediaGenerator.generate_* methods - confirm against the
    # code that builds this package.
    broadcast_graphic: plt.Figure  # matplotlib figure for broadcast use
    social_graphic: plt.Figure     # matplotlib figure for social platforms
    stats: pd.DataFrame            # tabular match statistics
    tweet_text: str                # text of a single post
    thread: List[str]              # ordered posts making up a thread
    article_draft: str             # long-form draft text

class MatchMediaGenerator:
    """Generate complete media package from match data."""

    def __init__(self, team_a: str, team_b: str,
                 primary_colors: Dict[str, str] = None):
        self.team_a = team_a
        self.team_b = team_b
        self.colors = primary_colors or {
            team_a: "#d32f2f",
            team_b: "#1976d2"
        }

    def generate_broadcast_graphic(self, shots_df: pd.DataFrame) -> plt.Figure:
        """Draw a 16x9 shot map with one marker group per team.

        Reads the ``team``, ``x``, ``y`` and ``xG`` columns of ``shots_df``.
        Marker area scales with xG, and each legend entry shows the team's
        summed xG.
        """

        pitch_green = "#1a472a"
        fig, ax = plt.subplots(figsize=(16, 9), facecolor=pitch_green)

        pitch = Pitch(pitch_color=pitch_green, line_color="white",
                      goal_type="box")
        pitch.draw(ax=ax)

        for team_name in shots_df["team"].unique():
            subset = shots_df[shots_df["team"] == team_name]
            legend_label = f"{team_name} (xG: {subset['xG'].sum():.2f})"
            pitch.scatter(
                subset["x"], subset["y"],
                s=subset["xG"] * 500,
                # Teams without a configured colour are drawn in gray
                c=self.colors.get(team_name, "gray"),
                alpha=0.7,
                edgecolors="white",
                linewidth=1,
                ax=ax,
                label=legend_label,
            )

        ax.legend(loc="lower center", ncol=2, fontsize=12,
                  facecolor=pitch_green, edgecolor="white",
                  labelcolor="white")

        heading = f"{self.team_a} vs {self.team_b} - Shot Map"
        ax.set_title(heading, color="white", fontsize=24,
                     fontweight="bold", pad=20)

        # Attribution line along the bottom edge of the figure
        fig.text(0.5, 0.02, "Data: StatsBomb | @YourHandle",
                 ha="center", color="white", fontsize=10)

        plt.tight_layout()
        return fig

    def generate_social_graphic(self, shots_df: pd.DataFrame,
                               platform: str = "twitter") -> plt.Figure:
        """Draw a shot map sized for the given social platform.

        Unknown platform names fall back to the twitter figure size.
        """

        twitter_size = (12, 6.75)
        size_by_platform = {
            "twitter": twitter_size,
            "instagram": (10.8, 10.8),
            "instagram_story": (10.8, 19.2),
            "linkedin": (12, 6.27),
        }
        figsize = size_by_platform.get(platform, twitter_size)

        pitch_green = "#1a472a"
        fig, ax = plt.subplots(figsize=figsize, facecolor=pitch_green)

        pitch = Pitch(pitch_color=pitch_green, line_color="white")
        pitch.draw(ax=ax)

        for team_name in shots_df["team"].unique():
            subset = shots_df[shots_df["team"] == team_name]
            pitch.scatter(
                subset["x"], subset["y"],
                s=subset["xG"] * 400,
                c=self.colors.get(team_name, "gray"),
                alpha=0.8,
                ax=ax,
            )

        heading = f"{self.team_a} vs {self.team_b}"
        ax.set_title(heading, color="white", fontsize=18, fontweight="bold")

        plt.tight_layout()
        return fig

    def generate_stats_summary(self, shots_df: pd.DataFrame) -> pd.DataFrame:
        """Build a per-team table of shots, xG, goals and conversion rate.

        Reads the ``team``, ``xG`` and ``is_goal`` columns. ``Conversion`` is
        goals as a percentage of shots, rounded to one decimal.
        """

        per_team = (
            shots_df.groupby("team")
            .agg(
                Shots=("xG", "count"),
                xG=("xG", "sum"),
                Goals=("is_goal", "sum"),
            )
            .round(2)
        )

        conversion_pct = per_team["Goals"] / per_team["Shots"] * 100
        per_team["Conversion"] = conversion_pct.round(1)

        return per_team.reset_index()

    def generate_tweet(self, stats: pd.DataFrame) -> str:
        """Generate tweet text."""

        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]

        return f"""📊 {self.team_a} {int(row_a["Goals"])}-{int(row_b["Goals"])} {self.team_b}

xG: {row_a["xG"]:.2f} - {row_b["xG"]:.2f}
Shots: {int(row_a["Shots"])} - {int(row_b["Shots"])}
Conversion: {row_a["Conversion"]:.1f}% - {row_b["Conversion"]:.1f}%

#FootballAnalytics #xG"""

    def generate_thread(self, shots_df: pd.DataFrame,
                       stats: pd.DataFrame) -> List[str]:
        """Generate Twitter thread."""

        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]

        thread = [
            f"🧵 THREAD: {self.team_a} vs {self.team_b} - Complete Analysis\n\n"
            f"Final Score: {int(row_a[\"Goals\"])}-{int(row_b[\"Goals\"])}\n"
            f"xG: {row_a[\"xG\"]:.2f} - {row_b[\"xG\"]:.2f}\n\n"
            f"Let's break it down 👇",

            f"📈 SHOT QUALITY\n\n"
            f"{self.team_a}: {int(row_a[\"Shots\"])} shots, avg xG/shot: {row_a[\"xG\"]/row_a[\"Shots\"]:.3f}\n"
            f"{self.team_b}: {int(row_b[\"Shots\"])} shots, avg xG/shot: {row_b[\"xG\"]/row_b[\"Shots\"]:.3f}\n\n"
            f"Higher average = better chance creation",

            f"🎯 FINISHING\n\n"
            f"{self.team_a}: {row_a[\"Conversion\"]:.1f}% conversion\n"
            f"{self.team_b}: {row_b[\"Conversion\"]:.1f}% conversion\n\n"
            f"League average is ~10-12%",

            f"📊 KEY INSIGHT\n\n"
            + self._generate_insight(stats),

            f"That's it for this analysis!\n\n"
            f"🔔 Follow for more analytics content\n"
            f"💬 What did you think of the match?\n\n"
            f"#FootballAnalytics"
        ]

        return thread

    def _generate_insight(self, stats: pd.DataFrame) -> str:
        """Generate key insight based on data."""

        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]

        xg_diff = row_a["xG"] - row_b["xG"]
        goal_diff = row_a["Goals"] - row_b["Goals"]

        if abs(xg_diff) < 0.3:
            return "Very even match by xG - result could have gone either way."
        elif xg_diff > 0 and goal_diff < 0:
            return f"{self.team_a} dominated xG but lost - clinical finishing won the day for {self.team_b}."
        elif xg_diff < 0 and goal_diff > 0:
            return f"{self.team_a} won despite lower xG - efficient finishing made the difference."
        elif xg_diff > 0.5:
            return f"{self.team_a} dominated the chances - deserved result."
        else:
            return f"{self.team_b} created the better chances."

    def generate_package(self, shots_df: pd.DataFrame) -> MediaPackage:
        """Build every media asset for a match and bundle them together.

        Args:
            shots_df: Shot rows passed to the graphic, stats, thread and
                article generators.

        Returns:
            A MediaPackage holding both graphics, the stats table, the tweet
            text, the thread and the article draft.
        """

        # The summary table is computed once and shared by all text generators.
        stats = self.generate_stats_summary(shots_df)

        assets = {
            "broadcast_graphic": self.generate_broadcast_graphic(shots_df),
            "social_graphic": self.generate_social_graphic(shots_df),
            "stats": stats,
            "tweet_text": self.generate_tweet(stats),
            "thread": self.generate_thread(shots_df, stats),
            "article_draft": self._generate_article(shots_df, stats),
        }
        return MediaPackage(**assets)

    def _generate_article(self, shots_df: pd.DataFrame,
                         stats: pd.DataFrame) -> str:
        """Generate article draft."""

        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]

        return f"""# {self.team_a} {int(row_a["Goals"])}-{int(row_b["Goals"])} {self.team_b}: A Statistical Deep Dive

## Match Overview

{self.team_a} faced {self.team_b} in what proved to be a fascinating tactical battle.
The final scoreline of {int(row_a["Goals"])}-{int(row_b["Goals"])} tells part of the story,
but the underlying numbers reveal much more about how this match unfolded.

## Expected Goals Analysis

{self.team_a} generated {row_a["xG"]:.2f} xG from {int(row_a["Shots"])} shots,
while {self.team_b} accumulated {row_b["xG"]:.2f} xG from {int(row_b["Shots"])} attempts.

{self._generate_insight(stats)}

## Shot Quality Breakdown

Looking at shot quality, {self.team_a} averaged {row_a["xG"]/row_a["Shots"]:.3f} xG per shot
compared to {row_b["xG"]/row_b["Shots"]:.3f} for {self.team_b}.

## Conclusion

[Add tactical observations and forward-looking analysis here]

---
*Data: StatsBomb via FBref | Analysis: @YourHandle*
"""

    def export_all(self, package: MediaPackage, output_dir: str = "./output"):
        """Export all media assets."""

        Path(output_dir).mkdir(exist_ok=True)

        # Save graphics
        package.broadcast_graphic.savefig(
            f"{output_dir}/broadcast_16x9.png", dpi=150, bbox_inches="tight"
        )
        package.social_graphic.savefig(
            f"{output_dir}/social_twitter.png", dpi=300, bbox_inches="tight"
        )

        # Save text content
        with open(f"{output_dir}/tweet.txt", "w") as f:
            f.write(package.tweet_text)

        with open(f"{output_dir}/thread.json", "w") as f:
            json.dump(package.thread, f, indent=2)

        with open(f"{output_dir}/article_draft.md", "w") as f:
            f.write(package.article_draft)

        print(f"All assets exported to {output_dir}/")


# Example usage: build a generator for a Liverpool vs Manchester City match.
# generate_package() and export_all() can then be called on it with a shots
# DataFrame to produce and save the assets.
generator = MatchMediaGenerator("Liverpool", "Manchester City")
print("Media generator initialized and ready!")
# R: Complete media analytics workflow
library(tidyverse)
library(ggsoccer)
library(scales)

# Define a complete media content generator
#
# Builds a shot map in two layouts, a per-team summary table and a tweet
# for one match.
#
# Arguments:
#   match_data  Data frame of shots; the x, y, xG, team and is_goal columns
#               are used.
#   team_a      Name of the first team as it appears in match_data$team.
#   team_b      Name of the second team as it appears in match_data$team.
#
# Returns a list with broadcast_graphic, social_graphic, stats and tweet_text.
create_media_package <- function(match_data, team_a, team_b) {

    # Key the palette by team name so team_a is always red and team_b always
    # blue; an unnamed vector would follow the alphabetical order of the
    # team names instead.
    team_colors <- c("#d32f2f", "#1976d2")
    names(team_colors) <- c(team_a, team_b)

    # 1. Create broadcast-quality shot map
    # NOTE(review): annotate_pitch() is called without `dimensions`, while the
    # 0-120 / 0-80 limits used below look like StatsBomb coordinates - confirm
    # whether `dimensions = pitch_statsbomb` is needed here.
    broadcast_shot_map <- ggplot(match_data, aes(x = x, y = y)) +
        annotate_pitch(colour = "white", fill = "#1a472a") +
        geom_point(aes(size = xG, color = team),
                  alpha = 0.8) +
        scale_size(range = c(2, 8)) +
        scale_color_manual(values = team_colors) +
        theme_pitch() +
        theme(
            legend.position = "bottom",
            plot.background = element_rect(fill = "#1a472a"),
            text = element_text(color = "white", family = "Arial Bold"),
            plot.title = element_text(size = 24, hjust = 0.5)
        ) +
        labs(
            title = paste(team_a, "vs", team_b, "- Shot Map"),
            caption = "Data: StatsBomb | @YourHandle"
        )

    # 2. Social media version (square crop)
    social_shot_map <- broadcast_shot_map +
        coord_fixed(ratio = 1, xlim = c(0, 120), ylim = c(0, 80))

    # 3. Generate summary stats
    stats_summary <- match_data %>%
        group_by(team) %>%
        summarise(
            Shots = n(),
            xG = round(sum(xG), 2),
            Goals = sum(is_goal),
            Conversion = paste0(round(mean(is_goal) * 100, 1), "%")
        )

    # 4. Create tweet text
    # Look each team up by name: the summary rows are ordered by team name,
    # so positions 1 and 2 are not guaranteed to be team_a and team_b.
    team_row <- function(team_name) {
        idx <- match(team_name, stats_summary$team)
        if (is.na(idx)) {
            warning(
                "No shots found for team '", team_name,
                "'; reporting zeros in the tweet.",
                call. = FALSE
            )
            return(list(Shots = 0L, xG = 0, Goals = 0L))
        }
        as.list(stats_summary[idx, c("Shots", "xG", "Goals")])
    }
    stats_a <- team_row(team_a)
    stats_b <- team_row(team_b)

    tweet <- paste0(
        "📊 ", team_a, " ", stats_a$Goals, "-",
        stats_b$Goals, " ", team_b, "\n\n",
        "xG: ", stats_a$xG, " - ", stats_b$xG, "\n",
        "Shots: ", stats_a$Shots, " - ", stats_b$Shots, "\n\n",
        "#FootballAnalytics #xG"
    )

    # Return package
    list(
        broadcast_graphic = broadcast_shot_map,
        social_graphic = social_shot_map,
        stats = stats_summary,
        tweet_text = tweet
    )
}

# Export functions for different platforms
# Save a plot at broadcast size: 16 x 9 at 150 dpi.
#
# Arguments:
#   plot      The plot object to save.
#   filename  Destination file path.
export_for_broadcast <- function(plot, filename) {
    ggsave(
        filename = filename,
        plot = plot,
        width = 16,
        height = 9,
        dpi = 150
    )
}

# Save a plot at the size used for a given social platform, at 300 dpi.
#
# Arguments:
#   plot      The plot object to save.
#   filename  Destination file path.
#   platform  One of "twitter", "instagram", "instagram_story" or "linkedin".
#             Any other value falls back to the twitter size with a warning.
export_for_social <- function(plot, filename, platform = "twitter") {
    dims <- switch(platform,
        twitter = c(12, 6.75),
        instagram = c(10.8, 10.8),
        instagram_story = c(10.8, 19.2),
        linkedin = c(12, 6.27),
        {
            # Without a default, switch() returns NULL for an unknown
            # platform and ggsave() would receive empty dimensions.
            warning(
                "Unknown platform '", platform,
                "'; using twitter dimensions.",
                call. = FALSE
            )
            c(12, 6.75)
        }
    )
    ggsave(filename, plot, width = dims[1], height = dims[2], dpi = 300)
}

# Status message printed once the definitions above have been evaluated.
print("Media package generator ready!")

Media analytics requires balancing statistical rigor with accessibility. The best analysts make complex data understandable without dumbing it down. Whether you're working for a broadcaster, building your own audience, or creating content for a club, the principles remain the same: clarity, consistency, and compelling narrative.

In the next chapter, we'll explore fantasy football and betting analytics applications—where the intersection of data and entertainment creates unique analytical challenges.