Capstone - Complete Analytics System
Analytics for Broadcasters & Media
Modern football broadcasting has been transformed by analytics. From live xG graphics to in-depth tactical analysis shows, data enhances storytelling and viewer engagement. This chapter explores how media professionals can effectively use analytics in their coverage.
Learning Objectives
- Understand how broadcasters integrate analytics into coverage
- Create compelling real-time match graphics
- Build visualizations optimized for TV and social media
- Write data-driven articles and analysis pieces
- Develop engaging content formats using analytics
- Balance statistical depth with accessibility
Analytics in Broadcast
Live broadcasts increasingly feature analytics graphics to enhance viewer understanding. The challenge is presenting complex data clearly in seconds.
- xG accumulation charts
- Pass maps and networks
- Heat maps
- Sprint and distance counters
- Formation displays
- Shot maps with xG
- Key pass visualizations
- Pressing metrics
- Player radar comparisons
- Tactical breakdowns
- Season trend analysis
- Player comparison tools
- Expected points tables
- Form and fixture analysis
- Transfer market data
# Python: Create broadcast-quality xG timeline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def create_xg_timeline(match_events, home_team, away_team,
                       home_color="#E31937", away_color="#132257"):
    """Create a broadcast-quality cumulative-xG timeline.

    Parameters
    ----------
    match_events : pd.DataFrame
        Event data with at least the columns ``type``, ``team``,
        ``minute``, ``second``, ``xg`` and ``outcome``.
    home_team, away_team : str
        Team names exactly as they appear in ``match_events["team"]``.
    home_color, away_color : str
        Hex colours for the two step lines.

    Returns
    -------
    matplotlib.figure.Figure
        Dark-themed figure ready for broadcast export.
    """
    # Keep only shots and convert minute/second into a fractional minute.
    shots = match_events[match_events["type"] == "Shot"].copy()
    shots["time"] = shots["minute"] + shots["second"] / 60

    # Split by team and accumulate xG in chronological order.
    home_shots = shots[shots["team"] == home_team].sort_values("time")
    away_shots = shots[shots["team"] == away_team].sort_values("time")
    home_shots["cum_xg"] = home_shots["xg"].cumsum()
    away_shots["cum_xg"] = away_shots["xg"].cumsum()

    # Dark broadcast theme.
    fig, ax = plt.subplots(figsize=(16, 9), facecolor="#0D1117")
    ax.set_facecolor("#0D1117")

    # Prepend kick-off (0, 0) so both lines start at the origin.
    home_times = [0] + home_shots["time"].tolist()
    home_xg = [0] + home_shots["cum_xg"].tolist()
    away_times = [0] + away_shots["time"].tolist()
    away_xg = [0] + away_shots["cum_xg"].tolist()

    # Step plots (xG is constant between shots, hence where="post").
    ax.step(home_times, home_xg, where="post", linewidth=3,
            color=home_color, label=home_team)
    ax.step(away_times, away_xg, where="post", linewidth=3,
            color=away_color, label=away_team)

    # One marker per shot, area scaled by the shot's xG.
    ax.scatter(home_shots["time"], home_shots["cum_xg"],
               s=home_shots["xg"] * 500, color=home_color,
               alpha=0.7, edgecolors="white", linewidths=2)
    ax.scatter(away_shots["time"], away_shots["cum_xg"],
               s=away_shots["xg"] * 500, color=away_color,
               alpha=0.7, edgecolors="white", linewidths=2)

    # Annotate goals for BOTH teams. (Bug fix: the original computed
    # away_goals but only ever labelled the home side's goals.)
    for team_shots in (home_shots, away_shots):
        goals = team_shots[team_shots["outcome"] == "Goal"]
        for _, goal in goals.iterrows():
            ax.annotate("GOAL", (goal["time"], goal["cum_xg"]),
                        textcoords="offset points", xytext=(0, 15),
                        ha="center", fontsize=10, color="white",
                        fontweight="bold")

    # Axis styling: white labels on dark background, ticks every 15'.
    ax.set_xlim(0, 95)
    ax.set_xticks([0, 15, 30, 45, 60, 75, 90])
    ax.set_xlabel("Minutes", color="white", fontsize=14)
    ax.set_ylabel("Expected Goals (xG)", color="white", fontsize=14)
    ax.tick_params(colors="white")
    ax.spines["bottom"].set_color("#30363D")
    ax.spines["left"].set_color("#30363D")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.grid(True, alpha=0.3, color="#30363D")

    # Final cumulative xG totals shown top-left (0 if a team had no shots;
    # the lists always contain the prepended 0, so the guard is defensive).
    final_home = home_xg[-1] if home_xg else 0
    final_away = away_xg[-1] if away_xg else 0
    ax.text(0.02, 0.98, f"{home_team}: {final_home:.2f} xG",
            transform=ax.transAxes, fontsize=16, color=home_color,
            fontweight="bold", va="top")
    ax.text(0.02, 0.92, f"{away_team}: {final_away:.2f} xG",
            transform=ax.transAxes, fontsize=16, color=away_color,
            fontweight="bold", va="top")
    ax.legend(loc="lower right", facecolor="#0D1117",
              edgecolor="#30363D", labelcolor="white")
    plt.tight_layout()
    return fig
# Create sample visualization
# fig = create_xg_timeline(events, "Liverpool", "Man City")# R: Create broadcast-quality xG timeline
library(tidyverse)
library(ggplot2)
# Match data
create_xg_timeline <- function(match_events) {
  # Build a cumulative-xG step chart styled for broadcast output.
  # `match_events` must contain: type, team, minute, second, xg,
  # home_team, away_team.

  # Cumulative xG per team, in chronological order.
  xg_timeline <- match_events %>%
    filter(type == "Shot") %>%
    arrange(minute, second) %>%
    group_by(team) %>%
    mutate(
      cumulative_xg = cumsum(xg),
      time = minute + second / 60
    ) %>%
    ungroup()

  # Anchor both lines at kick-off (0, 0) and extend them to full time.
  home_team <- unique(match_events$home_team)
  away_team <- unique(match_events$away_team)
  start_points <- tibble(
    team = c(home_team, away_team),
    time = c(0, 0),
    cumulative_xg = c(0, 0)
  )
  end_time <- max(xg_timeline$time, na.rm = TRUE)
  end_points <- xg_timeline %>%
    group_by(team) %>%
    slice_max(time, n = 1, with_ties = FALSE) %>%
    ungroup() %>%
    # Clear `type` so these synthetic full-time rows are NOT re-plotted
    # as shot markers below (the original duplicated each team's last
    # shot marker at `end_time`).
    mutate(time = end_time, type = NA_character_)
  xg_timeline <- bind_rows(start_points, xg_timeline, end_points)

  # Broadcast-quality visualization (dark theme; no title because
  # broadcasters overlay their own).
  ggplot(xg_timeline, aes(x = time, y = cumulative_xg, color = team)) +
    geom_step(linewidth = 2) +
    geom_point(data = xg_timeline %>% filter(type == "Shot"),
               aes(size = xg), alpha = 0.7) +
    # NOTE(review): colours are assigned in the factor (alphabetical)
    # order of `team`, not home/away order -- confirm against callers.
    scale_color_manual(values = c("#E31937", "#132257")) +
    scale_x_continuous(breaks = c(0, 15, 30, 45, 60, 75, 90),
                       limits = c(0, 95)) +
    labs(
      title = NULL, # Broadcasters add their own
      x = "Minutes",
      y = "Expected Goals (xG)",
      color = NULL,
      size = "Shot xG"
    ) +
    theme_minimal(base_size = 18) +
    theme(
      panel.grid.minor = element_blank(),
      legend.position = "bottom",
      plot.background = element_rect(fill = "#0D1117", color = NA),
      panel.background = element_rect(fill = "#0D1117", color = NA),
      text = element_text(color = "white"),
      axis.text = element_text(color = "white"),
      panel.grid.major = element_line(color = "#30363D")
    )
}
Real-Time Graphics Pipeline
# Python: Real-time graphics pipeline
import asyncio
from datetime import datetime
from typing import Dict, Any
import matplotlib.pyplot as plt
from io import BytesIO
import base64
class BroadcastGraphicsEngine:
    """
    Real-time graphics generation for broadcast.

    Renders PNG graphics (xG timeline, shot map, pass network) from a
    live event feed and writes them to ``output_dir`` on a fixed cadence.
    """

    def __init__(self, match_id: str, output_dir: str = "./broadcast"):
        self.match_id = match_id
        self.output_dir = output_dir
        self.cache = {}           # reserved for memoised graphics
        self.last_update = None   # datetime of the last successful render

    async def connect_to_feed(self, feed_url: str):
        """Connect to live data feed."""
        # In production, connect to StatsBomb/Opta live feed
        pass

    async def fetch_events(self) -> list:
        """Fetch the latest match events from the connected feed.

        Bug fix: the original awaited ``self.fetch_events()`` inside
        ``run_live_updates`` without ever defining it, raising
        ``AttributeError`` on every update cycle.  Override this with a
        real feed call in production.
        """
        raise NotImplementedError(
            "fetch_events must be implemented against a live data feed"
        )

    def _export_figure(self, fig) -> bytes:
        """Render a matplotlib figure to PNG bytes and close it."""
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=100,
                    facecolor="#0D1117", bbox_inches="tight")
        plt.close(fig)  # free the figure; long-running loop must not leak
        buf.seek(0)
        return buf.getvalue()

    def generate_xg_graphic(self, events: list) -> bytes:
        """Generate xG timeline PNG for broadcast."""
        # ``create_xg_timeline`` is defined earlier in this module.
        fig = create_xg_timeline(events, "Home", "Away")
        return self._export_figure(fig)

    def generate_shot_map(self, events: list, team: str) -> bytes:
        """Generate shot map PNG with xG sizing (goals red, misses grey)."""
        from mplsoccer import VerticalPitch
        pitch = VerticalPitch(half=True, pitch_color="#0D1117",
                              line_color="white")
        fig, ax = pitch.draw(figsize=(8, 8))
        shots = [e for e in events if e["type"] == "Shot"
                 and e["team"] == team]
        for shot in shots:
            color = "#E31937" if shot["outcome"] == "Goal" else "#666666"
            size = shot["xg"] * 1000  # marker area scales with shot quality
            ax.scatter(shot["x"], shot["y"], s=size, c=color,
                       alpha=0.7, edgecolors="white", linewidths=2)
        return self._export_figure(fig)

    def generate_pass_network(self, events: list, team: str) -> bytes:
        """Generate pass network visualization (skeleton implementation)."""
        from mplsoccer import Pitch
        import networkx as nx
        # Build network from completed passes only.
        passes = [e for e in events if e["type"] == "Pass"
                  and e["team"] == team and e["outcome"] == "Complete"]
        # Calculate average positions and pass counts
        # ... (network building logic)
        pitch = Pitch(pitch_color="#0D1117", line_color="white")
        fig, ax = pitch.draw(figsize=(12, 8))
        # Draw network
        # ... (drawing logic)
        return self._export_figure(fig)

    async def run_live_updates(self, interval_seconds: int = 30):
        """Continuously regenerate graphics during the match.

        Errors are caught and printed so a single feed hiccup does not
        kill the broadcast pipeline; the loop never exits on its own.
        """
        while True:
            try:
                # Fetch latest events
                events = await self.fetch_events()
                # Generate all graphics
                xg_graphic = self.generate_xg_graphic(events)
                shot_map = self.generate_shot_map(events, "Home")
                # Save to output directory
                timestamp = datetime.now().strftime("%H%M%S")
                with open(f"{self.output_dir}/xg_{timestamp}.png", "wb") as f:
                    f.write(xg_graphic)
                self.last_update = datetime.now()
            except Exception as e:
                print(f"Error updating graphics: {e}")
            await asyncio.sleep(interval_seconds)
# R: Real-time graphics pipeline concept
library(R6)
BroadcastGraphics <- R6Class("BroadcastGraphics",
  public = list(
    match_id = NULL,
    data_feed = NULL,

    # Store the match id; the feed connection is left as a stub.
    initialize = function(match_id, feed_url) {
      self$match_id <- match_id
      # Connect to live data feed
    },

    generate_xg_graphic = function() {
      # Fetch latest data
      # Generate visualization
      # Export in broadcast format (PNG/SVG)
    },

    generate_formation_graphic = function() {
      # Current formations with player positions
    },

    generate_momentum_graphic = function(window_minutes = 10) {
      # Calculate rolling metrics for momentum display
    },

    # Save a ggplot at broadcast resolution (default 1920x1080 @ 100dpi).
    export_for_broadcast = function(graphic, format = "png",
                                    resolution = c(1920, 1080)) {
      # Bug fix: raw Sys.time() output ("2024-01-01 12:00:00") contains
      # spaces and colons, which is not a valid filename on Windows.
      stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
      ggsave(
        filename = paste0("broadcast_", stamp, ".", format),
        plot = graphic,
        width = resolution[1] / 100,
        height = resolution[2] / 100,
        dpi = 100
      )
    }
  )
)
Data-Driven Writing
Effective analytics journalism combines statistical rigor with compelling narrative. The key is making data accessible without oversimplifying.
- Lead with the story, not the stat: "Liverpool's attack is broken" not "Liverpool's xG is down 0.4 per game"
- Explain metrics in context: Always provide comparison points and benchmarks
- Use visuals to support, not replace, narrative: Every chart needs explanation
- Acknowledge uncertainty: Sample sizes, luck, and limitations matter
- Make it actionable: What does this mean for the team/player?
# Python: Generate article data points and templates
import pandas as pd
import numpy as np
from scipy import stats
class ArticleGenerator:
    """Generate data points and text for analytics articles.

    Parameters
    ----------
    player_data : dict-like
        Single-player record with at least: name, position, minutes,
        goals, xg, xg_per90, xa_per90, recent_xg_per90, season_xg_per90.
    league_data : pd.DataFrame
        League-wide per-player table with position, minutes, xg_per90
        and xa_per90 columns.
    """

    def __init__(self, player_data, league_data):
        self.player = player_data
        self.league = league_data
        self.stats = self._calculate_stats()

    def _calculate_stats(self):
        """Calculate all relevant statistics for the article.

        Bug fix: the original named its local dict ``stats``, shadowing
        the ``scipy.stats`` module import and crashing on
        ``stats.percentileofscore``.  The local is now ``result``.
        """
        # Positional peers with a meaningful sample (900+ minutes).
        position_peers = self.league[
            (self.league["position"] == self.player["position"]) &
            (self.league["minutes"] >= 900)
        ]
        result = {}
        # Percentile rankings among positional peers.
        result["xg_percentile"] = stats.percentileofscore(
            position_peers["xg_per90"], self.player["xg_per90"]
        )
        result["xa_percentile"] = stats.percentileofscore(
            position_peers["xa_per90"], self.player["xa_per90"]
        )
        # Sample size context, in 90-minute equivalents.
        result["sample_size"] = self.player["minutes"] / 90
        if result["sample_size"] > 15:
            result["sample_reliability"] = "robust"
        elif result["sample_size"] > 8:
            result["sample_reliability"] = "emerging"
        else:
            result["sample_reliability"] = "limited"
        # Trend analysis: recent form vs season baseline (+-0.05 band).
        result["xg_trend"] = (self.player["recent_xg_per90"]
                              - self.player["season_xg_per90"])
        if result["xg_trend"] > 0.05:
            result["trend_direction"] = "improving"
        elif result["xg_trend"] < -0.05:
            result["trend_direction"] = "declining"
        else:
            result["trend_direction"] = "stable"
        # Over/underperformance vs expected goals (+-2 goal tolerance).
        result["goals_vs_xg"] = self.player["goals"] - self.player["xg"]
        if result["goals_vs_xg"] > 2:
            result["performance_label"] = "overperforming"
        elif result["goals_vs_xg"] < -2:
            result["performance_label"] = "underperforming"
        else:
            result["performance_label"] = "performing as expected"
        return result

    def generate_lede(self):
        """Generate the article opening paragraph keyed to the
        performance label computed in :meth:`_calculate_stats`."""
        templates = {
            "overperforming": f"{self.player['name']} is riding a hot streak. "
                              f"With {self.player['goals']} goals from just "
                              f"{self.player['xg']:.1f} xG, the {self.player['position']} "
                              f"is currently {self.stats['performance_label']} expectations.",
            "underperforming": f"The numbers suggest {self.player['name']} is due for a "
                               f"breakthrough. Despite registering {self.player['xg']:.1f} xG, "
                               f"the {self.player['position']} has only {self.player['goals']} "
                               f"goals to show for it.",
            "performing as expected": f"{self.player['name']}'s output this season tells a "
                                      f"story of consistency. The {self.player['position']}'s "
                                      f"{self.player['goals']} goals align almost perfectly "
                                      f"with their {self.player['xg']:.1f} xG."
        }
        return templates[self.stats["performance_label"]]

    def generate_context_paragraph(self):
        """Generate the contextual comparison paragraph from the
        player's xG-per-90 percentile among positional peers."""
        percentile = round(self.stats["xg_percentile"])
        if percentile >= 90:
            tier = "elite"
            comparison = "among the league's best"
        elif percentile >= 75:
            tier = "above average"
            comparison = "better than most peers"
        elif percentile >= 50:
            tier = "average"
            comparison = "roughly average for their position"
        else:
            tier = "below average"
            comparison = "below the typical output"
        return (f"In context, {self.player['name']}'s {self.player['xg_per90']:.2f} xG "
                f"per 90 ranks in the {percentile}th percentile among "
                f"{self.player['position']}s with 900+ minutes. That places them "
                f"{comparison} - {tier} production for their role.")

    def generate_sample_size_caveat(self):
        """Generate a sample-size caveat; empty string above 900 minutes."""
        minutes = self.player["minutes"]
        if minutes < 450:
            return (f"With only {minutes} minutes played, these numbers should be "
                    f"treated with significant caution. Small samples can be misleading.")
        elif minutes < 900:
            return (f"At {minutes} minutes, we're seeing an emerging picture, "
                    f"though more playing time would increase confidence in these trends.")
        else:
            return ""  # No caveat needed
# Usage
# generator = ArticleGenerator(player_data, league_data)
# print(generator.generate_lede())
# print(generator.generate_context_paragraph())# R: Generate article data points
library(tidyverse)
generate_article_stats <- function(player_data, league_data) {
  # Compute key talking points plus ready-made sentence templates for
  # an analytics article about `player_data`.
  stats <- list()

  # Positional peers with a meaningful sample (900+ minutes).
  # Consistency fix: the original ranked the player against the whole
  # league, which disagreed with the peer filter used for the
  # similar-player comparison below (and with the Python counterpart).
  peers <- league_data %>%
    filter(position == player_data$position,
           minutes >= 900)

  # Percentile rankings among peers via the empirical CDF.
  stats$xg_percentile <- ecdf(peers$xg_per90)(player_data$xg_per90) * 100
  stats$xa_percentile <- ecdf(peers$xa_per90)(player_data$xa_per90) * 100

  # Trend analysis: recent output vs season baseline.
  stats$xg_trend <- player_data$recent_xg - player_data$season_xg
  stats$form_description <- case_when(
    stats$xg_trend > 0.2 ~ "dramatically improved",
    stats$xg_trend > 0.1 ~ "trending upward",
    stats$xg_trend < -0.2 ~ "significantly declined",
    stats$xg_trend < -0.1 ~ "trending downward",
    TRUE ~ "maintained consistent"
  )

  # Five most similar peers (L1 distance on xG + xA per 90).
  similar_players <- peers %>%
    mutate(similarity = abs(xg_per90 - player_data$xg_per90) +
             abs(xa_per90 - player_data$xa_per90)) %>%
    arrange(similarity) %>%
    head(5)
  stats$comparisons <- similar_players$player

  # Sentence templates ready to drop into an article.
  stats$sentences <- list(
    performance = glue::glue(
      "{player_data$name} ranks in the {round(stats$xg_percentile)}th percentile ",
      "for xG per 90 among {player_data$position}s in the Premier League."
    ),
    trend = glue::glue(
      "Their attacking output has {stats$form_description} over the past 10 matches, ",
      "with xG per 90 {ifelse(stats$xg_trend >= 0, 'up', 'down')} ",
      "{abs(round(stats$xg_trend, 2))} from their season average."
    ),
    comparison = glue::glue(
      "Statistically similar players include ",
      "{paste(head(stats$comparisons, 3), collapse = ', ')}."
    )
  )
  stats
}
Interactive Web Visualizations
Modern media outlets increasingly feature interactive graphics that let readers explore data themselves. These require web development skills alongside data analysis.
# Python: Create interactive visualizations
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, callback, Input, Output
import pandas as pd
class InteractiveVisualizations:
    """Create interactive web-based football visualizations.

    Both builders return Plotly ``Figure`` objects styled with the same
    dark (#0D1117) theme used by the matplotlib graphics in this module.
    """

    @staticmethod
    def create_interactive_shot_map(shots_df):
        """Create interactive shot map with Plotly."""
        # Per-shot hover tooltip: player, xG and match minute.
        # NOTE(review): this mutates the caller's DataFrame by adding a
        # "hover_text" column -- confirm callers do not rely on it being
        # untouched.
        shots_df["hover_text"] = shots_df.apply(
            lambda r: f"Player: {r['player']}<br>"
            f"xG: {r['xg']:.2f}<br>"
            f"Minute: {r['minute']}",
            axis=1
        )
        fig = go.Figure()
        # Draw pitch (simplified): attacking half, penalty area, six-yard
        # box.  NOTE(review): coordinates assume a 120x80 pitch with
        # x >= 60 as the attacking half -- confirm against the data feed.
        fig.add_shape(type="rect", x0=60, x1=120, y0=0, y1=80,
                      fillcolor="#1a472a", line_color="white")
        fig.add_shape(type="rect", x0=102, x1=120, y0=18, y1=62,
                      line_color="white", fillcolor="rgba(0,0,0,0)")
        fig.add_shape(type="rect", x0=114, x1=120, y0=30, y1=50,
                      line_color="white", fillcolor="rgba(0,0,0,0)")
        # Goals and non-goals go in separate traces so each gets its own
        # legend entry and colour (gold vs white); goals added last so
        # they render on top.
        goals = shots_df[shots_df["outcome"] == "Goal"]
        non_goals = shots_df[shots_df["outcome"] != "Goal"]
        fig.add_trace(go.Scatter(
            x=non_goals["x"], y=non_goals["y"],
            mode="markers",
            marker=dict(
                size=non_goals["xg"] * 50 + 5,  # scale by xG, +5 size floor
                color="white",
                opacity=0.6,
                line=dict(color="white", width=1)
            ),
            text=non_goals["hover_text"],
            hoverinfo="text",
            name="No Goal"
        ))
        fig.add_trace(go.Scatter(
            x=goals["x"], y=goals["y"],
            mode="markers",
            marker=dict(
                size=goals["xg"] * 50 + 5,
                color="#FFD700",
                opacity=0.9,
                line=dict(color="white", width=2)
            ),
            text=goals["hover_text"],
            hoverinfo="text",
            name="Goal"
        ))
        # Hide axes; the pitch shapes provide all spatial context.
        fig.update_layout(
            plot_bgcolor="#0D1117",
            paper_bgcolor="#0D1117",
            font_color="white",
            showlegend=True,
            xaxis=dict(visible=False, range=[55, 125]),
            yaxis=dict(visible=False, range=[-5, 85]),
            height=600
        )
        return fig

    @staticmethod
    def create_xg_timeline_interactive(events_df, home_team, away_team):
        """Create interactive xG timeline."""
        shots = events_df[events_df["type"] == "Shot"].copy()
        shots["time"] = shots["minute"] + shots["second"] / 60
        fig = go.Figure()
        # One cumulative-xG step trace per team; home listed first.
        for team, color in [(home_team, "#E31937"), (away_team, "#132257")]:
            team_shots = shots[shots["team"] == team].sort_values("time")
            # NOTE(review): assigning into this filtered slice may emit a
            # pandas SettingWithCopyWarning; behaviour preserved as-is.
            team_shots["cum_xg"] = team_shots["xg"].cumsum()
            # Prepend kick-off (0, 0) so the line starts at the origin.
            times = [0] + team_shots["time"].tolist()
            cum_xg = [0] + team_shots["cum_xg"].tolist()
            fig.add_trace(go.Scatter(
                x=times, y=cum_xg,
                mode="lines+markers",
                name=team,
                line=dict(color=color, width=3, shape="hv"),  # step shape
                marker=dict(size=8)
            ))
            # Arrow annotation on the line for each goal.
            goals = team_shots[team_shots["outcome"] == "Goal"]
            for _, goal in goals.iterrows():
                fig.add_annotation(
                    x=goal["time"], y=goal["cum_xg"],
                    text="GOAL",
                    showarrow=True,
                    arrowhead=2,
                    arrowcolor=color,
                    font=dict(color="white", size=10)
                )
        fig.update_layout(
            title="Expected Goals Timeline",
            xaxis_title="Minutes",
            yaxis_title="Cumulative xG",
            plot_bgcolor="#0D1117",
            paper_bgcolor="#0D1117",
            font_color="white",
            xaxis=dict(range=[0, 95], tickvals=[0, 15, 30, 45, 60, 75, 90]),
            height=500
        )
        return fig
# Dash app for interactive dashboard
def create_dash_app():
    """Create full interactive dashboard with Dash.

    Layout: team/metric dropdowns on top, shot map and xG timeline side
    by side, player-stats graph below.  Only the shot-map callback is
    wired up here; the xg-timeline and player-stats graphs have no
    callbacks yet.
    """
    app = Dash(__name__)
    app.layout = html.Div([
        html.H1("Football Analytics Dashboard",
                style={"color": "white", "textAlign": "center"}),
        html.Div([
            html.Label("Select Team:", style={"color": "white"}),
            dcc.Dropdown(
                id="team-dropdown",
                options=[
                    {"label": "Arsenal", "value": "Arsenal"},
                    {"label": "Chelsea", "value": "Chelsea"},
                    {"label": "Liverpool", "value": "Liverpool"}
                ],
                value="Arsenal",
                style={"width": "200px"}
            ),
            html.Label("Select Metric:", style={"color": "white", "marginLeft": "20px"}),
            dcc.Dropdown(
                id="metric-dropdown",
                options=[
                    {"label": "Expected Goals (xG)", "value": "xg"},
                    {"label": "Expected Assists (xA)", "value": "xa"},
                    {"label": "Passes", "value": "passes"}
                ],
                value="xg",
                style={"width": "200px"}
            )
        ], style={"display": "flex", "marginBottom": "20px"}),
        html.Div([
            dcc.Graph(id="shot-map", style={"width": "50%"}),
            dcc.Graph(id="xg-timeline", style={"width": "50%"})
        ], style={"display": "flex"}),
        html.Div([
            dcc.Graph(id="player-stats")
        ])
    ], style={"backgroundColor": "#0D1117", "padding": "20px"})

    @callback(
        Output("shot-map", "figure"),
        Input("team-dropdown", "value")
    )
    def update_shot_map(team):
        # NOTE(review): get_team_shots is not defined in this file; it
        # must be supplied by the surrounding data-loading layer.
        shots = get_team_shots(team)  # Load data
        return InteractiveVisualizations.create_interactive_shot_map(shots)

    return app
# Run with: app.run_server(debug=True)# R: Create interactive visualizations with Shiny
library(shiny)
library(plotly)
library(tidyverse)
# Interactive shot map app
#
# NOTE(review): relies on `shots_data` and `get_players_for_team()`
# being available in the calling environment -- neither is defined in
# this file.
shot_map_app <- function() {
  # Sidebar filters (team, player, xG floor, outcomes) + plotly map.
  ui <- fluidPage(
    theme = bslib::bs_theme(bootswatch = "darkly"),
    titlePanel("Interactive Shot Map"),
    sidebarLayout(
      sidebarPanel(
        selectInput("team", "Select Team",
                    choices = c("Arsenal", "Chelsea", "Liverpool")),
        # Player choices are populated server-side once a team is picked.
        selectInput("player", "Select Player",
                    choices = NULL),
        sliderInput("xg_min", "Minimum xG",
                    min = 0, max = 1, value = 0, step = 0.05),
        checkboxGroupInput("outcome", "Shot Outcome",
                           choices = c("Goal", "Saved", "Blocked", "Off Target"),
                           selected = c("Goal", "Saved", "Blocked", "Off Target"))
      ),
      mainPanel(
        plotlyOutput("shot_map", height = "600px"),
        tableOutput("shot_stats")
      )
    )
  )
  server <- function(input, output, session) {
    # Update player choices based on team
    observe({
      players <- get_players_for_team(input$team)
      updateSelectInput(session, "player", choices = players)
    })
    # Shots matching every sidebar filter; shared by plot and table.
    filtered_shots <- reactive({
      shots_data %>%
        filter(team == input$team,
               player == input$player,
               xg >= input$xg_min,
               outcome %in% input$outcome)
    })
    output$shot_map <- renderPlotly({
      shots <- filtered_shots()
      p <- ggplot(shots, aes(x = x, y = y, size = xg,
                             color = outcome,
                             text = paste("xG:", round(xg, 2),
                                          "<br>Min:", minute))) +
        # Pitch drawing
        annotate("rect", xmin = 60, xmax = 120, ymin = 0, ymax = 80,
                 fill = "#1a472a") +
        geom_point(alpha = 0.7) +
        scale_color_manual(values = c("Goal" = "#FFD700",
                                      "Saved" = "#FFFFFF",
                                      "Blocked" = "#888888",
                                      "Off Target" = "#444444")) +
        theme_void() +
        theme(plot.background = element_rect(fill = "#0D1117"))
      # The `text` aesthetic above becomes the hover tooltip.
      ggplotly(p, tooltip = "text")
    })
    # Summary table for the currently filtered shots.
    output$shot_stats <- renderTable({
      shots <- filtered_shots()
      tibble(
        `Total Shots` = nrow(shots),
        `Total xG` = sum(shots$xg),
        Goals = sum(shots$outcome == "Goal"),
        `Goals - xG` = sum(shots$outcome == "Goal") - sum(shots$xg)
      )
    })
  }
  shinyApp(ui, server)
}
Podcast and Video Content
Audio and video content allows for deeper exploration of analytics topics. Successful content combines entertainment with education.
- Hook in 5 seconds: Start with the insight, not the methodology
- Show, don't tell: Use animations and visual examples
- Keep graphics on screen: Viewers need time to absorb
- Use concrete examples: Reference real matches and players
- Include timestamps: Let viewers jump to sections
- Call to action: Ask questions, encourage engagement
- Prepare talking points: Have stats ready but conversation natural
- Explain context: Listeners can't see your graphs
- Use round numbers: "About 0.5 xG" not "0.487 xG"
- Tell stories: Data supports narrative, doesn't replace it
- Invite guests: Different perspectives add value
- Reference show notes: "Check the link for the full analysis"
# Python: Video content planning and automation
import pandas as pd
from dataclasses import dataclass
from typing import List, Dict
@dataclass
class VideoSection:
    """One planned segment of an analytics video script."""
    title: str                  # segment name, e.g. "Hook", "The Data"
    duration_seconds: int       # planned on-screen time for this segment
    content: str                # narration text or a summary of it
    visuals: List[str]          # graphics shown during the segment
    data_points: List[str]      # stats referenced while it runs
class VideoContentPlanner:
    """Plan and structure analytics video content.

    Parameters
    ----------
    topic : str
        Working title of the video.
    target_duration_minutes : int
        Target length; used to report over/under-run in
        :meth:`calculate_duration`.
    """

    def __init__(self, topic: str, target_duration_minutes: int = 10):
        self.topic = topic
        self.target_duration = target_duration_minutes * 60  # seconds

    def create_script_outline(self, data_analysis: Dict) -> List["VideoSection"]:
        """Generate a video script outline from an analysis dict.

        ``data_analysis`` must provide: headline, hook_stat, context,
        context_stats, main_points, key_insight and insight_stat; the
        optional ``predictions`` key feeds the Implications section.
        (Annotation quoted so this class no longer requires VideoSection
        to be defined before it at import time.)
        """
        sections = [
            VideoSection(
                title="Hook",
                duration_seconds=15,
                content=f"Did you know that {data_analysis['headline']}?",
                visuals=["Dramatic stat reveal"],
                data_points=[data_analysis["hook_stat"]]
            ),
            VideoSection(
                title="Introduction",
                duration_seconds=30,
                content="Welcome back to the channel. Today we're diving into...",
                visuals=["Channel branding", "Topic title card"],
                data_points=[]
            ),
            VideoSection(
                title="Context",
                duration_seconds=60,
                content=f"Before we get into the numbers, let's understand why "
                        f"this matters: {data_analysis['context']}",
                visuals=["Season timeline", "League table position"],
                data_points=[data_analysis["context_stats"]]
            ),
            VideoSection(
                title="The Data",
                duration_seconds=180,
                content="Let's look at what the data actually shows us...",
                visuals=["Shot map", "xG timeline", "Comparison charts"],
                data_points=data_analysis["main_points"]
            ),
            VideoSection(
                title="Key Insight",
                duration_seconds=120,
                content=f"Here's the most interesting finding: "
                        f"{data_analysis['key_insight']}",
                visuals=["Animated highlight", "Statistical breakdown"],
                data_points=[data_analysis["insight_stat"]]
            ),
            VideoSection(
                title="Implications",
                duration_seconds=90,
                content="So what does this mean going forward?",
                visuals=["Future projections", "Recommendations"],
                data_points=data_analysis.get("predictions", [])
            ),
            VideoSection(
                title="Conclusion",
                duration_seconds=30,
                content="That's all for today. If you found this useful...",
                visuals=["Subscribe button", "Related video suggestions"],
                data_points=[]
            )
        ]
        return sections

    def calculate_duration(self, sections: List["VideoSection"]) -> Dict:
        """Calculate total duration and pacing metrics.

        Bug fix: an empty section list no longer raises
        ZeroDivisionError; coverage is reported as 0.0 instead.
        """
        total = sum(s.duration_seconds for s in sections)
        # Time spent in segments that actually cite data.
        data_heavy = sum(s.duration_seconds for s in sections
                         if len(s.data_points) > 0)
        coverage = round(data_heavy / total * 100, 1) if total else 0.0
        return {
            "total_seconds": total,
            "total_minutes": round(total / 60, 1),
            "data_coverage_pct": coverage,
            "over_target_by": total - self.target_duration
        }

    def generate_visual_list(self, sections: List["VideoSection"]) -> List[Dict]:
        """Generate the list of all visuals needed, one dict per visual.

        Bug fix: the original annotated the return type as List[str]
        even though it returns dicts (section, visual, duration).
        """
        all_visuals = []
        for section in sections:
            for visual in section.visuals:
                all_visuals.append({
                    "section": section.title,
                    "visual": visual,
                    "duration": section.duration_seconds
                })
        return all_visuals
class PodcastEpisodePlanner:
    """Plan analytics podcast episodes.

    Parameters
    ----------
    episode_title : str
        Episode working title.
    target_minutes : int
        Target episode length; 8 minutes are reserved for the opening
        and closing segments, the rest is split evenly across topics.
    """

    def __init__(self, episode_title: str, target_minutes: int = 45):
        self.title = episode_title
        self.target_minutes = target_minutes

    def create_episode_structure(self, topics: List[Dict]) -> List[Dict]:
        """Create episode structure with talking points.

        Each topic dict needs ``title``, ``description`` and
        ``key_stats``; ``questions`` is optional.  Bug fix: an empty
        topic list now yields just the opening and closing segments
        instead of raising ZeroDivisionError.
        """
        structure = [
            {
                "segment": "Opening",
                "duration": 3,
                "content": "Welcome, preview of episode, housekeeping"
            }
        ]
        if topics:
            # Reserve 8 minutes for intro/outro, split the rest evenly.
            content_time = self.target_minutes - 8
            per_topic = content_time // len(topics)
            for topic in topics:
                structure.append({
                    "segment": topic["title"],
                    "duration": per_topic,
                    "content": topic["description"],
                    "talking_points": topic["key_stats"],
                    "questions_to_ask": topic.get("questions", [])
                })
        structure.append({
            "segment": "Closing",
            "duration": 5,
            "content": "Summary, preview next episode, call to action"
        })
        return structure

    def prepare_stat_cards(self, topics: List[Dict]) -> List[Dict]:
        """Prepare stat cards for quick reference during recording.

        ``headline_stat`` is each topic's first key stat and
        ``supporting_stats`` the next two (fewer if unavailable);
        ``comparison`` and ``caveat`` default to None when absent.
        """
        cards = []
        for topic in topics:
            cards.append({
                "topic": topic["title"],
                "headline_stat": topic["key_stats"][0],
                "supporting_stats": topic["key_stats"][1:3],
                "comparison": topic.get("comparison"),
                "caveat": topic.get("sample_size_note")
            })
        return cards
# Usage: plan a 10-minute video from a prepared analysis dict and
# report the scripted running time.
planner = VideoContentPlanner("Liverpool's Defensive Transformation", 10)
analysis = {
    "headline": "Liverpool have conceded 40% fewer xG since switching to a back 3",
    "hook_stat": "0.8 xG against per game vs 1.4 last season",
    "context": "After a slow start, Slot made tactical changes",
    "context_stats": "First 10 games vs last 10 games comparison",
    "main_points": ["xG against dropped", "Press success up", "Line height changed"],
    "key_insight": "The back 3 allows Robertson to push higher",
    "insight_stat": "Robertson progressive passes up 35%"
}
sections = planner.create_script_outline(analysis)
duration = planner.calculate_duration(sections)
print(f"Video Duration: {duration['total_minutes']} minutes")
# R: Generate video script outline
library(tidyverse)
library(glue)
# Build a timed outline for an analytics video and print its total
# running time.
generate_video_script <- function(topic, data_points, duration_minutes = 10) {
  # Structure for analytics video
  # NOTE(review): segment durations are hard-coded below, so `topic` and
  # `duration_minutes` are currently unused in the body -- confirm
  # whether durations should scale with the target length.
  script <- list(
    hook = list(
      duration = 15,
      content = glue("Open with the key insight: {data_points$headline}")
    ),
    intro = list(
      duration = 30,
      content = "Brief intro, what viewers will learn"
    ),
    context = list(
      duration = 60,
      content = glue("Why this matters: {data_points$context}"),
      visuals = c("Team badge", "Season overview graphic")
    ),
    methodology = list(
      duration = 45,
      content = "Quick explanation of metrics used",
      visuals = c("xG explainer", "Data source logos")
    ),
    analysis = list(
      duration = 300,
      content = "Main analysis with multiple data points",
      sections = data_points$sections,
      visuals = c("Shot map", "Timeline", "Comparison charts")
    ),
    implications = list(
      duration = 90,
      content = "What this means for the team/player",
      visuals = c("Future fixtures", "Key matchups")
    ),
    conclusion = list(
      duration = 30,
      content = "Summary and call to action",
      visuals = c("Subscribe reminder", "Next video preview")
    )
  )
  # Calculate total duration
  total <- sum(sapply(script, function(x) x$duration))
  cat(glue("Total script duration: {total} seconds ({round(total/60, 1)} minutes)\n"))
  script
}
Monetization and Content Strategy
Sustainable analytics content requires a business model. Understanding monetization options helps you invest in quality content creation.
| Revenue Stream | Audience Required | Typical Earnings | Considerations |
|---|---|---|---|
| Substack/Newsletter | 1,000+ subscribers | £5-15/month per paid sub | Need consistent quality content |
| YouTube AdSense | 1,000 subs + 4,000 watch hours | £1-5 per 1,000 views | Niche content has lower CPM |
| Patreon/Ko-fi | 500+ engaged followers | £3-20/month per patron | Need exclusive content tiers |
| Freelance Writing | Strong portfolio | £100-500 per article | Builds on public reputation |
| Consulting | Expertise + network | £50-200/hour | Time-intensive but high value |
| Courses/Products | 10,000+ audience | Variable (£50-500 per sale) | High upfront creation cost |
# Python: Content strategy and analytics
import pandas as pd
from datetime import datetime, timedelta
from typing import List, Dict
class ContentStrategyManager:
"""Manage and optimize analytics content strategy."""
def __init__(self):
self.content_log = []
self.revenue_log = []
def log_content(self, platform: str, content_type: str,
topic: str, views: int, engagement: int,
time_hours: float, revenue: float = 0):
"""Log content performance."""
self.content_log.append({
"date": datetime.now(),
"platform": platform,
"content_type": content_type,
"topic": topic,
"views": views,
"engagement": engagement,
"time_hours": time_hours,
"revenue": revenue,
"views_per_hour": views / time_hours,
"engagement_rate": engagement / views * 100 if views > 0 else 0,
"revenue_per_hour": revenue / time_hours
})
def analyze_performance(self) -> pd.DataFrame:
"""Analyze content performance by platform and type."""
df = pd.DataFrame(self.content_log)
summary = df.groupby(["platform", "content_type"]).agg({
"views": "sum",
"engagement": "sum",
"time_hours": "sum",
"revenue": "sum",
"views_per_hour": "mean",
"engagement_rate": "mean"
}).round(2)
summary["total_roi"] = summary["views"] / summary["time_hours"]
return summary.sort_values("total_roi", ascending=False)
def recommend_focus(self) -> Dict:
"""Recommend content focus based on performance."""
df = pd.DataFrame(self.content_log)
if len(df) < 5:
return {"recommendation": "Not enough data yet"}
best_roi = df.nlargest(3, "views_per_hour")
best_engagement = df.nlargest(3, "engagement_rate")
best_revenue = df.nlargest(3, "revenue_per_hour")
return {
"highest_reach": {
"platform": best_roi["platform"].mode()[0],
"content_type": best_roi["content_type"].mode()[0],
"avg_views_per_hour": best_roi["views_per_hour"].mean()
},
"highest_engagement": {
"platform": best_engagement["platform"].mode()[0],
"content_type": best_engagement["content_type"].mode()[0],
"avg_engagement_rate": best_engagement["engagement_rate"].mean()
},
"highest_revenue": {
"platform": best_revenue["platform"].mode()[0],
"content_type": best_revenue["content_type"].mode()[0],
"avg_revenue_per_hour": best_revenue["revenue_per_hour"].mean()
},
"recommendation": self._generate_recommendation(df)
}
def _generate_recommendation(self, df: pd.DataFrame) -> str:
"""Generate strategic recommendation."""
best_platform = df.groupby("platform")["views_per_hour"].mean().idxmax()
best_type = df.groupby("content_type")["engagement_rate"].mean().idxmax()
return (f"Focus on {best_type} content on {best_platform} "
f"for optimal reach and engagement balance.")
def content_calendar(self, weeks: int = 4) -> List[Dict]:
    """Generate a weekly content calendar.

    Emits a fixed Mon/Wed/Fri/Sat posting template for each week.
    The previous version built a DataFrame of the content log and a
    per-week start date but used neither; that dead code is removed —
    the returned schedule is unchanged.

    Args:
        weeks: Number of weeks to plan (default 4).

    Returns:
        List with one dict per week mapping day names to a
        {"type", "platform"} entry plus the 1-based week number.
    """
    calendar: List[Dict] = []
    for week in range(weeks):
        calendar.append({
            "week": week + 1,
            "monday": {"type": "Thread", "platform": "Twitter"},
            "wednesday": {"type": "Article", "platform": "Substack"},
            "friday": {"type": "Visualization", "platform": "Twitter"},
            "saturday": {"type": "Match Review", "platform": "YouTube"}
        })
    return calendar
# Newsletter growth tracking
class NewsletterGrowth:
    """Track newsletter subscriber growth and engagement.

    Keeps a running subscriber total plus a weekly history of gains,
    churn and open rates, and can project future counts from the
    average weekly net change.
    """

    def __init__(self, start_subscribers: int = 0):
        # Current subscriber count; updated by add_week().
        self.subscribers = start_subscribers
        # One dict per recorded week (see add_week for keys).
        self.history = []

    def add_week(self, new_subs: int, churned: int, open_rate: float):
        """Record one week of metrics and update the running total."""
        net = new_subs - churned
        self.subscribers += net
        self.history.append({
            "date": datetime.now(),
            "total_subscribers": self.subscribers,
            "new": new_subs,
            "churned": churned,
            "net_growth": net,
            "open_rate": open_rate,
        })

    def project_growth(self, weeks: int = 12) -> pd.DataFrame:
        """Project subscriber counts forward using average net growth.

        Returns None until at least four weeks of history exist
        (too little data to average meaningfully).
        """
        if len(self.history) < 4:
            return None
        weekly_change = pd.DataFrame(self.history)["net_growth"].mean()
        running = self.subscribers
        rows = []
        for week_no in range(1, weeks + 1):
            running += weekly_change
            rows.append({
                "week": week_no,
                "projected_subscribers": int(running),
            })
        return pd.DataFrame(rows)

    def monetization_potential(self, paid_conversion_rate: float = 0.05,
                               monthly_price: float = 5) -> Dict:
        """Estimate revenue if a share of subscribers convert to paid."""
        paid = int(self.subscribers * paid_conversion_rate)
        per_month = paid * monthly_price
        return {
            "total_subscribers": self.subscribers,
            "potential_paid_subscribers": paid,
            "conversion_rate": paid_conversion_rate,
            "monthly_price": monthly_price,
            "estimated_monthly_revenue": per_month,
            "estimated_annual_revenue": per_month * 12,
        }
# Example usage: log three pieces of content, then rank platform/type
# combinations by performance.
strategy = ContentStrategyManager()
# Positional arguments: platform, content type, topic, views,
# engagement, hours spent (and optionally revenue — see the last call).
strategy.log_content("Twitter", "Thread", "xG Explainer", 45000, 1200, 2)
strategy.log_content("Substack", "Article", "Liverpool Analysis", 3500, 150, 8)
# Final argument (25) is revenue earned from this piece.
strategy.log_content("YouTube", "Video", "Match Review", 12000, 800, 15, 25)
print("Content Performance:")
print(strategy.analyze_performance())  # R: Content performance tracking
library(tidyverse)
# Track content performance
# Manual content log: one row per published piece, recording reach
# (views), interactions (engagement) and production cost in hours.
# NOTE(review): dates are stored as strings, not Date objects.
content_tracker <- tribble(
  ~date, ~platform, ~content_type, ~topic, ~views, ~engagement, ~time_spent_hours,
  "2024-01-15", "Twitter", "Thread", "xG Explainer", 45000, 1200, 2,
  "2024-01-16", "Substack", "Article", "Liverpool Analysis", 3500, 150, 8,
  "2024-01-18", "YouTube", "Video", "Match Review", 12000, 800, 15,
  "2024-01-20", "Twitter", "Visualization", "Shot Map", 28000, 650, 1
)
# Calculate ROI metrics
# Calculate return-on-investment metrics for logged content.
#
# @param data A data frame with columns: platform, content_type, views,
#   engagement, time_spent_hours.
# @return An ungrouped tibble with one row per (platform, content_type)
#   pair, sorted by roi_score (total views per hour invested), highest
#   first.
calculate_content_roi <- function(data) {
  data %>%
    mutate(
      views_per_hour = views / time_spent_hours,
      # Engagement expressed as a percentage of views
      engagement_rate = engagement / views * 100
    ) %>%
    group_by(platform, content_type) %>%
    summarise(
      total_views = sum(views),
      avg_engagement_rate = mean(engagement_rate),
      avg_time_investment = mean(time_spent_hours),
      # ROI = total reach per hour of production time
      roi_score = total_views / sum(time_spent_hours),
      # Drop the grouping so downstream verbs see a plain tibble
      # (also silences the "summarise() has grouped output" message).
      .groups = "drop"
    ) %>%
    arrange(desc(roi_score))
}
# Rank the logged content by views generated per hour invested.
roi_analysis <- calculate_content_roi(content_tracker)
print(roi_analysis)

Case Study: Building a Media Analytics Brand
Let's examine how successful analytics content creators have built their platforms.
Common Success Factors:
- Consistency: Regular posting schedule (minimum 2-3x per week)
- Visual identity: Recognizable style and color scheme
- Niche focus: Deep expertise in specific area (team, league, metric)
- Engagement: Responding to comments, asking questions
- Cross-platform: Repurposing content across Twitter, Substack, YouTube
- Collaboration: Guest posts, podcast appearances, co-analysis
Growth Timeline (Typical):
- Months 1-3: Build portfolio, find voice, minimal engagement
- Months 4-6: First viral post, growing followers, early recognition
- Months 7-12: Consistent audience, networking opportunities
- Year 2: Monetization options, freelance opportunities, industry connections
- Year 3+: Established brand, multiple revenue streams, potential full-time
Practice Exercises
Hands-On Practice
Complete these exercises to master media analytics:
Create a broadcast-quality xG timeline graphic for a recent match. Use a dark theme, clear typography, and team colors. Export at 1920x1080 resolution.
Create a 5-part Twitter thread analyzing a team's season so far. Include: summary graphic, shot map, defensive metrics, trend line, and conclusion.
Write a 500-word analysis piece about a player using the article generation framework. Include at least 3 data points with proper context.
Build a Shiny or Dash dashboard for match analysis with the following features:
- Shot map with filters (team, period, shot type)
- Pass network visualization with minimum pass threshold slider
- Player stats comparison selector
- Exportable summary report
Deploy the dashboard to a free hosting platform (shinyapps.io or Render).
Plan a 30-minute analytics podcast episode:
- Write an outline with timing for each segment
- Prepare 5 data visualizations to discuss
- Create a companion Twitter thread with key graphics
- Draft show notes with links and data sources
Simulate real-time match coverage:
- Set up a data refresh pipeline (use historical match data with timestamps)
- Create auto-updating graphics every 5 minutes
- Write 10 "live" social posts with increasing tension/narrative
- Generate a post-match summary within 5 minutes of "final whistle"
Create a launch plan for a football analytics newsletter:
- Design a visual template using Canva or similar tool
- Write your first 3 newsletter editions (draft format)
- Create a growth tracking spreadsheet with KPIs
- Plan your first 8 weeks of content
- Set up on Substack, Buttondown, or similar platform
Script and produce a 10-minute analytics video:
- Write a complete script with visual cues
- Create all necessary graphics and animations
- Record a voiceover or screen recording walkthrough
- Design a thumbnail following YouTube best practices
- Write an SEO-optimized title and description
Summary
Key Takeaways
- Broadcast graphics must convey information clearly in seconds
- Different social platforms require different dimensions and formats
- Effective analytics writing leads with narrative, not numbers
- Always provide context, comparisons, and sample size caveats
- Real-time graphics require efficient pipelines and caching
- Visual design should prioritize clarity over complexity
- Interactive dashboards extend reach beyond static visualizations
- Building a personal brand takes consistent effort over 12-24 months
- Monetization follows audience—focus on value creation first
Common Pitfalls to Avoid
- Overcomplicating graphics: Broadcast viewers have 3-5 seconds to absorb information—complexity kills comprehension
- Ignoring aspect ratios: A beautiful 16:9 graphic looks terrible cropped to 1:1 for Instagram
- Using jargon without explanation: "xG" means nothing to 95% of casual fans—always explain or simplify
- Inconsistent posting: Algorithms punish sporadic posting—consistency beats quality for growth
- Neglecting mobile users: 70%+ of social media consumption is on mobile—test all graphics on phones
- Over-claiming from small samples: "Player X has 0.8 xG/90!" loses credibility when based on 3 matches
- Ignoring narrative timing: Posting match analysis 48 hours later misses the engagement window
- Poor color choices: Red text on green pitch backgrounds is unreadable for color-blind viewers
- Forgetting attribution: Always credit data sources—StatsBomb, FBref, etc.
- Monetizing too early: Asking for money before building trust destroys audience growth
Essential Tools and Libraries
| Category | R Libraries | Python Libraries | Purpose |
|---|---|---|---|
| Static Visualization | ggplot2, ggsoccer | matplotlib, mplsoccer | Creating pitch graphics and charts |
| Animation | gganimate | matplotlib.animation, Manim | Moving graphics for video content |
| Interactive Dashboards | Shiny, flexdashboard | Dash, Streamlit, Panel | Web-based interactive analysis |
| Image Export | ragg, Cairo | Pillow, cairosvg | High-resolution export for broadcast |
| Video Creation | av (FFmpeg bindings) | moviepy, ffmpeg-python | Programmatic video generation |
| Report Generation | rmarkdown, Quarto | Jupyter, nbconvert | Automated match reports |
| Newsletter/Email | blastula, mailR | yagmail, emails | Automated email delivery |
| Social Media APIs | rtweet, httr2 | tweepy, instagrapi | Automated posting and analytics |
Platform Specifications Reference
| Platform | Image Dimensions | Video Specs | Best Posting Times (UK) |
|---|---|---|---|
| Twitter/X | 1200x675 (16:9) or 1080x1080 (1:1) | 1920x1080, 2:20 max, MP4 | 12-1pm, 5-6pm weekdays; Match days |
| Instagram Feed | 1080x1080 (1:1) or 1080x1350 (4:5) | 1080x1920 (Stories/Reels), 60s | 11am-1pm, 7-9pm weekdays |
| LinkedIn | 1200x627 or 1080x1080 | 1920x1080, 10min max | 8-10am, 12pm weekdays |
| YouTube | Thumbnail: 1280x720 (16:9) | 1920x1080 or 4K, no limit | 2-4pm weekdays; 9-11am weekends |
| TikTok | 1080x1920 (9:16) | 1080x1920, 10min max | 7-9am, 12-3pm, 7-11pm |
| Broadcast (TV) | 1920x1080 (16:9) | 1080p/50fps or 4K | Pre-match, half-time, post-match |
Content Strategy Framework
The PACE Framework for Analytics Content:
- P - Platform-native: Each platform has its own language and format expectations
- A - Accessible: Explain concepts simply without sacrificing accuracy
- C - Consistent: Same style, same schedule, same quality every time
- E - Engaging: Ask questions, respond to comments, build community
Content Mix Recommendation:
- 60% Reactive: Match analysis, breaking news, current topics
- 30% Evergreen: Explainers, tutorials, historical analysis
- 10% Personal: Behind-the-scenes, methodology discussions, opinions
Monetization Milestones
| Follower Count | Revenue Opportunities | Expected Monthly Revenue |
|---|---|---|
| 0-1,000 | None (focus on content quality) | £0 |
| 1,000-5,000 | Small freelance gigs, guest posts | £0-100 |
| 5,000-10,000 | Substack paid tier, consulting inquiries | £100-500 |
| 10,000-25,000 | Sponsorships, courses, regular freelance | £500-2,000 |
| 25,000-50,000 | Multiple revenue streams, media partnerships | £2,000-5,000 |
| 50,000+ | Full-time potential, brand deals, speaking | £5,000+ |
# Python: Complete media analytics workflow
import matplotlib.pyplot as plt
from mplsoccer import Pitch, VerticalPitch
import pandas as pd
from dataclasses import dataclass
from typing import Dict, List, Optional
from pathlib import Path
import json
@dataclass
class MediaPackage:
    """Complete media package for match analysis.

    Bundles every asset produced from one match's shot data so they can
    be exported together (see MatchMediaGenerator.export_all).
    """
    broadcast_graphic: plt.Figure  # 16:9 shot map styled for broadcast
    social_graphic: plt.Figure     # platform-sized variant for social media
    stats: pd.DataFrame            # per-team summary (shots, xG, goals, conversion)
    tweet_text: str                # single ready-to-post tweet
    thread: List[str]              # ordered tweets forming a thread
    article_draft: str             # markdown article skeleton
class MatchMediaGenerator:
    """Generate a complete media package (graphics + text) from match data.

    Expects a shots DataFrame with columns "team", "x", "y", "xG" and
    "is_goal", where "team" values match the names passed to __init__.

    Fixes versus the original:
    - Backslash-escaped quotes inside f-string expressions (a
      SyntaxError on Python < 3.12) replaced with single quotes.
    - Text files are written with an explicit UTF-8 encoding so the
      emoji in tweets/threads do not crash on non-UTF-8 locales.
    - Output directory creation allows nested paths (parents=True).
    """

    def __init__(self, team_a: str, team_b: str,
                 primary_colors: Dict[str, str] = None):
        """Store team names and per-team plot colors.

        Args:
            team_a: First team name (must match shots "team" values).
            team_b: Second team name.
            primary_colors: Optional mapping of team name -> hex color.
                Defaults to red for team_a and blue for team_b.
        """
        self.team_a = team_a
        self.team_b = team_b
        self.colors = primary_colors or {
            team_a: "#d32f2f",
            team_b: "#1976d2"
        }

    def generate_broadcast_graphic(self, shots_df: pd.DataFrame) -> plt.Figure:
        """Create 16:9 broadcast-quality shot map.

        One scatter layer per team; marker area scales with shot xG and
        the legend carries each team's total xG.
        """
        fig, ax = plt.subplots(figsize=(16, 9), facecolor="#1a472a")
        pitch = Pitch(pitch_color="#1a472a", line_color="white",
                      goal_type="box")
        pitch.draw(ax=ax)
        for team in shots_df["team"].unique():
            team_shots = shots_df[shots_df["team"] == team]
            pitch.scatter(
                team_shots["x"], team_shots["y"],
                s=team_shots["xG"] * 500,  # marker area proportional to xG
                c=self.colors.get(team, "gray"),
                alpha=0.7,
                edgecolors="white",
                linewidth=1,
                ax=ax,
                label=f"{team} (xG: {team_shots['xG'].sum():.2f})"
            )
        ax.legend(loc="lower center", ncol=2, fontsize=12,
                  facecolor="#1a472a", edgecolor="white",
                  labelcolor="white")
        ax.set_title(f"{self.team_a} vs {self.team_b} - Shot Map",
                     color="white", fontsize=24, fontweight="bold", pad=20)
        fig.text(0.5, 0.02, "Data: StatsBomb | @YourHandle",
                 ha="center", color="white", fontsize=10)
        plt.tight_layout()
        return fig

    def generate_social_graphic(self, shots_df: pd.DataFrame,
                                platform: str = "twitter") -> plt.Figure:
        """Create platform-specific social graphic.

        Unknown platforms fall back to the Twitter 16:9 size.
        """
        # Figure sizes in inches per platform (width, height).
        dimensions = {
            "twitter": (12, 6.75),
            "instagram": (10.8, 10.8),
            "instagram_story": (10.8, 19.2),
            "linkedin": (12, 6.27)
        }
        figsize = dimensions.get(platform, (12, 6.75))
        fig, ax = plt.subplots(figsize=figsize, facecolor="#1a472a")
        pitch = Pitch(pitch_color="#1a472a", line_color="white")
        pitch.draw(ax=ax)
        for team in shots_df["team"].unique():
            team_shots = shots_df[shots_df["team"] == team]
            pitch.scatter(
                team_shots["x"], team_shots["y"],
                s=team_shots["xG"] * 400,
                c=self.colors.get(team, "gray"),
                alpha=0.8,
                ax=ax
            )
        ax.set_title(f"{self.team_a} vs {self.team_b}",
                     color="white", fontsize=18, fontweight="bold")
        plt.tight_layout()
        return fig

    def generate_stats_summary(self, shots_df: pd.DataFrame) -> pd.DataFrame:
        """Generate per-team summary: shots, total xG, goals, conversion %."""
        summary = shots_df.groupby("team").agg({
            "xG": ["count", "sum"],
            "is_goal": "sum"
        }).round(2)
        # Flatten the (xG, count)/(xG, sum)/(is_goal, sum) MultiIndex.
        summary.columns = ["Shots", "xG", "Goals"]
        summary["Conversion"] = (summary["Goals"] / summary["Shots"] * 100).round(1)
        return summary.reset_index()

    def generate_tweet(self, stats: pd.DataFrame) -> str:
        """Generate a single scoreline tweet from the stats summary."""
        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]
        return f"""📊 {self.team_a} {int(row_a['Goals'])}-{int(row_b['Goals'])} {self.team_b}
xG: {row_a['xG']:.2f} - {row_b['xG']:.2f}
Shots: {int(row_a['Shots'])} - {int(row_b['Shots'])}
Conversion: {row_a['Conversion']:.1f}% - {row_b['Conversion']:.1f}%
#FootballAnalytics #xG"""

    def generate_thread(self, shots_df: pd.DataFrame,
                        stats: pd.DataFrame) -> List[str]:
        """Generate a five-tweet Twitter thread.

        Note: the original escaped double quotes inside f-string
        expressions, which only parses on Python >= 3.12; single quotes
        keep the output identical and work on all supported versions.
        """
        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]
        thread = [
            f"🧵 THREAD: {self.team_a} vs {self.team_b} - Complete Analysis\n\n"
            f"Final Score: {int(row_a['Goals'])}-{int(row_b['Goals'])}\n"
            f"xG: {row_a['xG']:.2f} - {row_b['xG']:.2f}\n\n"
            f"Let's break it down 👇",
            f"📈 SHOT QUALITY\n\n"
            f"{self.team_a}: {int(row_a['Shots'])} shots, avg xG/shot: {row_a['xG']/row_a['Shots']:.3f}\n"
            f"{self.team_b}: {int(row_b['Shots'])} shots, avg xG/shot: {row_b['xG']/row_b['Shots']:.3f}\n\n"
            f"Higher average = better chance creation",
            f"🎯 FINISHING\n\n"
            f"{self.team_a}: {row_a['Conversion']:.1f}% conversion\n"
            f"{self.team_b}: {row_b['Conversion']:.1f}% conversion\n\n"
            f"League average is ~10-12%",
            "📊 KEY INSIGHT\n\n"
            + self._generate_insight(stats),
            f"That's it for this analysis!\n\n"
            f"🔔 Follow for more analytics content\n"
            f"💬 What did you think of the match?\n\n"
            f"#FootballAnalytics"
        ]
        return thread

    def _generate_insight(self, stats: pd.DataFrame) -> str:
        """Generate key insight sentence from the xG/goal differentials."""
        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]
        xg_diff = row_a["xG"] - row_b["xG"]
        goal_diff = row_a["Goals"] - row_b["Goals"]
        if abs(xg_diff) < 0.3:
            return "Very even match by xG - result could have gone either way."
        elif xg_diff > 0 and goal_diff < 0:
            return f"{self.team_a} dominated xG but lost - clinical finishing won the day for {self.team_b}."
        elif xg_diff < 0 and goal_diff > 0:
            return f"{self.team_a} won despite lower xG - efficient finishing made the difference."
        elif xg_diff > 0.5:
            return f"{self.team_a} dominated the chances - deserved result."
        else:
            return f"{self.team_b} created the better chances."

    def generate_package(self, shots_df: pd.DataFrame) -> MediaPackage:
        """Generate the complete media package from one match's shots."""
        stats = self.generate_stats_summary(shots_df)
        return MediaPackage(
            broadcast_graphic=self.generate_broadcast_graphic(shots_df),
            social_graphic=self.generate_social_graphic(shots_df),
            stats=stats,
            tweet_text=self.generate_tweet(stats),
            thread=self.generate_thread(shots_df, stats),
            article_draft=self._generate_article(shots_df, stats)
        )

    def _generate_article(self, shots_df: pd.DataFrame,
                          stats: pd.DataFrame) -> str:
        """Generate a markdown article draft with the key numbers filled in."""
        row_a = stats[stats["team"] == self.team_a].iloc[0]
        row_b = stats[stats["team"] == self.team_b].iloc[0]
        return f"""# {self.team_a} {int(row_a['Goals'])}-{int(row_b['Goals'])} {self.team_b}: A Statistical Deep Dive
## Match Overview
{self.team_a} faced {self.team_b} in what proved to be a fascinating tactical battle.
The final scoreline of {int(row_a['Goals'])}-{int(row_b['Goals'])} tells part of the story,
but the underlying numbers reveal much more about how this match unfolded.
## Expected Goals Analysis
{self.team_a} generated {row_a['xG']:.2f} xG from {int(row_a['Shots'])} shots,
while {self.team_b} accumulated {row_b['xG']:.2f} xG from {int(row_b['Shots'])} attempts.
{self._generate_insight(stats)}
## Shot Quality Breakdown
Looking at shot quality, {self.team_a} averaged {row_a['xG']/row_a['Shots']:.3f} xG per shot
compared to {row_b['xG']/row_b['Shots']:.3f} for {self.team_b}.
## Conclusion
[Add tactical observations and forward-looking analysis here]
---
*Data: StatsBomb via FBref | Analysis: @YourHandle*
"""

    def export_all(self, package: MediaPackage, output_dir: str = "./output"):
        """Export all media assets to output_dir.

        Writes the graphics as PNG and the text content as txt/json/md,
        creating the directory (and any parents) if needed. Text files
        use UTF-8 explicitly because the content contains emoji.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        # Save graphics
        package.broadcast_graphic.savefig(
            f"{output_dir}/broadcast_16x9.png", dpi=150, bbox_inches="tight"
        )
        package.social_graphic.savefig(
            f"{output_dir}/social_twitter.png", dpi=300, bbox_inches="tight"
        )
        # Save text content
        with open(f"{output_dir}/tweet.txt", "w", encoding="utf-8") as f:
            f.write(package.tweet_text)
        with open(f"{output_dir}/thread.json", "w", encoding="utf-8") as f:
            json.dump(package.thread, f, indent=2)
        with open(f"{output_dir}/article_draft.md", "w", encoding="utf-8") as f:
            f.write(package.article_draft)
        print(f"All assets exported to {output_dir}/")
# Example usage: set up a generator for a specific fixture. The actual
# assets are produced later by calling generate_package() with shot data.
generator = MatchMediaGenerator("Liverpool", "Manchester City")
print("Media generator initialized and ready!")  # R: Complete media analytics workflow
library(tidyverse)
library(ggsoccer)
library(scales)
# Define a complete media content generator
# Build a complete media package (graphics, stats, tweet) from shot data.
#
# @param match_data Data frame of shots with columns x, y, xG, team, is_goal.
# @param team_a Name of the first team (matched against match_data$team).
# @param team_b Name of the second team.
# @return A list with broadcast_graphic, social_graphic, stats, tweet_text
#   and winner elements (winner is a new, backward-compatible addition).
create_media_package <- function(match_data, team_a, team_b) {
  # 1. Broadcast-quality shot map (16:9 dark pitch theme)
  broadcast_shot_map <- ggplot(match_data, aes(x = x, y = y)) +
    annotate_pitch(colour = "white", fill = "#1a472a") +
    geom_point(aes(size = xG, color = team),
               alpha = 0.8) +
    scale_size(range = c(2, 8)) +
    # Name the colour values so each colour binds to its team
    # deterministically; an unnamed vector assigns colours by
    # factor-level (alphabetical) order instead.
    scale_color_manual(values = setNames(c("#d32f2f", "#1976d2"),
                                         c(team_a, team_b))) +
    theme_pitch() +
    theme(
      legend.position = "bottom",
      plot.background = element_rect(fill = "#1a472a"),
      text = element_text(color = "white", family = "Arial Bold"),
      plot.title = element_text(size = 24, hjust = 0.5)
    ) +
    labs(
      title = paste(team_a, "vs", team_b, "- Shot Map"),
      caption = "Data: StatsBomb | @YourHandle"
    )

  # 2. Social media version (square crop)
  social_shot_map <- broadcast_shot_map +
    coord_fixed(ratio = 1, xlim = c(0, 120), ylim = c(0, 80))

  # 3. Summary stats, explicitly ordered so row 1 is team_a and row 2
  # is team_b. group_by() sorts groups alphabetically, so without the
  # arrange() the tweet below could attach the wrong team's numbers.
  stats_summary <- match_data %>%
    group_by(team) %>%
    summarise(
      Shots = n(),
      xG = round(sum(xG), 2),
      Goals = sum(is_goal),
      Conversion = paste0(round(mean(is_goal) * 100, 1), "%"),
      .groups = "drop"
    ) %>%
    arrange(match(team, c(team_a, team_b)))

  # 4. Determine the winner. Braces are required: a complete
  # `if (...) x` followed by `else` on the next line is a syntax
  # error inside a braced function body in R.
  winner <- if (stats_summary$Goals[1] > stats_summary$Goals[2]) {
    team_a
  } else if (stats_summary$Goals[1] < stats_summary$Goals[2]) {
    team_b
  } else {
    "Draw"
  }

  # 5. Tweet text
  tweet <- paste0(
    "📊 ", team_a, " ", stats_summary$Goals[1], "-",
    stats_summary$Goals[2], " ", team_b, "\n\n",
    "xG: ", stats_summary$xG[1], " - ", stats_summary$xG[2], "\n",
    "Shots: ", stats_summary$Shots[1], " - ", stats_summary$Shots[2], "\n\n",
    "#FootballAnalytics #xG"
  )

  # Return package
  list(
    broadcast_graphic = broadcast_shot_map,
    social_graphic = social_shot_map,
    stats = stats_summary,
    tweet_text = tweet,
    winner = winner
  )
}
# Export functions for different platforms
# Save a plot at broadcast dimensions: 16:9 at dpi = 150
# (2400 x 1350 pixels).
#
# @param plot A ggplot object.
# @param filename Output path; format inferred from the file extension.
export_for_broadcast <- function(plot, filename) {
  ggsave(filename, plot, width = 16, height = 9, dpi = 150)
}
# Save a plot sized for a social media platform at dpi = 300.
#
# @param plot A ggplot object.
# @param filename Output path; format inferred from the file extension.
# @param platform One of "twitter", "instagram", "linkedin"; any other
#   value falls back to the Twitter 16:9 dimensions (previously switch()
#   returned NULL here, making ggsave fail with a cryptic error).
export_for_social <- function(plot, filename, platform = "twitter") {
  dims <- switch(platform,
    twitter = c(12, 6.75),
    instagram = c(10.8, 10.8),
    linkedin = c(12, 6.27),
    c(12, 6.75)  # default for unrecognised platforms
  )
  ggsave(filename, plot, width = dims[1], height = dims[2], dpi = 300)
}
print("Media package generator ready!")

Media analytics requires balancing statistical rigor with accessibility. The best analysts make complex data understandable without dumbing it down. Whether you're working for a broadcaster, building your own audience, or creating content for a club, the principles remain the same: clarity, consistency, and compelling narrative.
In the next chapter, we'll explore fantasy football and betting analytics applications—where the intersection of data and entertainment creates unique analytical challenges.
Social Media Content
Analytics content thrives on social media platforms. Creating shareable visualizations requires understanding platform constraints and audience expectations.