※参考<統計ソフトRに入力するコマンド>
統計ソフトRのインストール手順をまとめた記事も作成していますので、よろしければご参考ください。
library(BasketballAnalyzeR)
library(gridExtra)
library(dplyr)
# Rookie列付与をASCII変換
install.packages("stringi")
library(stringi)
all_players <- read.csv("Pbox_2425_All.csv", stringsAsFactors = FALSE, fileEncoding = "UTF-8")
rookies <- read.csv("Pbox_2425_Rookie.csv", stringsAsFactors = FALSE, fileEncoding = "UTF-8")
all_players$Player_ascii <- stri_trans_general(all_players$Player, "Latin-ASCII")
rookies$Player_ascii <- stri_trans_general(rookies$Player, "Latin-ASCII")
# Rookie属性をall_playersに付与
all_players$Rookie <- ifelse(all_players$Player_ascii %in% rookies$Player_ascii, "Y", "")
all_players <- all_players[, c(setdiff(names(all_players), c("Rookie", "Player_ascii")), "Rookie")]
install.packages("readr")
library(readr)
write_excel_csv(all_players, "Pbox_2425_All_with_Rookie.csv")
# ファイルの読み込み
rookie <- read.csv("Pbox_2425_Rookie.csv", stringsAsFactors = FALSE)
draft <- read.csv("2024Draft.csv", stringsAsFactors = FALSE)
# Player列をキーにマージ(ルーキー全件をベースに、ドラフト順位を追加)
rookie_merged <- merge(rookie, draft[, c("Player", "Overall.Pick")], by = "Player", all.x = TRUE)
# 列順を調整:ドラフト順位(Overall.Pick)を一番最後の列に移動
rookie_merged <- rookie_merged[, c(setdiff(names(rookie_merged), "Overall.Pick"), "Overall.Pick")]
# 必要なデータの読み込み
Pbox2425 <- read.csv("Pbox_2425_All_with_Rookie.csv", stringsAsFactors = FALSE)
# ルーキー判定は「Rookie」列 == "Y"、かつ1200分以上出場
rookies <- subset(Pbox2425, Rookie == "Y" & MIN >= 1200)
# 2P/3P/FT試投数(合計)を1試合平均に正規化
rookies$Total_Attempts <- (rookies$P2A + rookies$P3A + rookies$FTA) / rookies$GP
# 0~100スケーリング
min_att <- min(rookies$Total_Attempts, na.rm=TRUE)
max_att <- max(rookies$Total_Attempts, na.rm=TRUE)
rookies$Bubble_Size <- 100 * (rookies$Total_Attempts - min_att) / (max_att - min_att)
# 2P・3P成功数も1試合平均に換算
rookies$P2M_pg <- rookies$P2M / rookies$GP
rookies$P3M_pg <- rookies$P3M / rookies$GP
# フリースロー成功率(そのまま色に使う)
# ※必要なら「FTp」が%表記の場合は数値化確認
# ラベル用
labs1 <- c("2-point shots made per Game", "3-point shots made per Game",
"Free throws (% made)", "Total shots attempted per Game")
# MIN500以上の全プレイヤーで平均値を算出
players_500 <- subset(Pbox2425, MIN >= 500)
mx <- mean(players_500$P2M / players_500$GP, na.rm=TRUE)
my <- mean(players_500$P3M / players_500$GP, na.rm=TRUE)
# rookiesのPlayerに対応するOverall.Pickをrookie_mergedから取得
rookies$Overall.Pick <- rookie_merged$Overall.Pick[match(rookies$Player, rookie_merged$Player)]
# ラベル付け
rookies$Label <- ifelse(
is.na(rookies$Overall.Pick),
rookies$Player,
paste0(rookies$Player, ", ", rookies$Overall.Pick)
)
# プロット用データフレーム
X2 <- data.frame(
P = rookies$Label,
P2M = rookies$P2M_pg,
P3M = rookies$P3M_pg,
FTp = rookies$FTp, # フリースロー成功率
AS = rookies$Bubble_Size # バブルサイズ(0-100スケーリング)
)
# プロット(bubbleplotは既存関数)
bubbleplot(
X2,
id = "P",
x = "P2M",
y = "P3M",
col = "FTp",
size = "AS",
labels = labs1,
title = "Bubble plot of NBA rookies: Shooting stats and shots attempted (2024-25 Regular season)",
text.size = 4.5,
mx = mx,
my = my
)
# スティール(STL)の1試合平均
rookies$STL_pg <- rookies$STL / rookies$GP
min_stl <- min(rookies$STL_pg, na.rm = TRUE)
max_stl <- max(rookies$STL_pg, na.rm = TRUE)
# 0~100スケーリング
rookies$Bubble_Size <- 100 * (rookies$STL_pg - min_stl) / (max_stl - min_stl)
# ディフェンスリバウンド、ブロック、PMも1試合平均
rookies$DREB_pg <- rookies$DREB / rookies$GP
rookies$BLK_pg <- rookies$BLK / rookies$GP
rookies$PM_pg <- rookies$PM / rookies$GP
# ラベル
labs2 <- c("Defensive Rebounds per Game", "Blocks per Game",
"Plus-Minus per Game", "Steals per Game")
# リーグ平均値(MIN500以上の全プレイヤーで計算)
players_500 <- subset(Pbox2425, MIN >= 500)
mx <- mean(players_500$DREB / players_500$GP, na.rm=TRUE)
my <- mean(players_500$BLK / players_500$GP, na.rm=TRUE)
# プロット用データ
Y2 <- data.frame(
P = rookies$Label,
DREB = rookies$DREB_pg,
BLK = rookies$BLK_pg,
PM = rookies$PM_pg,
STL = rookies$Bubble_Size
)
# バブルチャート(bubbleplotは既存関数)
bubbleplot(
Y2,
id = "P",
x = "DREB",
y = "BLK",
col = "PM",
size = "STL",
labels = labs2,
title = "Bubble plot of NBA rookies: Defensive stats and Plus-Minus (2024-25 Regular season)",
text.size = 4.5,
mx = mx,
my = my
)
# データの読み込み
Pbox2425 <- read.csv("Pbox_2425_All_with_Rookie.csv", stringsAsFactors = FALSE)
# 1200分以上プレイヤーだけ抽出
idx_1200 <- which(Pbox2425$MIN >= 1200)
# 7変数(1分あたり)で正規化
data2425_1200 <- data.frame(
P2M = Pbox2425$P2M[idx_1200] / Pbox2425$MIN[idx_1200],
P3M = Pbox2425$P3M[idx_1200] / Pbox2425$MIN[idx_1200],
FTM = Pbox2425$FTM[idx_1200] / Pbox2425$MIN[idx_1200],
REB = (Pbox2425$OREB[idx_1200] + Pbox2425$DREB[idx_1200]) / Pbox2425$MIN[idx_1200],
AST = Pbox2425$AST[idx_1200] / Pbox2425$MIN[idx_1200],
STL = Pbox2425$STL[idx_1200] / Pbox2425$MIN[idx_1200],
BLK = Pbox2425$BLK[idx_1200] / Pbox2425$MIN[idx_1200]
)
id <- Pbox2425$Player[idx_1200]
# MDSマップ生成
mds <- MDSmap(data2425_1200)
# 1200分以上のルーキーだけ抽出してインデックス取得
rookie_1200 <- which(Pbox2425$MIN >= 1200 & Pbox2425$Rookie == "Y")
rookie_highlight <- which(id %in% Pbox2425$Player[rookie_1200])
# プロット
plot(
mds,
labels = id,
subset = rookie_highlight,
col.subset = "tomato"
)
xy <- as.data.frame(mds$points)
colnames(xy) <- c("Dim.1", "Dim.2")
xy$Player <- id
xy$Highlight <- FALSE
xy$Highlight[rookie_highlight] <- TRUE
# ズーム範囲でフィルタ
x_range <- c(-1.5, 2)
y_range <- c(-2.1, 2)
xy_zoom <- subset(xy, Dim.1 >= x_range[1] & Dim.1 <= x_range[2] & Dim.2 >= y_range[1] & Dim.2 <= y_range[2])
# プロット
library(ggplot2)
p <- ggplot(xy_zoom, aes(x = Dim.1, y = Dim.2, label = Player)) +
# 1. プロットの●を非表示 → geom_point()を削除・コメントアウト
# geom_point(aes(color = Highlight)) +
# 2. ルーキーは赤色、3. ルーキー以外は黒色
geom_text(
aes(color = Highlight),
size = 4, show.legend = FALSE # show.legend=FALSE で4.凡例非表示
) +
# ルーキー(Highlight==TRUE)は赤、他は黒に明示
scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
theme_minimal() +
coord_cartesian(xlim = x_range, ylim = y_range) +
theme(legend.position = "none") # 念のため凡例も消す
print(p)
# ズーム範囲でフィルタ
x_range <- c(-4, -1.5)
y_range <- c(1, 3.2)
xy_zoom <- subset(xy, Dim.1 >= x_range[1] & Dim.1 <= x_range[2] & Dim.2 >= y_range[1] & Dim.2 <= y_range[2])
p <- ggplot(xy_zoom, aes(x = Dim.1, y = Dim.2, label = Player)) +
geom_text(
aes(color = Highlight),
size = 4, show.legend = FALSE
) +
scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
theme_minimal() +
coord_cartesian(xlim = x_range, ylim = y_range) +
theme(legend.position = "none")
print(p)
plot(mds, z.var=c("P2M", "P3M", "FTM", "REB", "AST", "STL", "BLK"), contour=TRUE, palette=topo.colors)
# PGズーム範囲でフィルタ
x_range <- c(-1, 2.5)
y_range <- c(-3, -0.5)
xy_zoom <- subset(xy, Dim.1 >= x_range[1] & Dim.1 <= x_range[2] & Dim.2 >= y_range[1] & Dim.2 <= y_range[2])
p <- ggplot(xy_zoom, aes(x = Dim.1, y = Dim.2, label = Player)) +
geom_text(
aes(color = Highlight),
size = 4, show.legend = FALSE
) +
scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
theme_minimal() +
coord_cartesian(xlim = x_range, ylim = y_range) +
theme(legend.position = "none")
print(p)
# otherズーム範囲でフィルタ
x_range <- c(-1.5, 1.5)
y_range <- c(0, 2)
xy_zoom <- subset(xy, Dim.1 >= x_range[1] & Dim.1 <= x_range[2] & Dim.2 >= y_range[1] & Dim.2 <= y_range[2])
p <- ggplot(xy_zoom, aes(x = Dim.1, y = Dim.2, label = Player)) +
geom_text(
aes(color = Highlight),
size = 4, show.legend = FALSE
) +
scale_color_manual(values = c("FALSE" = "black", "TRUE" = "red")) +
theme_minimal() +
coord_cartesian(xlim = x_range, ylim = y_range) +
theme(legend.position = "none")
print(p)