Import driver ranking script for scouting
This is what we used at Madtown 2022 for scoring driver ability.
Will add test data in a follow-up patch.
Change-Id: I5b5763f89ea1f6a4876be5af20553756f303b918
Signed-off-by: Michael Bick <mafbick@gmail.com>
diff --git a/scouting/DriverRank/src/DriverRank.jl b/scouting/DriverRank/src/DriverRank.jl
new file mode 100644
index 0000000..c6e9aaa
--- /dev/null
+++ b/scouting/DriverRank/src/DriverRank.jl
@@ -0,0 +1,207 @@
+module DriverRank
+
+using GoogleSheets: sheets_client, Spreadsheet, CellRange, get, AUTH_SCOPE_READONLY
+using CSV
+using DataFrames: DataFrame
+using Transducers: Cat, MapCat, Map
+using DataStructures: OrderedSet
+using HypothesisTests: OneSampleZTest, pvalue
+using Roots: find_zero
+using Statistics: mean
+import Optim
+using Optim: optimize
+using BlackBoxOptim: bboptimize, best_candidate, best_fitness
+# using PlotlyJS
+using Plots: scatter, hline!, plotlyjs, savefig, plotly
+import PlotlyBase: to_html
+
+struct TeamKey # Wrapper type for a team identifier.
+ key::String # identifier text; `rank` reports it in the `team` column
+end
+
+Base.@kwdef struct DriverMatchup{K} # One head-to-head result between two teams keyed by `K`.
+ winner::K # key of the team that came out ahead
+ loser::K # key of the team that came out behind
+end
+
+Base.@kwdef struct DriverRankings{K} # Ranking problem assembled from a list of matchups.
+ team_keys::OrderedSet{K} # distinct teams; iteration order gives each team's column index
+ matchup_contributions::Matrix{Float64} # row per matchup: 1 for winner, -1 for loser, 0 elsewhere
+ expected_win_rate_func # callable mapping a ranking point difference to an expected win rate
+end
+
+"""
+    DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
+
+Assemble the ranking problem for `matchups`. Teams are collected in order of first
+appearance, each matchup becomes a row holding `1` in the winner's column and `-1`
+in the loser's column, and a function is created that maps a ranking point
+difference to an expected win rate.
+"""
+function DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
+    team_keys =
+        matchups |>
+        MapCat(matchup -> (matchup.winner, matchup.loser)) |>
+        OrderedSet{K}
+    # Keyed by `K` rather than `TeamKey`, so that any key type can be looked up.
+    team_key_indices = Dict{K, Int}(key => i for (i, key) in enumerate(team_keys))
+
+    matchup_contributions = zeros(length(matchups), length(team_keys))
+    for (i, matchup) in enumerate(matchups)
+        # Row `i` describes matchup `i`; every other column of the row stays 0.
+        matchup_contributions[i, team_key_indices[matchup.winner]] = 1
+        matchup_contributions[i, team_key_indices[matchup.loser]] = -1
+    end
+
+    # Create a distribution that represents how to translate player ranking point
+    # differences into win rates. Its standard deviation is solved for so that a
+    # lead of `point_difference` points gives `win_rate_at_point_difference`.
+    point_difference = 100
+    win_rate_at_point_difference = 0.9
+    dist_std_dev = find_zero(
+        x -> win_rate_at_point_difference -
+            pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
+        (0, Inf),
+    )
+    expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)
+
+    return DriverRankings{K}(; team_keys, matchup_contributions, expected_win_rate_func)
+end
+
+num_teams(dr::DriverRankings) = length(dr.team_keys) # number of distinct teams in the problem
+
+# Vector method: view the ranking points as a single row with one column per team
+# and delegate to the `Matrix` method.
+function objective_value(driver_rankings::DriverRankings, ranking_points::Vector{F}) where F
+    row_shape = (1, num_teams(driver_rankings))
+    as_row = reshape(ranking_points, row_shape)
+    return objective_value(driver_rankings, as_row)
+end
+
+# Matrix method: negated log likelihood of the matchups, scaled by
+# `100 / length(ranking_points)`, plus the squared mean of the ranking points.
+function objective_value(driver_rankings::DriverRankings, ranking_points::Matrix{F}) where F
+    likelihood_weight = 100 / length(ranking_points) # magic number
+    mean_points::F = mean(ranking_points)
+    fit_term = -(likelihood_weight * log_likelihood(driver_rankings, ranking_points))
+    # `log_likelihood` compares winner and loser points only, so this is the term
+    # that reacts to the overall level of the ranking points.
+    level_term = mean_points^2
+    return fit_term + level_term
+end
+
+# Log likelihood of the recorded matchups for the given `ranking_points`, which
+# arrive from the `Vector` method of `objective_value` as a single row (one column
+# per team).
+function log_likelihood(driver_rankings::DriverRankings, ranking_points::Matrix{F}) where F
+    # Each matchup row is scaled column-wise by the ranking points, so its row sum
+    # is the winner's points minus the loser's points.
+    signed_points = driver_rankings.matchup_contributions .* ranking_points
+    point_differentials = sum(signed_points, dims=2)
+
+    win_rates = driver_rankings.expected_win_rate_func.(point_differentials)
+    # The `::F` annotation converts the summed logs to the element type `F`.
+    total::F = win_rates |> Map(log) |> sum
+    return total
+end
+
+"""
+    rank(; input_path, output_path, rank_cols)
+
+Score driver ability from ranked scouting responses and plot the result.
+
+Each row of the CSV at `input_path` names teams in the `rank_cols` columns, best
+first. Every pair of those columns contributes one matchup per row, with the team
+from the earlier column as the winner. One score per team is then fitted by
+minimizing `objective_value` with L-BFGS, starting from all zeros.
+
+# Keywords
+- `input_path`: CSV file to read; defaults to `"./data/2022_madtown.csv"`.
+- `output_path`: file the plot is saved to; defaults to `"./driver_ranking.html"`.
+- `rank_cols`: ranking column names, best first; defaults to the three-way form
+  `["Rank 1 (best)", "Rank 2", "Rank 3 (worst)"]`. A six-way form would pass
+  `["Rank 1 (best)", "Rank 2", "Rank 3", "Rank 4", "Rank 5", "Rank 6 (worst)"]`.
+
+# Returns
+The scatter plot of team scores. The sorted score table is printed as a side
+effect, and a warning is logged if the optimizer reports that it did not converge.
+"""
+function rank(;
+    input_path="./data/2022_madtown.csv",
+    output_path="./driver_ranking.html",
+    rank_cols=["Rank 1 (best)", "Rank 2", "Rank 3 (worst)"],
+)
+    # Disabled alternative input: read the form responses from Google Sheets.
+    # client = sheets_client(AUTH_SCOPE_READONLY)
+    # # spreadsheet_id = "13Cit7WrUxWz79iYVnoMoPc56W7H_cfr92jyT67tb2Xo"
+    # spreadsheet_id = "1q-Cl2aW4IkHk8Vcfd7OuFt0g4o3itn4SXgBi8Z1b7UE"
+    # range_name = "Form Responses 1"
+
+    # sheet = Spreadsheet(spreadsheet_id)
+    # range = CellRange(sheet, range_name)
+    # result = get(client, range).values
+
+    # # Filter empty rows
+    # is_not_empty = result[:, 1] .!= ""
+    # result = result[is_not_empty, :]
+    # df = DataFrame(TeamKey.(result[2:end, :]), result[1, :])
+
+    df = DataFrame(CSV.File(input_path))
+
+    # Wrap the raw cell values so that the matchups are keyed by `TeamKey`.
+    df[!, rank_cols] = TeamKey.(df[!, rank_cols])
+
+    # Pair each ranking column with every later (worse) column, in the order
+    # (1, 2), (1, 3), ..., (2, 3), ..., so any number of columns is supported.
+    num_ranks = length(rank_cols)
+    column_pairs = [(i, j) for i in 1:num_ranks for j in (i + 1):num_ranks]
+    matchups =
+        column_pairs |>
+        MapCat(((i, j),) -> zip(df[!, rank_cols[i]], df[!, rank_cols[j]])) |>
+        Map(((winner, loser),) -> DriverMatchup(; winner, loser)) |>
+        collect
+
+    driver_rankings = DriverRankings(matchups)
+
+    # Optimize!
+    x0 = zeros(num_teams(driver_rankings))
+    res = optimize(
+        x -> objective_value(driver_rankings, x), x0, Optim.LBFGS(), autodiff=:forward
+    )
+    # Scores from an unconverged run are still reported, but flagged.
+    Optim.converged(res) || @warn "Driver ranking optimization did not converge"
+
+    # One row per team, best (highest score) first.
+    ranking_points =
+        DataFrame(
+            :team=>driver_rankings.team_keys |> Map(x -> x.key) |> collect,
+            :score=>Optim.minimizer(res),
+        ) |>
+        x -> sort!(x, [:score], rev=true)
+    show(ranking_points, allrows=true)
+
+    plotly() # select the Plotly backend of Plots for the figure below
+    idx = 1:length(ranking_points.team)
+    plt = scatter(
+        idx, ranking_points.score,
+        title="Driver Ranking",
+        xlabel="Team Number",
+        xticks=(idx, ranking_points.team),
+        xrotation=90,
+        ylabel="Score",
+        legend=false,
+    )
+    hline!(plt, [0.]) # reference line at a score of zero
+
+    savefig(plt, output_path)
+    # Disabled alternative: write the HTML through PlotlyBase instead.
+    # open("./driver_ranking.html", "w") do io
+    # PlotlyBase.to_html(io, plt)
+    # end
+
+    return plt
+end
+
+export rank
+
+end # module