blob: c6e9aaa70a1b731aad5b7828427bb248335ea98b [file] [log] [blame]
module DriverRank
using GoogleSheets: sheets_client, Spreadsheet, CellRange, get, AUTH_SCOPE_READONLY
using CSV
using DataFrames: DataFrame
using Transducers: Cat, MapCat, Map
using DataStructures: OrderedSet
using HypothesisTests: OneSampleZTest, pvalue
using Roots: find_zero
using Statistics: mean
import Optim
using Optim: optimize
using BlackBoxOptim: bboptimize, best_candidate, best_fitness
# using PlotlyJS
using Plots: scatter, hline!, plotlyjs, savefig, plotly
import PlotlyBase: to_html
struct TeamKey
key::String
end
Base.@kwdef struct DriverMatchup{K}
winner::K
loser::K
end
Base.@kwdef struct DriverRankings{K}
team_keys::OrderedSet{K}
matchup_contributions::Matrix{Float64}
expected_win_rate_func
end
function DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
team_keys =
matchups |>
MapCat(matchup -> (matchup.winner, matchup.loser)) |>
OrderedSet{K}
team_key_indecies =
zip(team_keys, 1:length(team_keys)) |>
Dict{TeamKey, Int}
matchup_contributions = zeros(length(matchups), length(team_keys))
for (i, matchup) in enumerate(matchups)
contribution = view(matchup_contributions, i, :)
winner_index = team_key_indecies[matchup.winner]
loser_index = team_key_indecies[matchup.loser]
contribution[winner_index] = 1
contribution[loser_index] = -1
end
# Create a distribution that represents
# how to translate player ranking point differences
# into win rates
point_difference = 100
win_rate_at_point_difference = 0.9
dist_std_dev = find_zero(
x -> win_rate_at_point_difference - pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
(0,Inf),
)
expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)
return DriverRankings{K}(;
team_keys,
matchup_contributions,
expected_win_rate_func,
)
end
num_teams(dr::DriverRankings) = length(dr.team_keys)
function objective_value(
driver_rankings::DriverRankings,
ranking_points::Vector{F},
) where F
ranking_points_row = reshape(ranking_points, (1, num_teams(driver_rankings)))
return objective_value(driver_rankings, ranking_points_row)
end
function objective_value(
driver_rankings::DriverRankings,
ranking_points::Matrix{F},
) where F
average_ranking_point_value::F =
mean(ranking_points)
k = 100 / length(ranking_points) # magic number
return -(k * log_likelihood(driver_rankings, ranking_points)) +
(average_ranking_point_value^2)
end
function log_likelihood(
driver_rankings::DriverRankings,
ranking_points::Matrix{F},
) where F
matchup_ranking_point_differentials =
driver_rankings.matchup_contributions .* ranking_points |>
x -> sum(x, dims=2)
result::F =
driver_rankings.expected_win_rate_func.(matchup_ranking_point_differentials) |>
Map(log) |>
sum
return result
end
function rank()
# client = sheets_client(AUTH_SCOPE_READONLY)
# # spreadsheet_id = "13Cit7WrUxWz79iYVnoMoPc56W7H_cfr92jyT67tb2Xo"
# spreadsheet_id = "1q-Cl2aW4IkHk8Vcfd7OuFt0g4o3itn4SXgBi8Z1b7UE"
# range_name = "Form Responses 1"
# sheet = Spreadsheet(spreadsheet_id)
# range = CellRange(sheet, range_name)
# result = get(client, range).values
# # Filter empty rows
# is_not_empty = result[:, 1] .!= ""
# result = result[is_not_empty, :]
# df = DataFrame(TeamKey.(result[2:end, :]), result[1, :])
df = DataFrame(CSV.File("./data/2022_madtown.csv"))
# rank1 = "Rank 1 (best)"
# rank2 = "Rank 2"
# rank3 = "Rank 3"
# rank4 = "Rank 4"
# rank5 = "Rank 5"
# rank6 = "Rank 6 (worst)"
# matchups =
# [
# (df[!, rank1], df[!, rank2]),
# (df[!, rank1], df[!, rank3]),
# (df[!, rank1], df[!, rank4]),
# (df[!, rank1], df[!, rank5]),
# (df[!, rank1], df[!, rank6]),
# (df[!, rank2], df[!, rank3]),
# (df[!, rank2], df[!, rank4]),
# (df[!, rank2], df[!, rank5]),
# (df[!, rank2], df[!, rank6]),
# (df[!, rank3], df[!, rank4]),
# (df[!, rank3], df[!, rank5]),
# (df[!, rank3], df[!, rank6]),
# (df[!, rank4], df[!, rank5]),
# (df[!, rank4], df[!, rank6]),
# (df[!, rank5], df[!, rank6]),
# ] |>
# MapCat(((winners, losers),) -> zip(winners, losers)) |>
# Map(((winner, loser),) -> DriverMatchup(; winner, loser)) |>
# collect
rank1 = "Rank 1 (best)"
rank2 = "Rank 2"
rank3 = "Rank 3 (worst)"
rank_cols = [rank1, rank2, rank3]
df[!, rank_cols] = TeamKey.(df[!, rank_cols])
matchups =
[
(df[!, rank1], df[!, rank2]),
(df[!, rank1], df[!, rank3]),
(df[!, rank2], df[!, rank3]),
] |>
MapCat(((winners, losers),) -> zip(winners, losers)) |>
Map(((winner, loser),) -> DriverMatchup(; winner, loser)) |>
collect
driver_rankings = DriverRankings(matchups)
# Optimize!
x0 = zeros(num_teams(driver_rankings))
res = optimize(x -> objective_value(driver_rankings, x), x0, Optim.LBFGS(), autodiff=:forward)
ranking_points =
DataFrame(
:team=>driver_rankings.team_keys |> Map(x -> x.key) |> collect,
:score=>Optim.minimizer(res),
) |>
x -> sort!(x, [:score], rev=true)
show(ranking_points, allrows=true)
plotly()
idx = 1:length(ranking_points.team)
plt = scatter(
idx, ranking_points.score,
title="Driver Ranking",
xlabel="Team Number",
xticks=(idx, ranking_points.team),
xrotation=90,
ylabel="Score",
legend=false,
)
hline!(plt, [0.])
savefig(plt, "./driver_ranking.html")
# open("./driver_ranking.html", "w") do io
# PlotlyBase.to_html(io, plt)
# end
return plt
end
export rank
end # module