MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 1 | module DriverRank |
| 2 | |
| 3 | using GoogleSheets: sheets_client, Spreadsheet, CellRange, get, AUTH_SCOPE_READONLY |
| 4 | using CSV |
| 5 | using DataFrames: DataFrame |
| 6 | using Transducers: Cat, MapCat, Map |
| 7 | using DataStructures: OrderedSet |
| 8 | using HypothesisTests: OneSampleZTest, pvalue |
| 9 | using Roots: find_zero |
| 10 | using Statistics: mean |
| 11 | import Optim |
| 12 | using Optim: optimize |
| 13 | using BlackBoxOptim: bboptimize, best_candidate, best_fitness |
| 14 | # using PlotlyJS |
| 15 | using Plots: scatter, hline!, plotlyjs, savefig, plotly |
| 16 | import PlotlyBase: to_html |
| 17 | |
"""
    TeamKey(key)

Wrapper around the string that identifies a team, so team identifiers carry
their own type instead of being bare `String`s.
"""
struct TeamKey
    # Identifier text for the team.
    key::String
end
| 21 | |
"""
    DriverMatchup{K}(; winner, loser)
    DriverMatchup(winner::K, loser::K)

A single head-to-head comparison between two teams identified by keys of
type `K`: `winner` was ranked ahead of `loser`.
"""
Base.@kwdef struct DriverMatchup{K}
    # Key of the team that was ranked higher in this comparison.
    winner::K
    # Key of the team that was ranked lower in this comparison.
    loser::K
end
| 26 | |
"""
    DriverRankings{K}(; team_keys, matchup_contributions, expected_win_rate_func)

Precomputed data for fitting ranking points to a set of matchups between teams
identified by keys of type `K`.

# Fields
- `team_keys`: the distinct team keys; a team's position in this set is its
  column in `matchup_contributions`.
- `matchup_contributions`: one row per matchup and one column per team.
- `expected_win_rate_func`: callable applied to a ranking-point difference; its
  result is passed through `log` when the likelihood is evaluated.
"""
Base.@kwdef struct DriverRankings{K}
    team_keys::OrderedSet{K}
    matchup_contributions::Matrix{Float64}
    # Deliberately untyped (`Any`): it stores a closure whose concrete type
    # is not nameable at the call site.
    expected_win_rate_func::Any
end
| 32 | |
"""
    DriverRankings(matchups::Vector{DriverMatchup{K}}) where {K}

Build the fitting data for `matchups`.

The distinct team keys are collected in order of first appearance (winner
before loser within each matchup). A `length(matchups) × number-of-teams`
matrix is filled with `1` in the winner's column and `-1` in the loser's column
of each matchup's row, and `0` elsewhere.

The expected-win-rate function is the left-tail p-value of a one-sample z-test
with `n = 1`. Its standard deviation is solved for so that a ranking-point
difference of `100` maps to a win rate of `0.9`.

# Throws
- `KeyError` cannot occur for well-formed input, since every winner and loser
  is inserted into the key set before lookup.
"""
function DriverRankings(matchups::Vector{DriverMatchup{K}}) where {K}
    team_keys =
        matchups |>
        MapCat(matchup -> (matchup.winner, matchup.loser)) |>
        OrderedSet{K}

    # Keyed on `K` rather than the concrete `TeamKey`, so the constructor
    # works for every key type its signature accepts.
    team_key_indices = Dict{K,Int}(key => i for (i, key) in enumerate(team_keys))

    matchup_contributions = zeros(length(matchups), length(team_keys))
    for (i, matchup) in enumerate(matchups)
        # Winner is written first, then loser, so a degenerate matchup of a
        # team against itself ends up as -1 (same as the sequential writes
        # this loop has always performed).
        matchup_contributions[i, team_key_indices[matchup.winner]] = 1
        matchup_contributions[i, team_key_indices[matchup.loser]] = -1
    end

    # Create a distribution that represents how to translate
    # ranking-point differences into win rates.
    point_difference = 100
    win_rate_at_point_difference = 0.9
    # Find the standard deviation at which `point_difference` yields exactly
    # `win_rate_at_point_difference`.
    dist_std_dev = find_zero(
        x ->
            win_rate_at_point_difference -
            pvalue(OneSampleZTest(point_difference, x, 1); tail=:left),
        (0, Inf),
    )
    expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1); tail=:left)

    return DriverRankings{K}(;
        team_keys,
        matchup_contributions,
        expected_win_rate_func,
    )
end
| 70 | |
"""
    num_teams(dr::DriverRankings)

Return how many distinct team keys `dr` holds.
"""
function num_teams(dr::DriverRankings)
    return length(dr.team_keys)
end
| 72 | |
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Vector)

Vector convenience method: view `ranking_points` as a single-row matrix with
one column per team and forward to the matrix method.

`reshape` fails if `length(ranking_points)` differs from
`num_teams(driver_rankings)`.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Vector{F},
) where {F}
    team_count = num_teams(driver_rankings)
    points_as_row = reshape(ranking_points, 1, team_count)
    return objective_value(driver_rankings, points_as_row)
end
| 80 | |
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Matrix)

Value to minimise when fitting ranking points: the negated, scaled
log-likelihood of the matchups plus the square of the mean ranking point.

The squared-mean term grows as the average of `ranking_points` moves away from
zero. The likelihood is scaled by `100 / length(ranking_points)`.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where {F}
    mean_points::F = mean(ranking_points)

    likelihood_scale = 100 / length(ranking_points) # magic number
    scaled_log_likelihood =
        likelihood_scale * log_likelihood(driver_rankings, ranking_points)

    return -(scaled_log_likelihood) + (mean_points^2)
end
| 92 | |
"""
    log_likelihood(driver_rankings::DriverRankings, ranking_points::Matrix)

Sum, over all matchups, of the log of the expected win rate at that matchup's
ranking-point differential.

`ranking_points` is broadcast against the rows of `matchup_contributions`, and
summing each row gives one differential per matchup.
"""
function log_likelihood(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where {F}
    weighted_points = driver_rankings.matchup_contributions .* ranking_points
    point_differentials = sum(weighted_points; dims=2)

    win_rates = driver_rankings.expected_win_rate_func.(point_differentials)

    # Sequential fold through the transducer; the declared type converts the
    # total to the element type of `ranking_points`.
    total::F = sum(win_rates |> Map(log))
    return total
end
| 107 | |
# Expand ranked columns into pairwise matchups: for every pair of columns
# (i, j) with i < j, each row contributes "entry in column i beat entry in
# column j". Pairs are emitted in (1,2), (1,3), ..., (2,3), ... order.
function _pairwise_matchups(ranked_columns)
    n = length(ranked_columns)
    column_pairs = [
        (ranked_columns[i], ranked_columns[j]) for i in 1:(n - 1) for j in (i + 1):n
    ]
    return column_pairs |>
           MapCat(((winners, losers),) -> zip(winners, losers)) |>
           Map(((winner, loser),) -> DriverMatchup(; winner, loser)) |>
           collect
end

"""
    rank(; csv_path, rank_cols, output_path)

Fit ranking points to the ranked-ballot data in a CSV file, print the resulting
table, save a scatter plot of the scores as HTML, and return the plot.

# Keywords
- `csv_path = "./data/2022_madtown.csv"`: CSV file to read.
- `rank_cols = ["Rank 1 (best)", "Rank 2", "Rank 3 (worst)"]`: column names
  ordered from best to worst. Every earlier column is treated as having beaten
  every later column on the same row. At least two columns are required.
- `output_path = "./driver_ranking.html"`: where the plot is saved.

# Returns
The plot object produced by `scatter` (with the zero line added).

# Throws
- `ArgumentError` if fewer than two rank columns are given.
"""
function rank(;
    csv_path="./data/2022_madtown.csv",
    rank_cols=["Rank 1 (best)", "Rank 2", "Rank 3 (worst)"],
    output_path="./driver_ranking.html",
)
    length(rank_cols) >= 2 || throw(
        ArgumentError("need at least two rank columns, got $(length(rank_cols))"),
    )

    # Alternative data source kept for reference (Google Sheets form responses):
    # client = sheets_client(AUTH_SCOPE_READONLY)
    # # spreadsheet_id = "13Cit7WrUxWz79iYVnoMoPc56W7H_cfr92jyT67tb2Xo"
    # spreadsheet_id = "1q-Cl2aW4IkHk8Vcfd7OuFt0g4o3itn4SXgBi8Z1b7UE"
    # range_name = "Form Responses 1"
    # sheet = Spreadsheet(spreadsheet_id)
    # range = CellRange(sheet, range_name)
    # result = get(client, range).values
    # # Filter empty rows
    # is_not_empty = result[:, 1] .!= ""
    # result = result[is_not_empty, :]
    # df = DataFrame(TeamKey.(result[2:end, :]), result[1, :])

    df = DataFrame(CSV.File(csv_path))

    # Wrap each ranked column's entries in `TeamKey` without mutating `df`.
    ranked_columns = [TeamKey.(df[!, col]) for col in rank_cols]
    matchups = _pairwise_matchups(ranked_columns)

    driver_rankings = DriverRankings(matchups)

    # Optimize! Start every team at zero points.
    x0 = zeros(num_teams(driver_rankings))
    res = optimize(
        x -> objective_value(driver_rankings, x),
        x0,
        Optim.LBFGS();
        autodiff=:forward,
    )

    # Scores are listed in the same order as `team_keys`, then sorted so the
    # highest score comes first.
    ranking_points = DataFrame(
        :team => [team.key for team in driver_rankings.team_keys],
        :score => Optim.minimizer(res),
    )
    sort!(ranking_points, [:score]; rev=true)
    show(ranking_points, allrows=true)

    plotly()
    idx = 1:length(ranking_points.team)
    plt = scatter(
        idx, ranking_points.score,
        title="Driver Ranking",
        xlabel="Team Number",
        xticks=(idx, ranking_points.team),
        xrotation=90,
        ylabel="Score",
        legend=false,
    )
    hline!(plt, [0.])

    savefig(plt, output_path)
    # open("./driver_ranking.html", "w") do io
    #     PlotlyBase.to_html(io, plt)
    # end

    return plt
end
| 204 | |
| 205 | export rank |
| 206 | |
| 207 | end # module |