MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 1 | module DriverRank |
| 2 | |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 3 | using CSV |
| 4 | using DataFrames: DataFrame |
Philipp Schrader | bf17161 | 2023-03-05 13:16:18 -0800 | [diff] [blame^] | 5 | using Transducers: MapCat, Map |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 6 | using DataStructures: OrderedSet |
| 7 | using HypothesisTests: OneSampleZTest, pvalue |
| 8 | using Roots: find_zero |
| 9 | using Statistics: mean |
| 10 | import Optim |
| 11 | using Optim: optimize |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 12 | |
"""
    TeamKey(key)

Wrapper around the string that identifies a team. Wrapping the raw string in
its own type lets team identifiers be told apart from arbitrary strings.
"""
struct TeamKey
    # Raw identifier string for the team.
    key::String
end
| 16 | |
"""
    DriverMatchup{K}(; winner, loser)

One pairwise comparison between two teams identified by keys of type `K`.
"""
Base.@kwdef struct DriverMatchup{K}
    # Key of the team that was ranked better in this comparison.
    winner::K
    # Key of the team that was ranked worse in this comparison.
    loser::K
end
| 21 | |
"""
    DriverRankings{K}(; team_keys, matchup_contributions, expected_win_rate_func)

Precomputed data used to score a candidate set of ranking points against a
list of observed matchups between teams keyed by `K`.
"""
Base.@kwdef struct DriverRankings{K}
    # Distinct team keys; a team's position in this set is its column index
    # in `matchup_contributions`.
    team_keys::OrderedSet{K}
    # One row per matchup and one column per team.
    matchup_contributions::Matrix{Float64}
    # Callable applied to a ranking point difference to obtain a win rate.
    expected_win_rate_func
end
| 27 | |
"""
    DriverRankings(matchups::Vector{DriverMatchup{K}}) where K

Build the ranking model data for a list of matchups.

The result holds:
- `team_keys`: every distinct key that appears as a winner or loser, in order
  of first appearance;
- `matchup_contributions`: a `length(matchups) x length(team_keys)` matrix
  whose row `i` has `1` in the winner's column and `-1` in the loser's column
  of matchup `i` (all other entries are `0`);
- `expected_win_rate_func`: a function mapping a ranking point difference to
  a win rate, calibrated so that a difference of 100 points maps to 0.9.
"""
function DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
    team_keys =
        matchups |>
        MapCat(matchup -> (matchup.winner, matchup.loser)) |>
        OrderedSet{K}
    # Map each key to its column index. The dictionary is keyed by `K` (not a
    # concrete key type) so the constructor works for every key type it
    # accepts.
    team_key_indices =
        zip(team_keys, 1:length(team_keys)) |>
        Dict{K, Int}

    matchup_contributions = zeros(length(matchups), length(team_keys))
    for (i, matchup) in enumerate(matchups)
        contribution = view(matchup_contributions, i, :)

        winner_index = team_key_indices[matchup.winner]
        loser_index = team_key_indices[matchup.loser]

        # Written in this order: if winner and loser are the same key, the
        # -1 overwrites the 1.
        contribution[winner_index] = 1
        contribution[loser_index] = -1
    end

    # Create a distribution that represents
    # how to translate player ranking point differences
    # into win rates.
    # The standard deviation is solved for numerically so that the left-tail
    # p-value at `point_difference` equals `win_rate_at_point_difference`.
    point_difference = 100
    win_rate_at_point_difference = 0.9
    dist_std_dev = find_zero(
        x -> win_rate_at_point_difference - pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
        (0,Inf),
    )
    expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)

    return DriverRankings{K}(;
        team_keys,
        matchup_contributions,
        expected_win_rate_func,
    )
end
| 65 | |
# Number of distinct teams tracked by the rankings.
function num_teams(dr::DriverRankings)
    return length(dr.team_keys)
end
| 67 | |
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Vector)

Convenience method: reshape the vector of ranking points into a single-row
matrix with one column per team and forward to the matrix method.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Vector{F},
) where F
    team_count = num_teams(driver_rankings)
    as_row = reshape(ranking_points, 1, team_count)
    return objective_value(driver_rankings, as_row)
end
| 75 | |
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Matrix)

Value to minimize when fitting ranking points: the negated, scaled
log-likelihood of the matchups plus the square of the mean ranking point.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # Computed (and converted to F) first, before the likelihood is evaluated.
    mean_points::F = mean(ranking_points)

    scale = 100 / length(ranking_points) # magic number
    scaled_neg_log_likelihood =
        -(scale * log_likelihood(driver_rankings, ranking_points))
    # The squared mean is smallest when the ranking points average to zero.
    return scaled_neg_log_likelihood + (mean_points^2)
end
| 87 | |
"""
    log_likelihood(driver_rankings::DriverRankings, ranking_points::Matrix)

Sum of the logs of the expected win rates over all matchups, where each
matchup's win rate is computed from its row of `matchup_contributions`
multiplied elementwise by `ranking_points` and summed across teams.
"""
function log_likelihood(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # Broadcast the points across every matchup row, then collapse each row
    # into a single differential (one value per matchup).
    weighted_points = driver_rankings.matchup_contributions .* ranking_points
    point_differentials = sum(weighted_points, dims=2)

    win_rates = driver_rankings.expected_win_rate_func.(point_differentials)
    total::F = win_rates |> Map(log) |> sum
    return total
end
| 102 | |
"""
    rank(csv_path::AbstractString="./data/2022_madtown.csv")

Read ranking submissions from the CSV file at `csv_path`, fit a score for
every team, and print all teams sorted by score from highest to lowest.

The CSV must contain the columns `"Rank 1 (best)"`, `"Rank 2"` and
`"Rank 3 (worst)"`. Each row yields three matchups: rank 1 over rank 2,
rank 1 over rank 3, and rank 2 over rank 3.

`csv_path` defaults to the data file this function originally hard-coded,
so calling `rank()` with no arguments behaves as before.
"""
function rank(csv_path::AbstractString="./data/2022_madtown.csv")
    df = DataFrame(CSV.File(csv_path))

    rank1 = "Rank 1 (best)"
    rank2 = "Rank 2"
    rank3 = "Rank 3 (worst)"
    rank_cols = [rank1, rank2, rank3]


    # Wrap every cell of the three ranking columns in a TeamKey.
    df[!, rank_cols] = TeamKey.(df[!, rank_cols])
    # Pair the columns as (winners, losers), then flatten into one matchup
    # per row per column pair.
    matchups =
        [
            (df[!, rank1], df[!, rank2]),
            (df[!, rank1], df[!, rank3]),
            (df[!, rank2], df[!, rank3]),
        ] |>
        MapCat(((winners, losers),) -> zip(winners, losers)) |>
        Map(((winner, loser),) -> DriverMatchup(; winner, loser)) |>
        collect

    driver_rankings = DriverRankings(matchups)

    # Optimize!
    # Start with every team at zero points and minimize the objective with
    # L-BFGS using forward-mode automatic differentiation.
    x0 = zeros(num_teams(driver_rankings))
    res = optimize(x -> objective_value(driver_rankings, x), x0, Optim.LBFGS(), autodiff=:forward)

    ranking_points =
        DataFrame(
            :team=>driver_rankings.team_keys |> Map(x -> x.key) |> collect,
            :score=>Optim.minimizer(res),
        ) |>
        x -> sort!(x, [:score], rev=true)
    show(ranking_points, allrows=true)
end
| 137 | |
| 138 | export rank |
| 139 | |
| 140 | end # module |