Philipp Schrader | df0cbed | 2023-03-07 21:26:02 -0800 | [diff] [blame] | 1 | #!/usr/bin/env julia |
| 2 | |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 3 | module DriverRank |
| 4 | |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 5 | using CSV |
| 6 | using DataFrames: DataFrame |
Philipp Schrader | bf17161 | 2023-03-05 13:16:18 -0800 | [diff] [blame] | 7 | using Transducers: MapCat, Map |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 8 | using DataStructures: OrderedSet |
| 9 | using HypothesisTests: OneSampleZTest, pvalue |
| 10 | using Roots: find_zero |
| 11 | using Statistics: mean |
| 12 | import Optim |
| 13 | using Optim: optimize |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 14 | |
"""
    TeamKey(key)

Thin wrapper around the string that identifies a team. `rank` builds these
from the ranking columns of the input CSV, which are always parsed as strings.
"""
struct TeamKey
    key::String
end
| 18 | |
"""
    DriverMatchup{K}(; winner, loser)

A single pairwise comparison between two teams identified by keys of type `K`:
`winner` was ranked ahead of `loser`.
"""
Base.@kwdef struct DriverMatchup{K}
    winner::K
    loser::K
end
| 23 | |
"""
    DriverRankings{K}(; team_keys, matchup_contributions, expected_win_rate_func)

State needed to score a candidate set of ranking points.

# Fields
- `team_keys`: every team seen in the matchups, in first-seen order; a team's
  position in this set is its column index in `matchup_contributions`.
- `matchup_contributions`: one row per matchup and one column per team.
- `expected_win_rate_func`: callable applied to a ranking point difference
  when computing the log-likelihood.
"""
Base.@kwdef struct DriverRankings{K}
    team_keys::OrderedSet{K}
    matchup_contributions::Matrix{Float64}
    expected_win_rate_func
end
| 29 | |
"""
    DriverRankings(matchups::Vector{DriverMatchup{K}}) where K

Build the ranking model for a list of pairwise matchups.

Collects every distinct team key (in first-seen order, winner before loser),
fills a `length(matchups) × number-of-teams` matrix holding `1` in the
winner's column and `-1` in the loser's column of each matchup row, and
creates the function that maps a ranking point difference to an expected
win rate.
"""
function DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
    team_keys =
        matchups |>
        MapCat(matchup -> (matchup.winner, matchup.loser)) |>
        OrderedSet{K}

    # Key the lookup by `K` (not a concrete key type) so that this constructor
    # works for every key type its signature accepts.
    team_key_indices =
        zip(team_keys, 1:length(team_keys)) |>
        Dict{K, Int}

    matchup_contributions = zeros(length(matchups), length(team_keys))
    for (i, matchup) in enumerate(matchups)
        # Winner is written first, loser second, matching the original
        # write order for the (degenerate) winner == loser case.
        matchup_contributions[i, team_key_indices[matchup.winner]] = 1
        matchup_contributions[i, team_key_indices[matchup.loser]] = -1
    end

    # Create a distribution that represents
    # how to translate player ranking point differences
    # into win rates.
    # The standard deviation is chosen so that a lead of `point_difference`
    # ranking points yields a win rate of `win_rate_at_point_difference`.
    point_difference = 100
    win_rate_at_point_difference = 0.9
    dist_std_dev = find_zero(
        x -> win_rate_at_point_difference -
             pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
        (0, Inf),
    )

    # Left-tail p-value of a one-sample z-test whose sample mean is the
    # ranking point difference `x`, with sample size 1.
    expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)

    return DriverRankings{K}(;
        team_keys,
        matchup_contributions,
        expected_win_rate_func,
    )
end
| 67 | |
"""
    num_teams(dr::DriverRankings)

Number of distinct teams tracked by `dr`.
"""
function num_teams(dr::DriverRankings)
    return length(dr.team_keys)
end
| 69 | |
"""
    objective_value(driver_rankings, ranking_points::Vector)

Convenience method: reshapes the vector of ranking points into a
`1 × num_teams(driver_rankings)` row matrix and forwards it to the
matrix method of `objective_value`.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Vector{F},
) where F
    as_row = reshape(ranking_points, 1, num_teams(driver_rankings))
    return objective_value(driver_rankings, as_row)
end
| 77 | |
"""
    objective_value(driver_rankings, ranking_points::Matrix)

Value to minimize when fitting ranking points: the negated log-likelihood of
the matchups, scaled by `100 / length(ranking_points)`, plus the square of
the mean ranking point value.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # NOTE(review): the squared-mean term presumably keeps the average score
    # near zero, since the likelihood only sees differences — confirm.
    mean_points = convert(F, mean(ranking_points))::F

    scale = 100 / length(ranking_points) # magic number
    negated_likelihood = -(scale * log_likelihood(driver_rankings, ranking_points))
    return negated_likelihood + (mean_points^2)
end
| 89 | |
"""
    log_likelihood(driver_rankings, ranking_points::Matrix)

Sum, over all matchups, of the log of the expected win rate of the recorded
winner given `ranking_points`.
"""
function log_likelihood(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # Each row keeps +points for the winner and -points for the loser;
    # summing along the row gives winner points minus loser points.
    weighted_points = driver_rankings.matchup_contributions .* ranking_points
    point_differentials = sum(weighted_points, dims=2)

    win_rates = driver_rankings.expected_win_rate_func.(point_differentials)
    total = sum(win_rates |> Map(log))
    return convert(F, total)::F
end
| 104 | |
"""
    rank(input_csv, output_csv)

Read per-row team rankings from `input_csv`, fit one score per team, and
write a table with columns `team` and `score` (highest score first) to
`output_csv`.

The input must contain the columns `"Rank 1 (best)"`, `"Rank 2"` and
`"Rank 3 (worst)"`. Each row contributes three matchups: 1st over 2nd,
1st over 3rd, and 2nd over 3rd. Returns the result of `CSV.write`.
"""
function rank(
    input_csv::String,
    output_csv::String,
)
    # Force all team numbers to be parsed as strings.
    df = DataFrame(CSV.File(input_csv, types=String))

    rank1 = "Rank 1 (best)"
    rank2 = "Rank 2"
    rank3 = "Rank 3 (worst)"
    rank_cols = [rank1, rank2, rank3]

    # Wrap every ranking cell in a TeamKey.
    df[!, rank_cols] = TeamKey.(df[!, rank_cols])

    # (winner column, loser column) pairs; all rows of the first pair are
    # emitted before any row of the second, and so on.
    column_pairs = [
        (df[!, rank1], df[!, rank2]),
        (df[!, rank1], df[!, rank3]),
        (df[!, rank2], df[!, rank3]),
    ]
    pair_up_rows = MapCat(((winners, losers),) -> zip(winners, losers))
    to_matchup = Map(((winner, loser),) -> DriverMatchup(; winner, loser))
    matchups = collect(column_pairs |> pair_up_rows |> to_matchup)

    driver_rankings = DriverRankings(matchups)

    # Optimize! Every team starts at zero points.
    x0 = zeros(num_teams(driver_rankings))
    res = optimize(
        x -> objective_value(driver_rankings, x),
        x0,
        Optim.LBFGS(),
        autodiff=:forward,
    )

    teams = driver_rankings.team_keys |> Map(x -> x.key) |> collect
    ranking_points = DataFrame(
        :team=>teams,
        :score=>Optim.minimizer(res),
    )
    sort!(ranking_points, [:score], rev=true)

    # Uncomment to print the results on the console as well.
    #show(ranking_points, allrows=true)

    CSV.write(output_csv, ranking_points)
end
| 147 | |
| 148 | export rank |
| 149 | |
# Run the program if this script is being executed from the command line.
# Expects two arguments: the input CSV path and the output CSV path.
if abspath(PROGRAM_FILE) == @__FILE__
    if length(ARGS) < 2
        # Fail with a usage hint instead of a BoundsError on ARGS.
        println(stderr, "Usage: ", PROGRAM_FILE, " <input_csv> <output_csv>")
        exit(1)
    end
    rank(ARGS[1], ARGS[2])
end
| 154 | |
MICHAELABICK | 12c9226 | 2023-03-04 13:52:32 -0800 | [diff] [blame] | 155 | end # module |