# blob: c99be1feb0bfc4d1cc8289cf68e65fd7d2f75eb0
module DriverRank

using CSV
using DataFrames: DataFrame
using Transducers: MapCat, Map
using DataStructures: OrderedSet
using HypothesisTests: OneSampleZTest, pvalue
using Roots: find_zero
using Statistics: mean
import Optim
using Optim: optimize

"""
    TeamKey(key)

Wrapper holding a team's identifier in the `key` field as a `String`.

`rank` converts the ranking columns of its input table to `TeamKey` values
before building matchups.
"""
struct TeamKey
    key::String
end

"""
    DriverMatchup{K}(; winner, loser)

Result of one pairwise comparison between two teams identified by keys of
type `K`: `winner` was ranked ahead of `loser`.
"""
Base.@kwdef struct DriverMatchup{K}
    winner::K
    loser::K
end

"""
    DriverRankings{K}(; team_keys, matchup_contributions, expected_win_rate_func)

Precomputed data for fitting ranking points to a set of matchups.

# Fields
- `team_keys`: the distinct team keys; a team's position in this set is its
  column in `matchup_contributions`.
- `matchup_contributions`: one row per matchup and one column per team. The
  `DriverRankings(matchups)` constructor fills each row with `1` in the
  winner's column, `-1` in the loser's column and `0` elsewhere.
- `expected_win_rate_func`: callable mapping a ranking point difference to an
  expected win rate. The field is intentionally left untyped.
"""
Base.@kwdef struct DriverRankings{K}
    team_keys::OrderedSet{K}
    matchup_contributions::Matrix{Float64}
    expected_win_rate_func
end

"""
    DriverRankings(matchups::Vector{DriverMatchup{K}}) where K

Build the `DriverRankings` for a list of pairwise `matchups`.

Teams are collected from the winners and losers in order of first appearance.
The resulting contribution matrix has one row per matchup with `1` in the
winner's column and `-1` in the loser's column.

The expected win rate function is the left-tail p-value of a one-sample
z-test whose standard deviation is solved for so that a ranking point
difference of 100 corresponds to an expected win rate of 0.9.
"""
function DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
    team_keys =
        matchups |>
        MapCat(matchup -> (matchup.winner, matchup.loser)) |>
        OrderedSet{K}

    # Keyed on `K` (not a fixed key type) so that the constructor works for
    # every key type the method signature accepts.
    team_key_indices =
        Dict{K,Int}(key => index for (index, key) in enumerate(team_keys))

    matchup_contributions = zeros(length(matchups), length(team_keys))
    for (i, matchup) in enumerate(matchups)
        # Winner first, loser second: if both are the same team the cell
        # ends up as -1.
        matchup_contributions[i, team_key_indices[matchup.winner]] = 1
        matchup_contributions[i, team_key_indices[matchup.loser]] = -1
    end

    # Create a distribution that represents
    # how to translate player ranking point differences
    # into win rates
    point_difference = 100
    win_rate_at_point_difference = 0.9
    dist_std_dev = find_zero(
        x ->
            win_rate_at_point_difference -
            pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
        (0, Inf),
    )
    expected_win_rate_func(x) =
        pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)

    return DriverRankings{K}(;
        team_keys,
        matchup_contributions,
        expected_win_rate_func,
    )
end

"""
    num_teams(dr::DriverRankings)

Return the number of distinct teams stored in `dr.team_keys`.
"""
num_teams(dr::DriverRankings) = length(dr.team_keys)

"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Vector)

Evaluate the objective for ranking points given as a flat vector.

The vector is reshaped into a `1 × num_teams(driver_rankings)` row and passed
to the matrix method. `reshape` throws if the vector's length differs from
the number of teams.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Vector{F},
) where F
    row_shape = (1, num_teams(driver_rankings))
    return objective_value(driver_rankings, reshape(ranking_points, row_shape))
end

"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Matrix)

Evaluate the objective that `rank` minimizes.

The value is `-(k * log_likelihood(driver_rankings, ranking_points))` plus the
square of the mean ranking point value, where `k = 100 / length(ranking_points)`.
The squared-mean term presumably pins the overall offset of the ranking
points, since the likelihood only depends on differences between teams.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    average_ranking_point_value::F = mean(ranking_points)

    # Empirically chosen weight of the likelihood term relative to the
    # squared-mean term (magic number).
    likelihood_scale = 100 / length(ranking_points)

    scaled_log_likelihood =
        likelihood_scale * log_likelihood(driver_rankings, ranking_points)
    return -scaled_log_likelihood + (average_ranking_point_value^2)
end

"""
    log_likelihood(driver_rankings::DriverRankings, ranking_points::Matrix)

Return the sum over all matchups of the log of the expected win rate of the
recorded winner.

`ranking_points` is broadcast against the contribution matrix; with a
`1 × num_teams` row, summing each row of the product gives the winner's
ranking points minus the loser's for that matchup.
"""
function log_likelihood(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    weighted_contributions =
        driver_rankings.matchup_contributions .* ranking_points
    matchup_ranking_point_differentials = sum(weighted_contributions, dims=2)

    expected_win_rates =
        driver_rankings.expected_win_rate_func.(matchup_ranking_point_differentials)

    result::F = sum(expected_win_rates |> Map(log))
    return result
end

"""
    rank(input_path::AbstractString="./data/2022_madtown.csv")

Read ranking submissions from the CSV file at `input_path`, fit ranking
points for every team and print the teams sorted by descending score.

The file must contain the columns `"Rank 1 (best)"`, `"Rank 2"` and
`"Rank 3 (worst)"`. Each row contributes three matchups: rank 1 over rank 2,
rank 1 over rank 3 and rank 2 over rank 3.

Returns `nothing`; the table is written to standard output.
"""
function rank(input_path::AbstractString="./data/2022_madtown.csv")
    df = DataFrame(CSV.File(input_path))

    rank1 = "Rank 1 (best)"
    rank2 = "Rank 2"
    rank3 = "Rank 3 (worst)"
    rank_cols = [rank1, rank2, rank3]

    df[!, rank_cols] = TeamKey.(df[!, rank_cols])
    matchups =
        [
            (df[!, rank1], df[!, rank2]),
            (df[!, rank1], df[!, rank3]),
            (df[!, rank2], df[!, rank3]),
        ] |>
        MapCat(((winners, losers),) -> zip(winners, losers)) |>
        Map(((winner, loser),) -> DriverMatchup(; winner, loser)) |>
        collect

    driver_rankings = DriverRankings(matchups)

    # Optimize!
    x0 = zeros(num_teams(driver_rankings))
    res = optimize(
        x -> objective_value(driver_rankings, x),
        x0,
        Optim.LBFGS(),
        autodiff=:forward,
    )

    ranking_points = DataFrame(
        :team => [team.key for team in driver_rankings.team_keys],
        :score => Optim.minimizer(res),
    )
    sort!(ranking_points, [:score], rev=true)
    # TODO(phil): Save the output to a CSV file.
    show(ranking_points, allrows=true)
    return nothing
end

export rank

end # module