blob: e759feac0d52e46e7470aa3f321cdc73b288c9f2 [file] [log] [blame]
Philipp Schraderdf0cbed2023-03-07 21:26:02 -08001#!/usr/bin/env julia
2
MICHAELABICK12c92262023-03-04 13:52:32 -08003module DriverRank
4
MICHAELABICK12c92262023-03-04 13:52:32 -08005using CSV
6using DataFrames: DataFrame
Philipp Schraderbf171612023-03-05 13:16:18 -08007using Transducers: MapCat, Map
MICHAELABICK12c92262023-03-04 13:52:32 -08008using DataStructures: OrderedSet
9using HypothesisTests: OneSampleZTest, pvalue
10using Roots: find_zero
11using Statistics: mean
12import Optim
13using Optim: optimize
MICHAELABICK12c92262023-03-04 13:52:32 -080014
"""
    TeamKey(key::String)

Identifier for a single team, wrapping the team's string `key`.

Wrapping the raw string in its own type lets team identifiers be used as
dictionary/set keys and as the `K` parameter of the other types in this module.
"""
struct TeamKey
    key::String
end
18
"""
    DriverMatchup{K}(; winner, loser)

Outcome of a single head-to-head comparison between two teams.

# Fields
- `winner::K`: key of the team that was ranked better.
- `loser::K`: key of the team that was ranked worse.
"""
Base.@kwdef struct DriverMatchup{K}
    winner::K
    loser::K
end
23
"""
    DriverRankings{K}(; team_keys, matchup_contributions, expected_win_rate_func)

Data needed to evaluate how well a set of ranking points explains observed matchups.

# Fields
- `team_keys::OrderedSet{K}`: every team seen in the matchups, in first-seen order.
  A team's position in this set is its column index in `matchup_contributions`.
- `matchup_contributions::Matrix{Float64}`: one row per matchup and one column per
  team; the constructor from matchups writes `1` in the winner's column and `-1` in
  the loser's column, leaving all other entries `0`.
- `expected_win_rate_func`: callable mapping a ranking point differential to an
  expected win rate. Left untyped (`Any`) so any callable can be stored.
"""
Base.@kwdef struct DriverRankings{K}
    team_keys::OrderedSet{K}
    matchup_contributions::Matrix{Float64}
    expected_win_rate_func::Any
end
29
"""
    DriverRankings(matchups::Vector{DriverMatchup{K}}) where K

Build a [`DriverRankings`](@ref) from a list of observed `matchups`.

The constructor:
1. collects every winner and loser key into an `OrderedSet{K}` (first-seen order),
2. builds a `length(matchups) × length(team_keys)` matrix whose row `i` holds `1` in
   the winner's column and `-1` in the loser's column of matchup `i`,
3. solves for a standard deviation such that a ranking point difference of `100`
   yields a left-tail z-test p-value of `0.9`, and stores a function that maps any
   point difference to that left-tail p-value (used as the expected win rate).

Works for any key type `K`; the key-to-column lookup is typed on `K`.
"""
function DriverRankings(matchups::Vector{DriverMatchup{K}}) where K
    team_keys =
        matchups |>
        MapCat(matchup -> (matchup.winner, matchup.loser)) |>
        OrderedSet{K}

    # Map each team key to its column index. The key type must be `K` (not a
    # hard-coded `TeamKey`) so that this generic constructor works for every `K`.
    team_key_indices = Dict{K, Int}(key => i for (i, key) in enumerate(team_keys))

    matchup_contributions = zeros(length(matchups), length(team_keys))
    for (i, matchup) in enumerate(matchups)
        contribution = view(matchup_contributions, i, :)

        winner_index = team_key_indices[matchup.winner]
        loser_index = team_key_indices[matchup.loser]

        # Winner is written first; if winner and loser are the same key the
        # entry ends up as -1.
        contribution[winner_index] = 1
        contribution[loser_index] = -1
    end

    # Create a distribution that represents
    # how to translate player ranking point differences
    # into win rates.
    point_difference = 100
    win_rate_at_point_difference = 0.9
    # Find the standard deviation at which `point_difference` maps to
    # `win_rate_at_point_difference`, searching the bracket (0, Inf).
    dist_std_dev = find_zero(
        x -> win_rate_at_point_difference -
             pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
        (0, Inf),
    )
    expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)

    return DriverRankings{K}(;
        team_keys,
        matchup_contributions,
        expected_win_rate_func,
    )
end
67
"""
    num_teams(dr::DriverRankings)

Return the number of distinct teams in `dr`, i.e. `length(dr.team_keys)`.
"""
function num_teams(dr::DriverRankings)
    return length(dr.team_keys)
end
69
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Vector{F}) where F

Evaluate the objective for a flat vector of ranking points (one entry per team).

The vector is reshaped (without copying) into a `1 × num_teams(driver_rankings)` row
matrix and forwarded to the `Matrix` method. `reshape` throws if the vector length
does not equal the number of teams.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Vector{F},
) where F
    team_count = num_teams(driver_rankings)
    as_row = reshape(ranking_points, 1, team_count)
    return objective_value(driver_rankings, as_row)
end
77
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Matrix{F}) where F

Return the value to be minimized for the given `ranking_points`.

The result is the negated, scaled log-likelihood of the observed matchups plus the
square of the mean ranking point value. The scale factor is
`100 / length(ranking_points)`.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # Converted to `F` so the penalty term keeps the element type of the input.
    mean_points::F = mean(ranking_points)

    # magic number
    likelihood_scale = 100 / length(ranking_points)

    scaled_log_likelihood =
        likelihood_scale * log_likelihood(driver_rankings, ranking_points)
    return -scaled_log_likelihood + mean_points^2
end
89
"""
    log_likelihood(driver_rankings::DriverRankings, ranking_points::Matrix{F}) where F

Return the sum of the logs of the expected win rates of every recorded matchup.

`ranking_points` is broadcast against `driver_rankings.matchup_contributions` (the
`Vector` method of `objective_value` passes a `1 × num_teams` row), and each row of
the product is summed to give that matchup's ranking point differential. The
differential is then mapped through `driver_rankings.expected_win_rate_func`, and the
logs of those values are summed. The result is converted to `F`.
"""
function log_likelihood(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # Row i holds +points for matchup i's winner and -points for its loser.
    signed_points = driver_rankings.matchup_contributions .* ranking_points
    point_differentials = sum(signed_points, dims=2)

    win_rates = driver_rankings.expected_win_rate_func.(point_differentials)

    total::F = sum(win_rates |> Map(log))
    return total
end
104
"""
    rank(input_csv::String, output_csv::String)

Compute team ranking scores from the rankings in `input_csv` and write them to
`output_csv`.

The input must contain the columns `"Rank 1 (best)"`, `"Rank 2"` and
`"Rank 3 (worst)"`. Each row yields three matchups (1 beats 2, 1 beats 3,
2 beats 3). Scores are found by minimizing `objective_value` with L-BFGS, starting
from all zeros. The output has the columns `team` and `score`, sorted by descending
score. Returns whatever `CSV.write` returns.
"""
function rank(
    input_csv::String,
    output_csv::String,
)
    best_col = "Rank 1 (best)"
    middle_col = "Rank 2"
    worst_col = "Rank 3 (worst)"
    ranked_cols = [best_col, middle_col, worst_col]

    # Force all team numbers to be parsed as strings.
    table = DataFrame(CSV.File(input_csv, types=String))
    table[!, ranked_cols] = TeamKey.(table[!, ranked_cols])

    # (winners, losers) column pairings; the order here fixes the matchup order.
    column_pairings = [
        (table[!, best_col], table[!, middle_col]),
        (table[!, best_col], table[!, worst_col]),
        (table[!, middle_col], table[!, worst_col]),
    ]
    matchups = collect(
        column_pairings |>
        MapCat(((winners, losers),) -> zip(winners, losers)) |>
        Map(((winner, loser),) -> DriverMatchup(; winner, loser))
    )

    driver_rankings = DriverRankings(matchups)

    # Optimize!
    initial_points = zeros(num_teams(driver_rankings))
    objective(points) = objective_value(driver_rankings, points)
    result = optimize(objective, initial_points, Optim.LBFGS(), autodiff=:forward)

    teams = driver_rankings.team_keys |> Map(x -> x.key) |> collect
    ranking_points = DataFrame(
        :team => teams,
        :score => Optim.minimizer(result),
    )
    sort!(ranking_points, [:score], rev=true)

    # Uncomment to print the results on the console as well.
    #show(ranking_points, allrows=true)

    return CSV.write(output_csv, ranking_points)
end
147
export rank

# Run the program if this script is being executed from the command line.
# Expects two arguments: the input CSV path and the output CSV path.
if abspath(PROGRAM_FILE) == @__FILE__
    # Fail with a usage message instead of a BoundsError when arguments are missing.
    if length(ARGS) < 2
        println(stderr, "Usage: ", basename(PROGRAM_FILE), " <input_csv> <output_csv>")
        exit(1)
    end
    rank(ARGS[1], ARGS[2])
end
154
MICHAELABICK12c92262023-03-04 13:52:32 -0800155end # module