blob: c6e9aaa70a1b731aad5b7828427bb248335ea98b [file] [log] [blame]
MICHAELABICK12c92262023-03-04 13:52:32 -08001module DriverRank
2
3using GoogleSheets: sheets_client, Spreadsheet, CellRange, get, AUTH_SCOPE_READONLY
4using CSV
5using DataFrames: DataFrame
6using Transducers: Cat, MapCat, Map
7using DataStructures: OrderedSet
8using HypothesisTests: OneSampleZTest, pvalue
9using Roots: find_zero
10using Statistics: mean
11import Optim
12using Optim: optimize
13using BlackBoxOptim: bboptimize, best_candidate, best_fitness
14# using PlotlyJS
15using Plots: scatter, hline!, plotlyjs, savefig, plotly
16import PlotlyBase: to_html
17
"""
    TeamKey(key)

Identifier for one team. Wraps the team's key, stored as a `String` in the
`key` field.
"""
struct TeamKey
    key::String
end
21
"""
    DriverMatchup(; winner, loser)
    DriverMatchup(winner, loser)

Result of a single head-to-head comparison between two teams, each identified
by a key of type `K`. `winner` is the team that came out ahead and `loser` is
the team that did not.
"""
Base.@kwdef struct DriverMatchup{K}
    winner::K
    loser::K
end
26
"""
    DriverRankings(; team_keys, matchup_contributions, expected_win_rate_func)

Data needed to score a candidate set of ranking points against recorded
matchups between teams identified by keys of type `K`.
"""
Base.@kwdef struct DriverRankings{K}
    # Distinct teams; iteration order defines the column order of
    # `matchup_contributions`.
    team_keys::OrderedSet{K}
    # One row per matchup and one column per team.
    matchup_contributions::Matrix{Float64}
    # Callable mapping a ranking point difference to an expected win rate.
    expected_win_rate_func
end
32
"""
    DriverRankings(matchups; point_difference=100, win_rate_at_point_difference=0.9)

Build the ranking model for a vector of `DriverMatchup{K}`.

Teams are numbered in order of first appearance (winner before loser within a
matchup). The resulting `matchup_contributions` matrix has one row per matchup
and one column per team, holding `1` in the winner's column, `-1` in the
loser's column and `0` elsewhere.

# Keywords
- `point_difference`: ranking point lead used to calibrate the win-rate curve.
  Must be positive.
- `win_rate_at_point_difference`: win rate the curve assigns to a lead of
  `point_difference`. Must lie strictly between `0.5` and `1`.

# Throws
- `ArgumentError` if either keyword is outside the range above, because no
  finite standard deviation can then satisfy the calibration.
"""
function DriverRankings(
    matchups::Vector{DriverMatchup{K}};
    point_difference::Real=100,
    win_rate_at_point_difference::Real=0.9,
) where K
    point_difference > 0 || throw(
        ArgumentError("point_difference must be positive, got $point_difference"),
    )
    0.5 < win_rate_at_point_difference < 1 || throw(
        ArgumentError(
            "win_rate_at_point_difference must be in (0.5, 1), " *
            "got $win_rate_at_point_difference",
        ),
    )

    # Collect the distinct teams in order of first appearance.
    team_keys = OrderedSet{K}()
    for matchup in matchups
        push!(team_keys, matchup.winner)
        push!(team_keys, matchup.loser)
    end

    # Keyed by `K` (not a fixed key type) so the constructor works for every
    # matchup key type the signature accepts.
    team_key_indices = Dict{K, Int}(key => i for (i, key) in enumerate(team_keys))

    matchup_contributions = zeros(length(matchups), length(team_keys))
    for (i, matchup) in enumerate(matchups)
        matchup_contributions[i, team_key_indices[matchup.winner]] = 1
        matchup_contributions[i, team_key_indices[matchup.loser]] = -1
    end

    # Create a distribution that represents how to translate ranking point
    # differences into win rates: find the standard deviation for which a lead
    # of `point_difference` gives `win_rate_at_point_difference`.
    dist_std_dev = find_zero(
        x -> win_rate_at_point_difference -
            pvalue(OneSampleZTest(point_difference, x, 1), tail=:left),
        (0, Inf),
    )
    expected_win_rate_func(x) = pvalue(OneSampleZTest(x, dist_std_dev, 1), tail=:left)

    return DriverRankings{K}(;
        team_keys,
        matchup_contributions,
        expected_win_rate_func,
    )
end
70
"""
    num_teams(dr::DriverRankings)

Return the number of distinct teams stored in `dr.team_keys`.
"""
function num_teams(dr::DriverRankings)
    return length(dr.team_keys)
end
72
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Vector)

Reshape `ranking_points` (one entry per team) into a `1 × num_teams` row and
forward it to the `Matrix` method of `objective_value`.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Vector{F},
) where F
    team_count = num_teams(driver_rankings)
    points_as_row = reshape(ranking_points, (1, team_count))
    return objective_value(driver_rankings, points_as_row)
end
80
"""
    objective_value(driver_rankings::DriverRankings, ranking_points::Matrix)

Return the quantity to minimise for `ranking_points`: the negated, scaled
log-likelihood of the recorded matchups plus the squared mean of the ranking
points.
"""
function objective_value(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    mean_points::F = mean(ranking_points)

    likelihood_scale = 100 / length(ranking_points) # magic number
    scaled_fit = likelihood_scale * log_likelihood(driver_rankings, ranking_points)
    return -scaled_fit + mean_points^2
end
92
"""
    log_likelihood(driver_rankings::DriverRankings, ranking_points::Matrix)

Return the sum, over all matchups, of the log of the expected win rate of the
recorded winner under `ranking_points`.

`ranking_points` is broadcast against `matchup_contributions` (matchups ×
teams), so it is expected to be a single row with one entry per team.
"""
function log_likelihood(
    driver_rankings::DriverRankings,
    ranking_points::Matrix{F},
) where F
    # Winner's points minus loser's points for every matchup (one row each).
    signed_points = driver_rankings.matchup_contributions .* ranking_points
    point_differentials = sum(signed_points, dims=2)

    win_rates = driver_rankings.expected_win_rate_func.(point_differentials)
    total::F = sum(win_rates |> Map(log))
    return total
end
107
# Expand every row of `df` into one matchup per pair of rank columns, where the
# team in the earlier (better) column beats the team in the later (worse) one.
function _ranked_matchups(df, rank_cols)
    columns = [TeamKey.(df[!, col]) for col in rank_cols]
    matchups = DriverMatchup{TeamKey}[]
    for better in eachindex(columns), worse in (better + 1):lastindex(columns)
        for (winner, loser) in zip(columns[better], columns[worse])
            push!(matchups, DriverMatchup(; winner, loser))
        end
    end
    return matchups
end

"""
    rank(; data_path, rank_cols, output_path)

Fit ranking points to the ranked ballots in a CSV file, print the sorted
score table, save a scatter plot of the scores as HTML and return the plot.

Each CSV row is one ballot. `rank_cols` names the columns holding the ranked
teams, ordered from best to worst; every pair of columns yields one matchup per
row. Scores are found by minimising `objective_value` with L-BFGS using
forward-mode automatic differentiation, starting from all zeros.

# Keywords
- `data_path`: CSV file to read. Defaults to `"./data/2022_madtown.csv"`.
- `rank_cols`: rank column names, best first. Defaults to
  `["Rank 1 (best)", "Rank 2", "Rank 3 (worst)"]`.
- `output_path`: where the HTML plot is written. Defaults to
  `"./driver_ranking.html"`.

# Throws
- `ArgumentError` if the input produces no matchups (no rows, or fewer than two
  rank columns).
"""
function rank(;
    data_path::AbstractString="./data/2022_madtown.csv",
    rank_cols::AbstractVector{<:AbstractString}=[
        "Rank 1 (best)",
        "Rank 2",
        "Rank 3 (worst)",
    ],
    output_path::AbstractString="./driver_ranking.html",
)
    df = DataFrame(CSV.File(data_path))

    matchups = _ranked_matchups(df, rank_cols)
    isempty(matchups) && throw(
        ArgumentError(
            "no matchups found in $data_path for rank columns $(collect(rank_cols))",
        ),
    )

    driver_rankings = DriverRankings(matchups)

    # Optimize!
    x0 = zeros(num_teams(driver_rankings))
    res = optimize(
        x -> objective_value(driver_rankings, x),
        x0,
        Optim.LBFGS(),
        autodiff=:forward,
    )
    # Scores from a non-converged run are still reported, but flag them.
    Optim.converged(res) ||
        @warn "Ranking optimization did not converge; scores may be unreliable"

    ranking_points = DataFrame(
        :team => [team_key.key for team_key in driver_rankings.team_keys],
        :score => Optim.minimizer(res),
    )
    sort!(ranking_points, [:score], rev=true)
    show(ranking_points, allrows=true)

    plotly()
    idx = 1:length(ranking_points.team)
    plt = scatter(
        idx, ranking_points.score,
        title="Driver Ranking",
        xlabel="Team Number",
        xticks=(idx, ranking_points.team),
        xrotation=90,
        ylabel="Score",
        legend=false,
    )
    # Reference line at a score of zero.
    hline!(plt, [0.])

    savefig(plt, output_path)

    return plt
end
204
205export rank
206
207end # module