import argparse
import os
import re
import copy
import uuid
import calendar
import time
import big_query_utils
import datetime
import json
# This import depends on the automake rule protoc_middleman, so please make
# sure protoc_middleman has been built before running this file.
import sys
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
import tmp.benchmarks_pb2 as benchmarks_pb2

_PROJECT_ID = 'grpc-testing'
_DATASET = 'protobuf_benchmark_result'
_TABLE = 'opensource_result_v1'
# A date stamp such as "20181028", used below to pick the day's BigQuery partition.
_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
                       datetime.datetime.now().month,
                       datetime.datetime.now().day)

file_size_map = {}

def get_data_size(file_name):
  if file_name in file_size_map:
    return file_size_map[file_name]
  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
  # The dataset file is a serialized proto, so it must be read in binary mode.
  with open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name,
            "rb") as f:
    benchmark_dataset.ParseFromString(f.read())
  size = 0
  count = 0
  for payload in benchmark_dataset.payload:
    size += len(payload)
    count += 1
  file_size_map[file_name] = (size, 1.0 * size / count)
  return size, 1.0 * size / count
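# For example (hypothetical numbers): a dataset with payloads of 300 and 500
# bytes caches and returns (800, 400.0) -- total bytes and average message size.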


def extract_file_name(file_name):
  name_list = re.split(r"[/\.]", file_name)
  short_file_name = ""
  for name in name_list:
    if name[:14] == "google_message":
      short_file_name = name
  return short_file_name
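# For example, with the usual dataset layout (path is illustrative):
#   extract_file_name("datasets/google_message1/proto3/dataset.google_message1_proto3.pb")
# returns "google_message1_proto3", the last "google_message*" path component.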


cpp_result = []
python_result = []
java_result = []
go_result = []


# CPP results example:
# {
#   "benchmarks": [
#     {
#       "bytes_per_second": int,
#       "cpu_time": int,
#       "name": string,
#       "time_unit": string,
#       ...
#     },
#     ...
#   ],
#   ...
# }
def parse_cpp_result(filename):
  global cpp_result
  if filename == "":
    return
  if filename[0] != '/':
    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  with open(filename) as f:
    results = json.loads(f.read())
    for benchmark in results["benchmarks"]:
      data_filename = re.split("(_parse_|_serialize)", benchmark["name"])[0]
      behavior = benchmark["name"][len(data_filename) + 1:]
      cpp_result.append({
          "language": "cpp",
          "dataFileName": data_filename,
          "behavior": behavior,
          "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
      })
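# As a sketch (name is illustrative): a benchmark called
# "dataset.google_message1_proto3.pb_parse_new" splits into dataFileName
# "dataset.google_message1_proto3.pb" and behavior "parse_new".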


# Python results example:
# [
#   [
#     {
#       "filename": string,
#       "benchmarks": {
#         behavior: results,
#         ...
#       },
#       "message_name": string
#     },
#     ...
#   ], # pure-python
#   ...
# ]
def parse_python_result(filename):
  global python_result
  if filename == "":
    return
  if filename[0] != '/':
    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  with open(filename) as f:
    results_list = json.loads(f.read())
    for results in results_list:
      for result in results:
        _, avg_size = get_data_size(result["filename"])
        for behavior in result["benchmarks"]:
          python_result.append({
              "language": "python",
              "dataFileName": extract_file_name(result["filename"]),
              "behavior": behavior,
              "throughput": avg_size /
                            result["benchmarks"][behavior] * 1e9 / 2 ** 20
          })
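# Unit note: the benchmark value is assumed to be nanoseconds per operation,
# so avg_size / value * 1e9 converts to bytes/second, and the final / 2**20
# expresses throughput in MiB/s.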


# Java results example:
# [
#   {
#     "id": string,
#     "instrumentSpec": {...},
#     "measurements": [
#       {
#         "weight": float,
#         "value": {
#           "magnitude": float,
#           "unit": string
#         },
#         ...
#       },
#       ...
#     ],
#     "run": {...},
#     "scenario": {
#       "benchmarkSpec": {
#         "methodName": string,
#         "parameters": {
#           <parameter name>: <parameter value>
#         },
#         ...
#       },
#       ...
#     }
#   },
#   ...
# ]
def parse_java_result(filename):
  global java_result
  if filename == "":
    return
  if filename[0] != '/':
    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  with open(filename) as f:
    results = json.loads(f.read())
    for result in results:
      total_weight = 0
      total_value = 0
      for measurement in result["measurements"]:
        total_weight += measurement["weight"]
        total_value += measurement["value"]["magnitude"]
      avg_time = total_value * 1.0 / total_weight
      total_size, _ = get_data_size(
          result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
      java_result.append({
          "language": "java",
          "throughput": total_size / avg_time * 1e9 / 2 ** 20,
          "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
          "dataFileName": extract_file_name(
              result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
      })
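# Unit note: each measurement's "magnitude" is assumed to be in nanoseconds
# (the "unit" field is not checked), so total_size / avg_time * 1e9 / 2**20
# yields MiB/s, matching the other parsers.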


# Go benchmark results:
#
# goos: linux
# goarch: amd64
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op
# PASS
# ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
def parse_go_result(filename):
  global go_result
  if filename == "":
    return
  if filename[0] != '/':
    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  with open(filename) as f:
    for line in f:
      result_list = re.split(r"[ \t]+", line)
      if result_list[0][:9] != "Benchmark":
        continue
      first_slash_index = result_list[0].find('/')
      last_slash_index = result_list[0].rfind('/')
      # Strip the leading "Benchmark/.." so the dataset path is relative to
      # the benchmarks directory.
      full_filename = result_list[0][first_slash_index+4:last_slash_index]
      total_bytes, _ = get_data_size(full_filename)
      behavior_with_suffix = result_list[0][last_slash_index+1:]
      # Drop the "-<GOMAXPROCS>" suffix that go test appends to benchmark names.
      last_dash = behavior_with_suffix.rfind("-")
      if last_dash == -1:
        behavior = behavior_with_suffix
      else:
        behavior = behavior_with_suffix[:last_dash]
      go_result.append({
          "dataFileName": extract_file_name(full_filename),
          "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
          "behavior": behavior,
          "language": "go"
      })
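# Worked example from the sample output above: the "Unmarshal-12" line yields
# behavior "Unmarshal", dataset path
# "./datasets/google_message2/dataset.google_message2.pb", and throughput
# total_bytes / 705784 * 1e9 / 2**20 MiB/s (705784 ns per op).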


def get_metadata():
  build_number = os.getenv('BUILD_NUMBER')
  build_url = os.getenv('BUILD_URL')
  job_name = os.getenv('JOB_NAME')
  git_commit = os.getenv('GIT_COMMIT')
  # ghprbActualCommit is the actual head commit of the PR that is being tested.
  git_actual_commit = os.getenv('ghprbActualCommit')

  utc_timestamp = str(calendar.timegm(time.gmtime()))
  metadata = {'created': utc_timestamp}

  if build_number:
    metadata['buildNumber'] = build_number
  if build_url:
    metadata['buildUrl'] = build_url
  if job_name:
    metadata['jobName'] = job_name
  if git_commit:
    metadata['gitCommit'] = git_commit
  if git_actual_commit:
    metadata['gitActualCommit'] = git_actual_commit

  return metadata


def upload_result(result_list, metadata):
  # Create the client once and reuse it for every row. Rows are streamed into
  # the current day's partition via BigQuery's "table$YYYYMMDD" decorator.
  bq = big_query_utils.create_big_query()
  for result in result_list:
    new_result = copy.deepcopy(result)
    new_result['metadata'] = metadata
    row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
                                       _TABLE + "$" + _NOW,
                                       [row]):
      print('Error when uploading result', new_result)


if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  parser.add_argument("-cpp", "--cpp_input_file",
                      help="The C++ benchmark result file's name",
                      default="")
  parser.add_argument("-java", "--java_input_file",
                      help="The Java benchmark result file's name",
                      default="")
  parser.add_argument("-python", "--python_input_file",
                      help="The Python benchmark result file's name",
                      default="")
  parser.add_argument("-go", "--go_input_file",
                      help="The Go benchmark result file's name",
                      default="")
  args = parser.parse_args()

  parse_cpp_result(args.cpp_input_file)
  parse_python_result(args.python_input_file)
  parse_java_result(args.java_input_file)
  parse_go_result(args.go_input_file)

  metadata = get_metadata()
  print("uploading cpp results...")
  upload_result(cpp_result, metadata)
  print("uploading java results...")
  upload_result(java_result, metadata)
  print("uploading python results...")
  upload_result(python_result, metadata)
  print("uploading go results...")
  upload_result(go_result, metadata)
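
# Example invocation (result file names and the script name are illustrative):
#   python this_script.py -cpp=cpp_result.json -java=java_result.json \
#       -python=python_result.json -go=go_result.json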