Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame^] | 1 | // Copyright 2022 Google LLC |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | use std::{ |
| 10 | borrow::Cow, |
| 11 | collections::HashSet, |
| 12 | fmt::Display, |
| 13 | io::{self, Read}, |
| 14 | path::PathBuf, |
| 15 | process, |
| 16 | }; |
| 17 | |
| 18 | use anyhow::Error; |
| 19 | use clap::{crate_authors, crate_version, Arg, ArgMatches, Command}; |
| 20 | use itertools::Itertools; |
| 21 | use mdbook::{book::Book, preprocess::CmdPreprocessor}; |
| 22 | use proc_macro2::{Span, TokenStream}; |
| 23 | use rayon::prelude::*; |
| 24 | use syn::{Expr, __private::ToTokens, spanned::Spanned}; |
| 25 | |
/// Long-form description of this tool, handed to clap via `long_about` in
/// `main`. It explains why autocxx doctests need a dedicated preprocessor and
/// shows the snippet shape that the preprocessor looks for.
static LONG_ABOUT: &str =
"This is an mdbook preprocessor tailored for autocxx code examples. Autocxx
code examples don't fit well with 'mdbook test' or even alternatives such as
'skeptic' or 'doc_comment' for these reasons:

a) A single code example consists of both Rust and C++ code. They must be
linked into a separate executable, i.e. we must make one executable per
doc test.
b) The code examples must be presented/formatted nicely with suitable
separate blocks for the Rust and C++ code.
c) mdbook test is not good at handling doctests which have dependencies.

This preprocessor will find code snippets like this:
```rust,autocxx
autocxx_integration_tests::doctest(
\" /* any C++ implementation code */\",
\" /* C++ header code */\",
{
/* complete Rust code including 'main' */
}
)
```

and will build and run them, while emitting better formatted markdown blocks
for subsequent preprocessors and renderers.
";
| 51 | |
/// Name of the environment variable that restricts a run to one doctest.
/// When set, `preprocess` parses its value as a number and runs only the
/// doctest whose 1-based ID (as printed in the progress output) matches it.
static RUST_MDBOOK_SINGLE_TEST: &str = "RUST_MDBOOK_SINGLE_TEST";
| 53 | |
| 54 | fn main() { |
| 55 | let matches = Command::new("autocxx-mdbook-preprocessor") |
| 56 | .version(crate_version!()) |
| 57 | .author(crate_authors!()) |
| 58 | .about("Expands and tests code examples in the autocxx book.") |
| 59 | .long_about(LONG_ABOUT) |
| 60 | .subcommand( |
| 61 | Command::new("supports") |
| 62 | .arg(Arg::new("renderer").required(true)) |
| 63 | .about("Whether a given renderer is supported by this preprocessor"), |
| 64 | ) |
| 65 | .arg( |
| 66 | Arg::new("skip_tests") |
| 67 | .short('s') |
| 68 | .help("Skip running doctests"), |
| 69 | ) |
| 70 | .arg( |
| 71 | Arg::new("manifest_dir") |
| 72 | .long("manifest-dir") |
| 73 | .help("Path to directory containing outermost autocxx Cargo.toml; necessary for trybuild to build test code successfully") |
| 74 | .default_value_os(calculate_cargo_dir().as_os_str()) |
| 75 | ) |
| 76 | .get_matches(); |
| 77 | if let Some(supports_matches) = matches.subcommand_matches("supports") { |
| 78 | // Only do our preprocessing and testing for the html renderer, not linkcheck. |
| 79 | if supports_matches.value_of("renderer") == Some("html") { |
| 80 | process::exit(0); |
| 81 | } else { |
| 82 | process::exit(1); |
| 83 | } |
| 84 | } |
| 85 | preprocess(&matches).unwrap(); |
| 86 | } |
| 87 | |
/// Computes the default value for the `manifest_dir` argument.
///
/// Starts from the path of the running executable, climbs up to three
/// directory levels (stopping early if there is no further parent), and
/// appends `integration-tests`.
///
/// # Panics
///
/// Panics if the path of the current executable cannot be determined.
fn calculate_cargo_dir() -> PathBuf {
    let mut dir = std::env::current_exe().unwrap();
    let mut levels_left = 3;
    while levels_left > 0 {
        // `pop` leaves the path untouched once there is no parent left.
        dir.pop();
        levels_left -= 1;
    }
    dir.push("integration-tests");
    dir
}
| 95 | |
| 96 | fn preprocess(args: &ArgMatches) -> Result<(), Error> { |
| 97 | let (_, mut book) = CmdPreprocessor::parse_input(io::stdin())?; |
| 98 | |
| 99 | env_logger::builder().init(); |
| 100 | let mut test_cases = Vec::new(); |
| 101 | |
| 102 | Book::for_each_mut(&mut book, |sec| { |
| 103 | if let mdbook::BookItem::Chapter(chapter) = sec { |
| 104 | let filename = chapter |
| 105 | .path |
| 106 | .as_ref() |
| 107 | .map(|pb| pb.to_string_lossy()) |
| 108 | .unwrap_or_default() |
| 109 | .to_string(); |
| 110 | chapter.content = substitute_chapter(&chapter.content, &filename, &mut test_cases); |
| 111 | } |
| 112 | }); |
| 113 | |
| 114 | // Now run any test cases we accumulated. |
| 115 | if !args.is_present("skip_tests") { |
| 116 | let stdout_gag = gag::BufferRedirect::stdout().unwrap(); |
| 117 | let num_tests = test_cases.len(); |
| 118 | let fails: Vec<_> = test_cases |
| 119 | .into_par_iter() |
| 120 | .enumerate() |
| 121 | .filter_map(|(counter, case)| { |
| 122 | if let Ok(test) = std::env::var(RUST_MDBOOK_SINGLE_TEST) { |
| 123 | let desired_id: usize = test.parse().unwrap(); |
| 124 | if desired_id != (counter + 1) { |
| 125 | return None; |
| 126 | } |
| 127 | } |
| 128 | eprintln!( |
| 129 | "Running doctest {}/{} at {}", |
| 130 | counter + 1, |
| 131 | num_tests, |
| 132 | &case.location |
| 133 | ); |
| 134 | let err = autocxx_integration_tests::doctest( |
| 135 | &case.cpp, |
| 136 | &case.hdr, |
| 137 | case.rs, |
| 138 | args.value_of_os("manifest_dir").unwrap(), |
| 139 | ); |
| 140 | let desc = match err { |
| 141 | Ok(_) => "passed".to_string(), |
| 142 | Err(ref err) => format!("failed: {:?}", err), |
| 143 | }; |
| 144 | eprintln!( |
| 145 | "Doctest {}/{} at {} {}.", |
| 146 | counter + 1, |
| 147 | num_tests, |
| 148 | &case.location, |
| 149 | desc |
| 150 | ); |
| 151 | if err.is_err() { |
| 152 | Some(TestId { |
| 153 | location: case.location, |
| 154 | test_id: counter + 1, |
| 155 | }) |
| 156 | } else { |
| 157 | None |
| 158 | } |
| 159 | }) |
| 160 | .collect(); |
| 161 | let mut stdout_str = String::new(); |
| 162 | stdout_gag |
| 163 | .into_inner() |
| 164 | .read_to_string(&mut stdout_str) |
| 165 | .unwrap(); |
| 166 | if !stdout_str.is_empty() { |
| 167 | eprintln!("Stdout from tests:\n{}", stdout_str); |
| 168 | } |
| 169 | if !fails.is_empty() { |
| 170 | panic!( |
| 171 | "One or more tests failed: {}. To rerun an individual test use {}.", |
| 172 | fails.into_iter().sorted().map(|s| s.to_string()).join(", "), |
| 173 | RUST_MDBOOK_SINGLE_TEST |
| 174 | ); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | serde_json::to_writer(io::stdout(), &book)?; |
| 179 | |
| 180 | Ok(()) |
| 181 | } |
| 182 | |
| 183 | fn substitute_chapter(chapter: &str, filename: &str, test_cases: &mut Vec<TestCase>) -> String { |
| 184 | let mut state = ChapterParseState::Start; |
| 185 | let mut out = Vec::new(); |
| 186 | for (line_no, line) in chapter.lines().enumerate() { |
| 187 | let line_type = recognize_line(line); |
| 188 | let mut push_line = true; |
| 189 | state = match state { |
| 190 | ChapterParseState::Start => match line_type { |
| 191 | LineType::CodeBlockStart | LineType::CodeBlockEnd => { |
| 192 | ChapterParseState::OtherCodeBlock |
| 193 | } |
| 194 | LineType::CodeBlockStartAutocxx(block_flags) => { |
| 195 | push_line = false; |
| 196 | ChapterParseState::OurCodeBlock(block_flags, Vec::new()) |
| 197 | } |
| 198 | LineType::Misc => ChapterParseState::Start, |
| 199 | }, |
| 200 | ChapterParseState::OtherCodeBlock => match line_type { |
| 201 | LineType::CodeBlockEnd => ChapterParseState::Start, |
| 202 | LineType::Misc => ChapterParseState::OtherCodeBlock, |
| 203 | _ => panic!("Found confusing conflicting block markers"), |
| 204 | }, |
| 205 | ChapterParseState::OurCodeBlock(flags, mut lines) => match line_type { |
| 206 | LineType::Misc => { |
| 207 | push_line = false; |
| 208 | lines.push(line.to_string()); |
| 209 | ChapterParseState::OurCodeBlock(flags, lines) |
| 210 | } |
| 211 | LineType::CodeBlockEnd => { |
| 212 | let location = MiniSpan { |
| 213 | filename: filename.to_string(), |
| 214 | start_line: line_no - lines.len(), |
| 215 | }; |
| 216 | out.extend(handle_code_block(flags, lines, location, test_cases)); |
| 217 | push_line = false; |
| 218 | ChapterParseState::Start |
| 219 | } |
| 220 | _ => panic!("Found something unexpected in one of our code blocks"), |
| 221 | }, |
| 222 | }; |
| 223 | if push_line { |
| 224 | out.push(line.to_string()); |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | out.join("\n") |
| 229 | } |
| 230 | |
| 231 | #[derive(PartialEq, Eq, PartialOrd, Ord)] |
| 232 | struct TestId { |
| 233 | location: MiniSpan, |
| 234 | test_id: usize, |
| 235 | } |
| 236 | |
| 237 | impl Display for TestId { |
| 238 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 239 | write!(f, "(ID {}): {}", self.test_id, self.location) |
| 240 | } |
| 241 | } |
| 242 | |
/// A minimal stand-in for `proc_macro2::Span` that records only a file name
/// and a starting line. Intended for basic diagnostics.
///
/// The derived ordering compares `filename` first and `start_line` second.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
struct MiniSpan {
    filename: String,
    start_line: usize,
}

impl Display for MiniSpan {
    /// Formats as `<filename> line <start_line>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let MiniSpan {
            filename,
            start_line,
        } = self;
        write!(f, "{} line {}", filename, start_line)
    }
}
| 256 | |
/// One doctest extracted from the book, ready to be built and run.
struct TestCase {
    /// C++ implementation code (first argument of the snippet's call).
    cpp: String,
    /// C++ header code (second argument of the snippet's call).
    hdr: String,
    /// The Rust code of the snippet, parsed as a complete file.
    rs: TokenStream,
    /// Where the snippet was found, for progress and failure messages.
    location: MiniSpan,
}

// Test cases are handed to rayon worker threads in `preprocess`, which needs
// `TestCase: Send`.
// NOTE(review): presumably the `TokenStream` field is what prevents an
// automatic `Send` impl; this manual impl asserts that moving it to another
// thread is sound here. TODO confirm and record the invariant as a proper
// `SAFETY:` justification.
unsafe impl Send for TestCase {}
| 265 | |
/// State of the line-by-line chapter parser in `substitute_chapter`.
enum ChapterParseState {
    /// Not currently inside any code block.
    Start,
    /// Inside a code block that is not an autocxx block; its lines are passed
    /// through unchanged.
    OtherCodeBlock,
    /// Inside an autocxx block (an opening fence carrying the `autocxx` flag
    /// has been seen). Holds the block's flags and the lines collected so far.
    OurCodeBlock(HashSet<String>, Vec<String>),
}
| 271 | |
/// Classification of a single chapter line, as produced by `recognize_line`.
enum LineType {
    /// A fence followed by flags that do not include `autocxx`.
    CodeBlockStart,
    /// A fence whose flags include `autocxx`; carries the full flag set.
    CodeBlockStartAutocxx(HashSet<String>),
    /// A bare fence with nothing after the three backticks.
    CodeBlockEnd,
    /// Any other line.
    Misc,
}
| 278 | |
/// Extracts the comma-separated flags that follow the three-character fence
/// at the start of `line` (for example `rust` and `autocxx` from
/// "```rust,autocxx").
///
/// Whitespace around each flag is ignored, so "```rust, autocxx" yields the
/// same set. A line that is too short to carry a fence yields a set holding
/// only the empty string rather than panicking.
fn code_block_flags(line: &str) -> HashSet<String> {
    // `get` avoids a panic when the line is shorter than the fence or when
    // byte 3 is not a character boundary.
    let info = line.get(3..).unwrap_or_default();
    info.split(',').map(|flag| flag.trim().to_string()).collect()
}
| 283 | |
| 284 | fn recognize_line(line: &str) -> LineType { |
| 285 | if line.starts_with("```") && line.len() > 3 { |
| 286 | let flags = code_block_flags(line); |
| 287 | if flags.contains("autocxx") { |
| 288 | LineType::CodeBlockStartAutocxx(flags) |
| 289 | } else { |
| 290 | LineType::CodeBlockStart |
| 291 | } |
| 292 | } else if line == "```" { |
| 293 | LineType::CodeBlockEnd |
| 294 | } else { |
| 295 | LineType::Misc |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | fn handle_code_block( |
| 300 | flags: HashSet<String>, |
| 301 | lines: Vec<String>, |
| 302 | location: MiniSpan, |
| 303 | test_cases: &mut Vec<TestCase>, |
| 304 | ) -> impl Iterator<Item = String> { |
| 305 | let input_str = lines.join("\n"); |
| 306 | let fn_call = syn::parse_str::<syn::Expr>(&input_str) |
| 307 | .unwrap_or_else(|_| panic!("Unable to parse outer function at {}", location)); |
| 308 | let fn_call = match fn_call { |
| 309 | Expr::Call(expr) => expr, |
| 310 | _ => panic!("Parsing unexpected"), |
| 311 | }; |
| 312 | let mut args_iter = fn_call.args.iter(); |
| 313 | let cpp = unescape_quotes(&extract_span(&lines, args_iter.next().unwrap().span())); |
| 314 | let hdr = unescape_quotes(&extract_span(&lines, args_iter.next().unwrap().span())); |
| 315 | let rs = extract_span(&lines, args_iter.next().unwrap().span()); |
| 316 | let mut output = vec![ |
| 317 | "#### C++ header:".to_string(), |
| 318 | "```cpp".to_string(), |
| 319 | hdr.to_string(), |
| 320 | "```".to_string(), |
| 321 | ]; |
| 322 | if !cpp.is_empty() && !flags.contains("hidecpp") { |
| 323 | output.push("#### C++ implementation:".to_string()); |
| 324 | output.push("```cpp".to_string()); |
| 325 | output.push(cpp.to_string()); |
| 326 | output.push("```".to_string()); |
| 327 | } |
| 328 | output.push("#### Rust:".to_string()); |
| 329 | output.push("```rust,noplayground".to_string()); |
| 330 | output.push(escape_hexathorpes(&rs).to_string()); |
| 331 | output.push("```".to_string()); |
| 332 | |
| 333 | // Don't run the test cases yet, because we want the preprocessor to spot |
| 334 | // basic formatting errors before getting into the time consuming business of |
| 335 | // running tests. |
| 336 | if !flags.contains("nocompile") { |
| 337 | test_cases.push(TestCase { |
| 338 | cpp, |
| 339 | hdr, |
| 340 | rs: syn::parse_file(&rs) |
| 341 | .unwrap_or_else(|_| panic!("Unable to parse code at {}", location)) |
| 342 | .to_token_stream(), |
| 343 | location, |
| 344 | }); |
| 345 | } |
| 346 | |
| 347 | output.into_iter() |
| 348 | } |
| 349 | |
| 350 | fn extract_span(text: &[String], span: Span) -> Cow<str> { |
| 351 | let start_line = span.start().line - 1; |
| 352 | let start_col = span.start().column; |
| 353 | let end_line = span.end().line - 1; |
| 354 | let end_col = span.end().column; |
| 355 | if start_line == end_line { |
| 356 | Cow::Borrowed(&text[start_line][start_col + 1..end_col - 1]) |
| 357 | } else { |
| 358 | let start_subset = &text[start_line][start_col + 1..]; |
| 359 | let end_subset = &text[end_line][..end_col - 1]; |
| 360 | let mid_lines = &text[start_line + 1..end_line]; |
| 361 | Cow::Owned( |
| 362 | std::iter::once(start_subset.to_string()) |
| 363 | .chain(mid_lines.iter().cloned()) |
| 364 | .chain(std::iter::once(end_subset.to_string())) |
| 365 | .join("\n"), |
| 366 | ) |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | fn escape_hexathorpes(input: &str) -> Cow<str> { |
| 371 | let re = regex::Regex::new(r"(?m)^(?P<ws>\s*)#(?P<c>.*)").unwrap(); |
| 372 | re.replace_all(input, "$ws##$c") |
| 373 | } |
| 374 | |
/// Turns every backslash-escaped double quote (`\"`) in `input` into a plain
/// double quote and returns the result as a new `String`.
fn unescape_quotes(input: &str) -> String {
    const ESCAPED_QUOTE: &str = r#"\""#;
    const PLAIN_QUOTE: &str = "\"";

    let pieces: Vec<&str> = input.split(ESCAPED_QUOTE).collect();
    pieces.join(PLAIN_QUOTE)
}