Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 1 | // Copyright 2022 Google LLC |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | use std::{ |
| 10 | borrow::Cow, |
| 11 | collections::HashSet, |
Austin Schuh | 6ea9bfa | 2023-08-06 19:05:10 -0700 | [diff] [blame^] | 12 | ffi::OsString, |
Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 13 | fmt::Display, |
| 14 | io::{self, Read}, |
| 15 | path::PathBuf, |
| 16 | process, |
| 17 | }; |
| 18 | |
| 19 | use anyhow::Error; |
| 20 | use clap::{crate_authors, crate_version, Arg, ArgMatches, Command}; |
| 21 | use itertools::Itertools; |
| 22 | use mdbook::{book::Book, preprocess::CmdPreprocessor}; |
| 23 | use proc_macro2::{Span, TokenStream}; |
| 24 | use rayon::prelude::*; |
| 25 | use syn::{Expr, __private::ToTokens, spanned::Spanned}; |
| 26 | |
/// Extended `--help` text describing why this preprocessor exists and what
/// kind of fenced code block it looks for.
static LONG_ABOUT: &str =
    "This is an mdbook preprocessor tailored for autocxx code examples. Autocxx
code examples don't fit well with 'mdbook test' or even alternatives such as
'skeptic' or 'doc_comment' for these reasons:

a) A single code example consists of both Rust and C++ code. They must be
linked into a separate executable, i.e. we must make one executable per
doc test.
b) The code examples must be presented/formatted nicely with suitable
separate blocks for the Rust and C++ code.
c) mdbook test is not good at handling doctests which have dependencies.

This preprocessor will find code snippets like this:
```rust,autocxx
autocxx_integration_tests::doctest(
\" /* any C++ implementation code */\",
\" /* C++ header code */\",
{
/* complete Rust code including 'main' */
}
)
```

and will build and run them, while emitting better formatted markdown blocks
for subsequent preprocessors and renderers.
";
| 52 | |
/// Name of the environment variable which, when set, restricts the run to the
/// single doctest whose 1-based ID matches the variable's value.
static RUST_MDBOOK_SINGLE_TEST: &str = "RUST_MDBOOK_SINGLE_TEST";
| 54 | |
| 55 | fn main() { |
| 56 | let matches = Command::new("autocxx-mdbook-preprocessor") |
| 57 | .version(crate_version!()) |
| 58 | .author(crate_authors!()) |
| 59 | .about("Expands and tests code examples in the autocxx book.") |
| 60 | .long_about(LONG_ABOUT) |
| 61 | .subcommand( |
| 62 | Command::new("supports") |
| 63 | .arg(Arg::new("renderer").required(true)) |
| 64 | .about("Whether a given renderer is supported by this preprocessor"), |
| 65 | ) |
| 66 | .arg( |
| 67 | Arg::new("skip_tests") |
| 68 | .short('s') |
| 69 | .help("Skip running doctests"), |
| 70 | ) |
| 71 | .arg( |
| 72 | Arg::new("manifest_dir") |
| 73 | .long("manifest-dir") |
| 74 | .help("Path to directory containing outermost autocxx Cargo.toml; necessary for trybuild to build test code successfully") |
| 75 | .default_value_os(calculate_cargo_dir().as_os_str()) |
| 76 | ) |
| 77 | .get_matches(); |
| 78 | if let Some(supports_matches) = matches.subcommand_matches("supports") { |
| 79 | // Only do our preprocessing and testing for the html renderer, not linkcheck. |
| 80 | if supports_matches.value_of("renderer") == Some("html") { |
| 81 | process::exit(0); |
| 82 | } else { |
| 83 | process::exit(1); |
| 84 | } |
| 85 | } |
| 86 | preprocess(&matches).unwrap(); |
| 87 | } |
| 88 | |
/// Computes the default `--manifest-dir` value.
///
/// Starts from the path of the running executable, walks up at most three
/// parent directories (stopping early if the path runs out of parents) and
/// appends `integration-tests`.
///
/// # Panics
///
/// Panics if the path of the current executable cannot be determined.
fn calculate_cargo_dir() -> PathBuf {
    let exe = std::env::current_exe().unwrap();
    // `ancestors()` yields the path itself first, so the fourth item is the
    // third parent; `last()` falls back to the outermost ancestor available.
    let base = exe
        .ancestors()
        .take(4)
        .last()
        .expect("ancestors() always yields the path itself");
    base.join("integration-tests")
}
| 96 | |
| 97 | fn preprocess(args: &ArgMatches) -> Result<(), Error> { |
| 98 | let (_, mut book) = CmdPreprocessor::parse_input(io::stdin())?; |
| 99 | |
| 100 | env_logger::builder().init(); |
| 101 | let mut test_cases = Vec::new(); |
| 102 | |
| 103 | Book::for_each_mut(&mut book, |sec| { |
| 104 | if let mdbook::BookItem::Chapter(chapter) = sec { |
| 105 | let filename = chapter |
| 106 | .path |
| 107 | .as_ref() |
| 108 | .map(|pb| pb.to_string_lossy()) |
| 109 | .unwrap_or_default() |
| 110 | .to_string(); |
| 111 | chapter.content = substitute_chapter(&chapter.content, &filename, &mut test_cases); |
| 112 | } |
| 113 | }); |
| 114 | |
| 115 | // Now run any test cases we accumulated. |
| 116 | if !args.is_present("skip_tests") { |
| 117 | let stdout_gag = gag::BufferRedirect::stdout().unwrap(); |
| 118 | let num_tests = test_cases.len(); |
| 119 | let fails: Vec<_> = test_cases |
| 120 | .into_par_iter() |
| 121 | .enumerate() |
| 122 | .filter_map(|(counter, case)| { |
| 123 | if let Ok(test) = std::env::var(RUST_MDBOOK_SINGLE_TEST) { |
| 124 | let desired_id: usize = test.parse().unwrap(); |
| 125 | if desired_id != (counter + 1) { |
| 126 | return None; |
| 127 | } |
| 128 | } |
| 129 | eprintln!( |
| 130 | "Running doctest {}/{} at {}", |
| 131 | counter + 1, |
| 132 | num_tests, |
| 133 | &case.location |
| 134 | ); |
| 135 | let err = autocxx_integration_tests::doctest( |
| 136 | &case.cpp, |
| 137 | &case.hdr, |
| 138 | case.rs, |
Austin Schuh | 6ea9bfa | 2023-08-06 19:05:10 -0700 | [diff] [blame^] | 139 | &OsString::from(args.value_of("manifest_dir").unwrap()), |
Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 140 | ); |
| 141 | let desc = match err { |
| 142 | Ok(_) => "passed".to_string(), |
Austin Schuh | 6ea9bfa | 2023-08-06 19:05:10 -0700 | [diff] [blame^] | 143 | Err(ref err) => format!("failed: {err:?}"), |
Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 144 | }; |
| 145 | eprintln!( |
| 146 | "Doctest {}/{} at {} {}.", |
| 147 | counter + 1, |
| 148 | num_tests, |
| 149 | &case.location, |
| 150 | desc |
| 151 | ); |
| 152 | if err.is_err() { |
| 153 | Some(TestId { |
| 154 | location: case.location, |
| 155 | test_id: counter + 1, |
| 156 | }) |
| 157 | } else { |
| 158 | None |
| 159 | } |
| 160 | }) |
| 161 | .collect(); |
| 162 | let mut stdout_str = String::new(); |
| 163 | stdout_gag |
| 164 | .into_inner() |
| 165 | .read_to_string(&mut stdout_str) |
| 166 | .unwrap(); |
| 167 | if !stdout_str.is_empty() { |
Austin Schuh | 6ea9bfa | 2023-08-06 19:05:10 -0700 | [diff] [blame^] | 168 | eprintln!("Stdout from tests:\n{stdout_str}"); |
Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 169 | } |
| 170 | if !fails.is_empty() { |
| 171 | panic!( |
| 172 | "One or more tests failed: {}. To rerun an individual test use {}.", |
| 173 | fails.into_iter().sorted().map(|s| s.to_string()).join(", "), |
| 174 | RUST_MDBOOK_SINGLE_TEST |
| 175 | ); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | serde_json::to_writer(io::stdout(), &book)?; |
| 180 | |
| 181 | Ok(()) |
| 182 | } |
| 183 | |
| 184 | fn substitute_chapter(chapter: &str, filename: &str, test_cases: &mut Vec<TestCase>) -> String { |
| 185 | let mut state = ChapterParseState::Start; |
| 186 | let mut out = Vec::new(); |
| 187 | for (line_no, line) in chapter.lines().enumerate() { |
| 188 | let line_type = recognize_line(line); |
| 189 | let mut push_line = true; |
| 190 | state = match state { |
| 191 | ChapterParseState::Start => match line_type { |
| 192 | LineType::CodeBlockStart | LineType::CodeBlockEnd => { |
| 193 | ChapterParseState::OtherCodeBlock |
| 194 | } |
| 195 | LineType::CodeBlockStartAutocxx(block_flags) => { |
| 196 | push_line = false; |
| 197 | ChapterParseState::OurCodeBlock(block_flags, Vec::new()) |
| 198 | } |
| 199 | LineType::Misc => ChapterParseState::Start, |
| 200 | }, |
| 201 | ChapterParseState::OtherCodeBlock => match line_type { |
| 202 | LineType::CodeBlockEnd => ChapterParseState::Start, |
| 203 | LineType::Misc => ChapterParseState::OtherCodeBlock, |
| 204 | _ => panic!("Found confusing conflicting block markers"), |
| 205 | }, |
| 206 | ChapterParseState::OurCodeBlock(flags, mut lines) => match line_type { |
| 207 | LineType::Misc => { |
| 208 | push_line = false; |
| 209 | lines.push(line.to_string()); |
| 210 | ChapterParseState::OurCodeBlock(flags, lines) |
| 211 | } |
| 212 | LineType::CodeBlockEnd => { |
| 213 | let location = MiniSpan { |
| 214 | filename: filename.to_string(), |
| 215 | start_line: line_no - lines.len(), |
| 216 | }; |
| 217 | out.extend(handle_code_block(flags, lines, location, test_cases)); |
| 218 | push_line = false; |
| 219 | ChapterParseState::Start |
| 220 | } |
| 221 | _ => panic!("Found something unexpected in one of our code blocks"), |
| 222 | }, |
| 223 | }; |
| 224 | if push_line { |
| 225 | out.push(line.to_string()); |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | out.join("\n") |
| 230 | } |
| 231 | |
| 232 | #[derive(PartialEq, Eq, PartialOrd, Ord)] |
| 233 | struct TestId { |
| 234 | location: MiniSpan, |
| 235 | test_id: usize, |
| 236 | } |
| 237 | |
| 238 | impl Display for TestId { |
| 239 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 240 | write!(f, "(ID {}): {}", self.test_id, self.location) |
| 241 | } |
| 242 | } |
| 243 | |
/// A minimal source location for diagnostics: a file name and the line on
/// which something starts. A much-reduced stand-in for `proc_macro2::Span`.
///
/// The derived ordering compares `filename` first and `start_line` second.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
struct MiniSpan {
    filename: String,
    start_line: usize,
}

impl Display for MiniSpan {
    /// Renders as `<filename> line <start_line>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            filename,
            start_line,
        } = self;
        write!(f, "{} line {}", filename, start_line)
    }
}
| 257 | |
| 258 | struct TestCase { |
| 259 | cpp: String, |
| 260 | hdr: String, |
| 261 | rs: TokenStream, |
| 262 | location: MiniSpan, |
| 263 | } |
| 264 | |
| 265 | unsafe impl Send for TestCase {} |
| 266 | |
/// State of the line-by-line chapter scanner.
enum ChapterParseState {
    /// Not inside any fenced code block.
    Start,
    /// Inside a fenced code block that is not flagged `autocxx`.
    OtherCodeBlock,
    /// Inside an `autocxx` block (e.g. `rust,autocxx`): the flags from its
    /// opening fence and the body lines collected so far.
    OurCodeBlock(HashSet<String>, Vec<String>),
}
| 272 | |
/// Classification of a single markdown line.
enum LineType {
    /// An opening fence with an info string that lacks the `autocxx` flag.
    CodeBlockStart,
    /// An opening fence whose info string contains the `autocxx` flag; holds
    /// all the flags from that info string.
    CodeBlockStartAutocxx(HashSet<String>),
    /// A bare fence.
    CodeBlockEnd,
    /// Anything else.
    Misc,
}
| 279 | |
/// Extracts the comma-separated flags from a fence line.
///
/// The first three bytes (the fence marker) are skipped and the remainder is
/// split on `,`. Each flag has surrounding whitespace removed, so
/// "```rust, autocxx" yields the same flags as "```rust,autocxx".
///
/// Input that is shorter than three bytes, or whose third byte is not on a
/// character boundary, is treated as having an empty info string instead of
/// panicking.
fn code_block_flags(line: &str) -> HashSet<String> {
    line.get(3..)
        .unwrap_or_default()
        .split(',')
        .map(|flag| flag.trim().to_string())
        .collect()
}
| 284 | |
| 285 | fn recognize_line(line: &str) -> LineType { |
| 286 | if line.starts_with("```") && line.len() > 3 { |
| 287 | let flags = code_block_flags(line); |
| 288 | if flags.contains("autocxx") { |
| 289 | LineType::CodeBlockStartAutocxx(flags) |
| 290 | } else { |
| 291 | LineType::CodeBlockStart |
| 292 | } |
| 293 | } else if line == "```" { |
| 294 | LineType::CodeBlockEnd |
| 295 | } else { |
| 296 | LineType::Misc |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | fn handle_code_block( |
| 301 | flags: HashSet<String>, |
| 302 | lines: Vec<String>, |
| 303 | location: MiniSpan, |
| 304 | test_cases: &mut Vec<TestCase>, |
| 305 | ) -> impl Iterator<Item = String> { |
| 306 | let input_str = lines.join("\n"); |
| 307 | let fn_call = syn::parse_str::<syn::Expr>(&input_str) |
Austin Schuh | 6ea9bfa | 2023-08-06 19:05:10 -0700 | [diff] [blame^] | 308 | .unwrap_or_else(|_| panic!("Unable to parse outer function at {location}")); |
Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 309 | let fn_call = match fn_call { |
| 310 | Expr::Call(expr) => expr, |
| 311 | _ => panic!("Parsing unexpected"), |
| 312 | }; |
| 313 | let mut args_iter = fn_call.args.iter(); |
| 314 | let cpp = unescape_quotes(&extract_span(&lines, args_iter.next().unwrap().span())); |
| 315 | let hdr = unescape_quotes(&extract_span(&lines, args_iter.next().unwrap().span())); |
| 316 | let rs = extract_span(&lines, args_iter.next().unwrap().span()); |
| 317 | let mut output = vec![ |
| 318 | "#### C++ header:".to_string(), |
| 319 | "```cpp".to_string(), |
| 320 | hdr.to_string(), |
| 321 | "```".to_string(), |
| 322 | ]; |
| 323 | if !cpp.is_empty() && !flags.contains("hidecpp") { |
| 324 | output.push("#### C++ implementation:".to_string()); |
| 325 | output.push("```cpp".to_string()); |
| 326 | output.push(cpp.to_string()); |
| 327 | output.push("```".to_string()); |
| 328 | } |
| 329 | output.push("#### Rust:".to_string()); |
| 330 | output.push("```rust,noplayground".to_string()); |
| 331 | output.push(escape_hexathorpes(&rs).to_string()); |
| 332 | output.push("```".to_string()); |
| 333 | |
| 334 | // Don't run the test cases yet, because we want the preprocessor to spot |
| 335 | // basic formatting errors before getting into the time consuming business of |
| 336 | // running tests. |
| 337 | if !flags.contains("nocompile") { |
| 338 | test_cases.push(TestCase { |
| 339 | cpp, |
| 340 | hdr, |
| 341 | rs: syn::parse_file(&rs) |
Austin Schuh | 6ea9bfa | 2023-08-06 19:05:10 -0700 | [diff] [blame^] | 342 | .unwrap_or_else(|_| panic!("Unable to parse code at {location}")) |
Brian Silverman | 4e662aa | 2022-05-11 23:10:19 -0700 | [diff] [blame] | 343 | .to_token_stream(), |
| 344 | location, |
| 345 | }); |
| 346 | } |
| 347 | |
| 348 | output.into_iter() |
| 349 | } |
| 350 | |
| 351 | fn extract_span(text: &[String], span: Span) -> Cow<str> { |
| 352 | let start_line = span.start().line - 1; |
| 353 | let start_col = span.start().column; |
| 354 | let end_line = span.end().line - 1; |
| 355 | let end_col = span.end().column; |
| 356 | if start_line == end_line { |
| 357 | Cow::Borrowed(&text[start_line][start_col + 1..end_col - 1]) |
| 358 | } else { |
| 359 | let start_subset = &text[start_line][start_col + 1..]; |
| 360 | let end_subset = &text[end_line][..end_col - 1]; |
| 361 | let mid_lines = &text[start_line + 1..end_line]; |
| 362 | Cow::Owned( |
| 363 | std::iter::once(start_subset.to_string()) |
| 364 | .chain(mid_lines.iter().cloned()) |
| 365 | .chain(std::iter::once(end_subset.to_string())) |
| 366 | .join("\n"), |
| 367 | ) |
| 368 | } |
| 369 | } |
| 370 | |
/// Doubles the leading `#` of every line whose first non-whitespace
/// character is `#`, leaving all other lines untouched.
///
/// Lines are delimited by `\n`. The input is returned borrowed when no line
/// needs escaping.
///
/// This is a plain line scan, so no regular expression has to be compiled on
/// each call.
fn escape_hexathorpes(input: &str) -> Cow<str> {
    fn starts_with_hash(line: &str) -> bool {
        line.trim_start().starts_with('#')
    }

    if !input.split('\n').any(starts_with_hash) {
        return Cow::Borrowed(input);
    }

    let mut escaped = String::with_capacity(input.len() + 8);
    for (idx, line) in input.split('\n').enumerate() {
        if idx > 0 {
            escaped.push('\n');
        }
        if starts_with_hash(line) {
            // Byte position of the `#`, i.e. the length of the leading whitespace.
            let hash_at = line.len() - line.trim_start().len();
            escaped.push_str(&line[..hash_at]);
            escaped.push('#');
            escaped.push_str(&line[hash_at..]);
        } else {
            escaped.push_str(line);
        }
    }
    Cow::Owned(escaped)
}
| 375 | |
/// Turns every backslash-escaped double quote (`\"`) in `input` into a plain
/// double quote.
fn unescape_quotes(input: &str) -> String {
    let pieces: Vec<&str> = input.split("\\\"").collect();
    pieces.join("\"")
}