From bb674f37705312438ce1dce2bb191779592464b2 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 2 Dec 2025 13:03:41 +0530 Subject: [PATCH 1/5] feat: parallelize C and Rust building step --- crates/intrinsic-test/src/arm/mod.rs | 3 +++ crates/intrinsic-test/src/main.rs | 19 ++++++++++++++----- crates/intrinsic-test/src/x86/mod.rs | 3 +++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 99c8da854c..9135308140 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -18,6 +18,9 @@ pub struct ArmArchitectureTest { cli_options: ProcessedCli, } +unsafe impl Send for ArmArchitectureTest {} +unsafe impl Sync for ArmArchitectureTest {} + impl SupportedArchitectureTest for ArmArchitectureTest { type IntrinsicImpl = ArmIntrinsicType; diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index e5c846877c..35462aa33f 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -25,13 +25,22 @@ fn main() { } } -fn run(test_environment: impl SupportedArchitectureTest) { - info!("building C binaries"); - if !test_environment.build_c_file() { +fn run(test_environment: impl SupportedArchitectureTest + Sync) { + let (c_output, rust_output) = rayon::join( + || { + info!("building C binaries"); + test_environment.build_c_file() + }, + || { + info!("building Rust binaries"); + test_environment.build_rust_file() + }, + ); + + if !c_output { std::process::exit(2); } - info!("building Rust binaries"); - if !test_environment.build_rust_file() { + if !rust_output { std::process::exit(3); } info!("Running binaries"); diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index f2baf07071..227d61f415 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -18,6 +18,9 @@ pub struct X86ArchitectureTest { cli_options: ProcessedCli, } +unsafe impl Send for X86ArchitectureTest {} +unsafe impl Sync for X86ArchitectureTest {} + impl SupportedArchitectureTest for X86ArchitectureTest { type IntrinsicImpl = X86IntrinsicType; From 04db3427773f049419aac3749fef2fe52420a8b0 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 2 Dec 2025 16:37:56 +0530 Subject: [PATCH 2/5] feat: parallelizing the execution of C++ and Rust test-files --- crates/intrinsic-test/src/common/compare.rs | 85 ++++++++----- crates/intrinsic-test/src/common/gen_rust.rs | 7 ++ crates/intrinsic-test/src/common/mod.rs | 126 ++++++++++++------- 3 files changed, 141 insertions(+), 77 deletions(-) diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index c4c2a1e3e4..e85f80e921 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -12,50 +12,71 @@ fn runner_command(runner: &str) -> Command { } pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: &str) -> bool { - let (c, rust) = rayon::join( - || { + let available_parallelism = std::thread::available_parallelism().unwrap().get(); + let c_outputs = (0..available_parallelism) + .into_par_iter() + .map(|i| { runner_command(runner) - .arg("./intrinsic-test-programs") + .arg(format!("./intrinsic-test-programs_{i}")) .current_dir("c_programs") .output() - }, - || { + }) + .collect::>(); + + let rust_outputs = (0..available_parallelism) + .into_par_iter() + .map(|i| { runner_command(runner) - .arg(format!("./target/{target}/release/intrinsic-test-programs")) + .arg(format!( + "./target/{target}/release/intrinsic-test-programs-{i}" + )) .current_dir("rust_programs") .output() - }, - ); - let (c, rust) = match (c, rust) { - (Ok(c), Ok(rust)) => (c, rust), + }) + .collect::>(); + + let c_error = c_outputs.iter().filter(|elem| elem.is_err()).next(); + let rust_error = rust_outputs.iter().filter(|elem| elem.is_err()).next(); + match (c_error, rust_error) { + (None, None) => (), failure => panic!("Failed to run: {failure:#?}"), }; - if !c.status.success() { - error!( - "Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), - ); - } + let c_stdout = c_outputs + .into_iter() + .map(|c_elem| { + let c = c_elem.unwrap(); + let c_stdout = std::str::from_utf8(&c.stdout).unwrap_or("").to_string(); + if !c.status.success() { + error!( + "Failed to run C program.\nstdout: {c_stdout}\nstderr: {stderr}", + stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), + ); + } + c_stdout + }) + .collect_vec() + .join("\n"); - if !rust.status.success() { - error!( - "Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}", - stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), - stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), - ); - } + let rust_stdout = rust_outputs + .into_iter() + .map(|rust_elem| { + let rust = rust_elem.unwrap(); + let rust_stdout = std::str::from_utf8(&rust.stdout).unwrap_or("").to_string(); + if !rust.status.success() { + error!( + "Failed to run Rust program.\nstdout: {rust_stdout}\nstderr: {stderr}", + stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), + ); + } + rust_stdout + }) + .collect_vec() + .join("\n"); info!("Completed running C++ and Rust test binaries"); - let c = std::str::from_utf8(&c.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); - let rust = std::str::from_utf8(&rust.stdout) - .unwrap() - .to_lowercase() - .replace("-nan", "nan"); + let c = c_stdout.to_lowercase().replace("-nan", "nan"); + let rust = rust_stdout.to_lowercase().replace("-nan", "nan"); let c_output_map = c .split(INTRINSIC_DELIMITER) diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index fb047e2612..acc2a7a718 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -39,6 +39,7 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io:: pub fn write_bin_cargo_toml( w: &mut impl std::io::Write, module_count: usize, + binary_count: usize, ) -> std::io::Result<()> { write_cargo_toml_header(w, "intrinsic-test-programs")?; @@ -49,6 +50,12 @@ pub fn write_bin_cargo_toml( writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?; } + for i in 0..binary_count { + writeln!(w, "[[bin]]")?; + writeln!(w, "name = \"intrinsic-test-programs_{i}\"")?; + writeln!(w, "path = \"src/main_{i}.rs\"")?; + } + Ok(()) } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 8b6bd943a7..41d3413dc9 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -29,7 +29,10 @@ pub mod values; /// Architectures must support this trait /// to be successfully tested. -pub trait SupportedArchitectureTest { +pub trait SupportedArchitectureTest +where + Self: Sync + Send, +{ type IntrinsicImpl: IntrinsicTypeDefinition + Sync; fn cli_options(&self) -> &ProcessedCli; @@ -95,55 +98,88 @@ pub trait SupportedArchitectureTest { .collect::>() .unwrap(); - let mut file = File::create("c_programs/main.cpp").unwrap(); - write_main_cpp( - &mut file, - Self::PLATFORM_C_DEFINITIONS, - Self::PLATFORM_C_HEADERS, - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); - - // This is done because `cpp_compiler_wrapped` is None when - // the --generate-only flag is passed + let (auto_chunk_size, auto_chunk_count) = auto_chunk(self.intrinsics().len()); + + self.intrinsics() + .par_chunks(auto_chunk_size) + .enumerate() + .map(|(i, chunk)| { + let mut file = File::create(format!("c_programs/main_{i}.cpp")).unwrap(); + write_main_cpp( + &mut file, + Self::PLATFORM_C_DEFINITIONS, + Self::PLATFORM_C_HEADERS, + chunk.iter().map(|i| i.name.as_str()), + ) + }) + .collect::>() + .unwrap(); + if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() { - // compile this cpp file into a .o file - trace!("compiling main.cpp"); - let output = cpp_compiler - .compile_object_file("main.cpp", "intrinsic-test-programs.o") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - - let object_files = (0..chunk_count) - .map(|i| format!("mod_{i}.o")) - .chain(["intrinsic-test-programs.o".to_owned()]); - - let output = cpp_compiler - .link_executable(object_files, "intrinsic-test-programs") - .unwrap(); - assert!(output.status.success(), "{output:?}"); - } + (0..auto_chunk_count) + .into_par_iter() + .map(|index| { + // This is done because `cpp_compiler_wrapped` is None when + // the --generate-only flag is passed + // compile this cpp file into a .o file + trace!("compiling main_{index}.cpp"); + let output = cpp_compiler.compile_object_file( + format!("main_{index}.cpp").as_str(), + format!("main_{index}.o").as_str(), + ); + + if output.is_err() { + return output; + }; + + let object_files = (0..chunk_count) + .map(|i| format!("mod_{i}.o")) + .chain([format!("main_{index}.o").to_owned()]); - true + let output = cpp_compiler.link_executable( + object_files, + format!("intrinsic-test-programs-{index}").as_str(), + ); + trace!("finished compiling main_{index}.cpp"); + + return output; + }) + .inspect(|output| { + assert!(output.is_ok(), "{output:?}"); + if let Ok(out) = &output { + assert!(out.status.success(), "{output:?}") + } + }) + .all(|output| output.is_ok()) + } else { + true + } } fn build_rust_file(&self) -> bool { std::fs::create_dir_all("rust_programs/src").unwrap(); let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + let (auto_chunk_size, auto_chunk_count) = auto_chunk(self.intrinsics().len()); let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); - write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); - - let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); - write_main_rs( - &mut main_rs, - chunk_count, - Self::PLATFORM_RUST_CFGS, - "", - self.intrinsics().iter().map(|i| i.name.as_str()), - ) - .unwrap(); + write_bin_cargo_toml(&mut cargo, chunk_count, auto_chunk_count).unwrap(); + + self.intrinsics() + .par_chunks(auto_chunk_size) + .enumerate() + .map(|(i, chunk)| { + let mut main_rs = File::create(format!("rust_programs/src/main_{i}.rs")).unwrap(); + write_main_rs( + &mut main_rs, + chunk_count, + Self::PLATFORM_RUST_CFGS, + "", + chunk.iter().map(|i| i.name.as_str()), + ) + }) + .collect::>() + .unwrap(); let target = &self.cli_options().target; let toolchain = self.cli_options().toolchain.as_deref(); @@ -200,12 +236,12 @@ pub trait SupportedArchitectureTest { } } -// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) { -// let available_parallelism = std::thread::available_parallelism().unwrap().get(); -// let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count)); +pub fn auto_chunk(intrinsic_count: usize) -> (usize, usize) { + let available_parallelism = std::thread::available_parallelism().unwrap().get(); + let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count)); -// (chunk_size, intrinsic_count.div_ceil(chunk_size)) -// } + (chunk_size, intrinsic_count.div_ceil(chunk_size)) +} pub fn manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) { (chunk_size, intrinsic_count.div_ceil(chunk_size)) From 84915e3f27b55d8d5d634ef7e1ec6a8d659661f9 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 2 Dec 2025 20:47:31 +0530 Subject: [PATCH 3/5] feat: updating the number of x86 intrinsics tested to 50% --- ci/intrinsic-test.sh | 8 ++++---- crates/intrinsic-test/src/common/compare.rs | 2 +- crates/intrinsic-test/src/common/gen_rust.rs | 4 ++-- crates/intrinsic-test/src/common/mod.rs | 2 ++ 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index ff76a0c769..f6545674b3 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -75,7 +75,7 @@ case ${TARGET} in TEST_CXX_COMPILER="clang++" TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt - : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}" + : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=50}" ;; *) ;; @@ -85,7 +85,7 @@ esac # Arm specific case "${TARGET}" in aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \ + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=trace \ cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ --runner "${TEST_RUNNER}" \ @@ -96,7 +96,7 @@ case "${TARGET}" in ;; aarch64_be-unknown-linux-gnu*) - CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \ + CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=trace \ cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ --runner "${TEST_RUNNER}" \ @@ -114,7 +114,7 @@ case "${TARGET}" in # Hence the use of `env -u`. env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \ - RUST_LOG=info RUST_BACKTRACE=1 \ + RUST_LOG=trace RUST_BACKTRACE=1 \ cargo run "${INTRINSIC_TEST}" "${PROFILE}" \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ --runner "${TEST_RUNNER}" \ diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index e85f80e921..83e4011007 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -17,7 +17,7 @@ pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: .into_par_iter() .map(|i| { runner_command(runner) - .arg(format!("./intrinsic-test-programs_{i}")) + .arg(format!("./intrinsic-test-programs-{i}")) .current_dir("c_programs") .output() }) diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index acc2a7a718..fa816f78bf 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -51,8 +51,8 @@ pub fn write_bin_cargo_toml( } for i in 0..binary_count { - writeln!(w, "[[bin]]")?; - writeln!(w, "name = \"intrinsic-test-programs_{i}\"")?; + writeln!(w, "\n[[bin]]")?; + writeln!(w, "name = \"intrinsic-test-programs-{i}\"")?; writeln!(w, "path = \"src/main_{i}.rs\"")?; } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 41d3413dc9..e5922eaa40 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -145,8 +145,10 @@ where return output; }) .inspect(|output| { + trace!("{output:?}"); assert!(output.is_ok(), "{output:?}"); if let Ok(out) = &output { + trace!("{:?}", out.status.success()); assert!(out.status.success(), "{output:?}") } }) From c6b3f4479b97a5ae12fe74bbe609efbe67d97622 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 2 Dec 2025 23:08:17 +0530 Subject: [PATCH 4/5] feat: reduce the optimization for X86 C++ testfiles and increase the number of mod_n.cpp modules. --- crates/intrinsic-test/src/common/mod.rs | 2 +- crates/intrinsic-test/src/x86/compile.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index e5922eaa40..7ff475e63c 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -52,7 +52,7 @@ where fn cpp_compilation(&self) -> Option; fn build_c_file(&self) -> bool { - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400); + let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 270); let cpp_compiler_wrapped = self.cpp_compilation(); diff --git a/crates/intrinsic-test/src/x86/compile.rs b/crates/intrinsic-test/src/x86/compile.rs index 65cd291b1b..c9ac6ddc03 100644 --- a/crates/intrinsic-test/src/x86/compile.rs +++ b/crates/intrinsic-test/src/x86/compile.rs @@ -9,7 +9,7 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { .add_arch_flags(["icelake-client"]) .set_compiler(cpp_compiler) .set_target(&config.target) - .set_opt_level("2") + .set_opt_level("1") .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) .set_project_root("c_programs") .add_extra_flags(vec![ From fb2d7f824cac7b7507fb3404e3073d27dc281bf1 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Tue, 2 Dec 2025 23:22:40 +0530 Subject: [PATCH 5/5] feat: further reduce the chunking for mod files --- crates/intrinsic-test/src/common/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 7ff475e63c..9cf4d5cbbd 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -52,7 +52,7 @@ where fn cpp_compilation(&self) -> Option; fn build_c_file(&self) -> bool { - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 270); + let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 50); let cpp_compiler_wrapped = self.cpp_compilation();