From e6d779f437054e5abb7f21b9d039a10177cb63f6 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 9 Jul 2024 08:10:29 +0200 Subject: [PATCH 01/47] arcv: add scheduling information for the Synopsys RMX-100 CPU This commit introduces a new -mtune=rmx100 tuning option together with relevant scheduler definitions. Instruction latencies and costs are based on the "RMX-100 Technical Reference Manual" document (revision 0.4, 13 September 2023) and are subject to change. The changes have been verified by running the Dhrystone and Coremark benchmarks and observing expected (small) improvements compared to the -mtune=generic results. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rmx100.md | 110 +++++++++++++++++++++++++++++++ gcc/config/riscv/riscv-cores.def | 1 + gcc/config/riscv/riscv-opts.h | 1 + gcc/config/riscv/riscv.cc | 24 +++++++ gcc/config/riscv/riscv.md | 3 +- gcc/doc/riscv-mtune.texi | 2 + 6 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 gcc/config/riscv/arcv-rmx100.md diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md new file mode 100644 index 000000000000..9194f510f9f8 --- /dev/null +++ b/gcc/config/riscv/arcv-rmx100.md @@ -0,0 +1,110 @@ +;; DFA scheduling description of the Synopsys RMX-100 cpu +;; for GNU C compiler +;; Copyright (C) 2023 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "arcv_rmx100") + +(define_cpu_unit "arcv_rmx100_ALU" "arcv_rmx100") +;(define_cpu_unit "arcv_rmx100_CSR" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_FPU" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_MPY" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_DIV" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_DMP" "arcv_rmx100") + +;; Instruction reservation for arithmetic instructions. 
+(define_insn_reservation "arcv_rmx100_alu_arith" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "unknown, const, arith, shift, slt, multi, auipc, nop, + logical, move, atomic, mvpair, bitmanip, clz, ctz, cpop, + zicond, condmove, clmul, min, max, minu, maxu, rotate")) + "arcv_rmx100_ALU") + +(define_insn_reservation "arcv_rmx100_jmp_insn" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "branch, jump, call, jalr, ret, trap")) + "arcv_rmx100_ALU") + +; DIV insn: latency may be overridden by a define_bypass +(define_insn_reservation "arcv_rmx100_div_insn" 35 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "idiv")) + "arcv_rmx100_DIV*35") + +; MPY insn: latency may be overridden by a define_bypass +(define_insn_reservation "arcv_rmx100_mpy32_insn" 9 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "imul")) + "arcv_rmx100_MPY") + +(define_insn_reservation "arcv_rmx100_load_insn" 3 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "load,fpload")) + "arcv_rmx100_DMP,nothing*2") + +(define_insn_reservation "arcv_rmx100_store_insn" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "store,fpstore")) + "arcv_rmx100_DMP") + +(define_insn_reservation "arcv_rmx100_farith_insn" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fadd,fmul,fmadd,fcmp")) + "arcv_rmx100_FPU*2") + +(define_insn_reservation "arcv_rmx100_fdiv_insn" 17 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fdiv,fsqrt")) + "arcv_rmx100_FPU*17") + +(define_insn_reservation "arcv_rmx100_xfer" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) + "arcv_rmx100_FPU*2") + +;;(define_insn_reservation "core" 1 +;; (eq_attr "type" "block, brk, dmb, flag, lr, sr, sync") +;; "arcv_rmx100_ALU0 + arcv_rmx100_ALU1 + arcv_rmx100_DMP + arcv_rmx100_MPY + arcv_rmx100_MPY64 + arcv_rmx100_DIV") + +(define_insn_reservation "arcv_rmx100_fmul_half" 5 + (and (eq_attr "tune" "arcv_rmx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "HF"))) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fmul_single" 5 + (and (eq_attr "tune" "arcv_rmx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "SF"))) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fmul_double" 7 + (and (eq_attr "tune" "arcv_rmx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "DF"))) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fdiv" 20 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fdiv")) + "arcv_rmx100_FPU*20") + +(define_insn_reservation "arcv_rmx100_fsqrt" 25 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fsqrt")) + "arcv_rmx100_FPU*25") diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index cc9d5c03cb8c..d1708f3785b6 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -50,6 +50,7 @@ RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info) +RISCV_TUNE("arc-v-rmx-100-series", arcv_rmx100, arcv_rmx100_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 4e4e9d8930e2..3feb211767cb 100644 --- 
a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -61,6 +61,7 @@ enum riscv_microarchitecture_type { generic_ooo, mips_p8700, tt_ascalon_d8, + arcv_rmx100, }; extern enum riscv_microarchitecture_type riscv_microarchitecture; diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 63404d3d5143..237400d188be 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -685,6 +685,30 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = { true, /* prefer-agnostic. */ }; +/* Costs to use when optimizing for Synopsys RMX-100. */ +static const struct riscv_tune_param arcv_rmx100_tune_info = { + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_add */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* int_div */ + 1, /* issue_rate */ + 4, /* branch_cost */ + 2, /* memory_cost */ + 4, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + /* Costs to use when optimizing for size. */ static const struct riscv_tune_param optimize_size_tune_info = { {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 640ca5f9b0ea..823f8dda8a30 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -672,7 +672,7 @@ ;; Microarchitectures we know how to tune for. ;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. @@ -4966,3 +4966,4 @@ (include "generic-vector-ooo.md") (include "generic-ooo.md") (include "tt-ascalon-d8.md") +(include "arcv-rmx100.md") diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi index a2a4d3e77dbb..63a01db67726 100644 --- a/gcc/doc/riscv-mtune.texi +++ b/gcc/doc/riscv-mtune.texi @@ -50,6 +50,8 @@ particular CPU name. Permissible values for this option are: @samp{xiangshan-kunminghu}, +@samp{arc-v-rmx-100-series}, + @samp{generic-ooo}, @samp{size}, From 159cc330d221c40d6ecb5adaee1a938a848e97b7 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Fri, 5 Jul 2024 06:46:11 -0700 Subject: [PATCH 02/47] arcv: introduce and incorporate the --param=arcv-mpy-option flag This commit adds the new arcv-mpy-option compilation parameter with the valid (string) values of 1c, 2c, and 10c. This corresponds to different versions of the MPY/DIV unit of the RMX100 core, each of which has different latencies for imul/idiv instructions. Internally, this option is propagated to the pipeline description information in rmx100.md with the use of new helper functions defined in riscv.cc. 
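For illustration, a small kernel such as the following (hypothetical example, not part of this patch) is sensitive to the selected MPY latency; the bypass guards added below pick a 1- or 2-cycle producer-consumer latency for the mul, e.g. when compiled with
-O2 -mtune=arc-v-rmx-100-series --param=arcv-mpy-option=1c:

  int
  mac (int a, int b, int c, int d)
  {
    /* The mul feeds the following add; with the 1c option the scheduler may
       keep the two back to back, while 2c (the default) and 10c model a
       longer latency before the product is available.  */
    return a * b + c + d;
  }
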
Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rmx100.md | 11 ++++++----- gcc/config/riscv/riscv-opts.h | 7 +++++++ gcc/config/riscv/riscv-protos.h | 3 +++ gcc/config/riscv/riscv.cc | 24 ++++++++++++++++++++++++ gcc/config/riscv/riscv.opt | 17 +++++++++++++++++ 5 files changed, 57 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md index 9194f510f9f8..003bf9ff268e 100644 --- a/gcc/config/riscv/arcv-rmx100.md +++ b/gcc/config/riscv/arcv-rmx100.md @@ -67,11 +67,6 @@ (eq_attr "type" "fadd,fmul,fmadd,fcmp")) "arcv_rmx100_FPU*2") -(define_insn_reservation "arcv_rmx100_fdiv_insn" 17 - (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fdiv,fsqrt")) - "arcv_rmx100_FPU*17") - (define_insn_reservation "arcv_rmx100_xfer" 2 (and (eq_attr "tune" "arcv_rmx100") (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) @@ -108,3 +103,9 @@ (and (eq_attr "tune" "arcv_rmx100") (eq_attr "type" "fsqrt")) "arcv_rmx100_FPU*25") + +(define_bypass 1 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") +(define_bypass 2 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") + +(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") +(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 3feb211767cb..7be10413b4d9 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -86,6 +86,13 @@ enum rvv_max_lmul_enum { RVV_DYNAMIC = 9 }; +/* ARC-V multiply option. */ +enum arcv_mpy_option_enum { + ARCV_MPY_OPTION_1C = 1, + ARCV_MPY_OPTION_2C = 2, + ARCV_MPY_OPTION_10C = 8, +}; + enum riscv_multilib_select_kind { /* Select multilib by builtin way. */ select_by_builtin, diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index cdb706ab82ac..6c20eb4e2140 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -165,6 +165,9 @@ extern bool riscv_epilogue_uses (unsigned int); extern bool riscv_can_use_return_insn (void); extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode); extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_1c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_2c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_10c_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); extern void riscv_reinit (void); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 237400d188be..f789e129f5b7 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10368,6 +10368,30 @@ riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) return store_data_bypass_p (out_insn, in_insn); } +/* Implement one boolean function for each of the values of the + arcv_mpy_option enum, for the needs of rhx100.md. */ + +bool +arcv_mpy_1c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_1C; +} + +bool +arcv_mpy_2c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_2C; +} + +bool +arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_10C; +} + /* Implement TARGET_SECONDARY_MEMORY_NEEDED. 
When floating-point registers are wider than integer ones, moves between diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 6543fd1c4a72..663acf62dac4 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -396,3 +396,20 @@ Specifies whether the fence.tso instruction should be used. mautovec-segment Target Integer Var(riscv_mautovec_segment) Init(1) Enable (default) or disable generation of vector segment load/store instructions. + +Enum +Name(arcv_mpy_option) Type(enum arcv_mpy_option_enum) +Valid arguments to -param=arcv_mpy_option=: + +EnumValue +Enum(arcv_mpy_option) String(1c) Value(ARCV_MPY_OPTION_1C) + +EnumValue +Enum(arcv_mpy_option) String(2c) Value(ARCV_MPY_OPTION_2C) + +EnumValue +Enum(arcv_mpy_option) String(10c) Value(ARCV_MPY_OPTION_10C) + +-param=arcv-mpy-option= +Target RejectNegative Joined Enum(arcv_mpy_option) Var(arcv_mpy_option) Init(ARCV_MPY_OPTION_2C) +The type of MPY unit used by the RMX-100 core (to be used in combination with -mtune=rmx100) (default: 2c). From b21da0b8c0ec0ba86d2f423359839f8100995d3f Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Thu, 8 May 2025 01:36:17 -0700 Subject: [PATCH 03/47] arcv: add FPU insn latencies to the RMX-100 scheduling model This patch adds latencies related to FPU instructions to arcv-rmx100.md. The specific values used correspond to the 'fast' config, except fdiv where the latency was reduced to 10 cycles. In the future, FP latencies for RMX-100 should be made dependent on an external (-mfpu-like) option. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rmx100.md | 50 ++++++++++++++------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md index 003bf9ff268e..5a25dfb67cfc 100644 --- a/gcc/config/riscv/arcv-rmx100.md +++ b/gcc/config/riscv/arcv-rmx100.md @@ -54,7 +54,7 @@ (define_insn_reservation "arcv_rmx100_load_insn" 3 (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "load,fpload")) + (eq_attr "type" "load")) "arcv_rmx100_DMP,nothing*2") (define_insn_reservation "arcv_rmx100_store_insn" 1 @@ -62,47 +62,39 @@ (eq_attr "type" "store,fpstore")) "arcv_rmx100_DMP") +;; FPU scheduling. FIXME: This is based on the "fast" unit for now, the "slow" +;; option remains to be implemented later (together with the -mfpu flag). 
+ +(define_insn_reservation "arcv_rmx100_fpload_insn" 3 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fpload")) + "arcv_rmx100_DMP,nothing*2") + (define_insn_reservation "arcv_rmx100_farith_insn" 2 (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fadd,fmul,fmadd,fcmp")) - "arcv_rmx100_FPU*2") + (eq_attr "type" "fadd,fcmp")) + "arcv_rmx100_FPU,nothing") -(define_insn_reservation "arcv_rmx100_xfer" 2 +(define_insn_reservation "arcv_rmx100_xfer" 1 (and (eq_attr "tune" "arcv_rmx100") (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) - "arcv_rmx100_FPU*2") - -;;(define_insn_reservation "core" 1 -;; (eq_attr "type" "block, brk, dmb, flag, lr, sr, sync") -;; "arcv_rmx100_ALU0 + arcv_rmx100_ALU1 + arcv_rmx100_DMP + arcv_rmx100_MPY + arcv_rmx100_MPY64 + arcv_rmx100_DIV") + "arcv_rmx100_FPU") -(define_insn_reservation "arcv_rmx100_fmul_half" 5 +(define_insn_reservation "arcv_rmx100_fmul_insn" 2 (and (eq_attr "tune" "arcv_rmx100") - (and (eq_attr "type" "fadd,fmul,fmadd") - (eq_attr "mode" "HF"))) - "arcv_rmx100_FPU") + (eq_attr "type" "fmul")) + "arcv_rmx100_FPU,nothing") -(define_insn_reservation "arcv_rmx100_fmul_single" 5 +(define_insn_reservation "arcv_rmx100_fmac_insn" 2 (and (eq_attr "tune" "arcv_rmx100") - (and (eq_attr "type" "fadd,fmul,fmadd") - (eq_attr "mode" "SF"))) - "arcv_rmx100_FPU") + (eq_attr "type" "fmadd")) + "arcv_rmx100_FPU,nothing") -(define_insn_reservation "arcv_rmx100_fmul_double" 7 +(define_insn_reservation "arcv_rmx100_fdiv_insn" 10 (and (eq_attr "tune" "arcv_rmx100") - (and (eq_attr "type" "fadd,fmul,fmadd") - (eq_attr "mode" "DF"))) + (eq_attr "type" "fdiv,fsqrt")) "arcv_rmx100_FPU") -(define_insn_reservation "arcv_rmx100_fdiv" 20 - (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fdiv")) - "arcv_rmx100_FPU*20") - -(define_insn_reservation "arcv_rmx100_fsqrt" 25 - (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fsqrt")) - "arcv_rmx100_FPU*25") (define_bypass 1 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") (define_bypass 2 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") From 5b1ca413a612ced4b0512e7b5bbf831dfc1e7325 Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Fri, 10 Mar 2023 13:08:18 +0200 Subject: [PATCH 04/47] arcv: Add initial scheduling scheme. Signed-off-by: Claudiu Zissulescu --- gcc/config/riscv/arcv-rhx100.md | 103 ++++++++++++++++++++++++++++ gcc/config/riscv/riscv-cores.def | 1 + gcc/config/riscv/riscv-opts.h | 1 + gcc/config/riscv/riscv.cc | 113 +++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.md | 4 +- 5 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 gcc/config/riscv/arcv-rhx100.md diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md new file mode 100644 index 000000000000..256871fc1656 --- /dev/null +++ b/gcc/config/riscv/arcv-rhx100.md @@ -0,0 +1,103 @@ +;; DFA scheduling description of the Synopsys RHX-100 cpu +;; for GNU C compiler +;; Copyright (C) 2023 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "arcv_rhx100") + +(define_cpu_unit "arcv_rhx100_ALU_A_fuse0_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_ALU_A_fuse1_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_ALU_B_fuse0_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_ALU_B_fuse1_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_MPY32" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DIV" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DMP_fuse0" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DMP_fuse1" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_fdivsqrt" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueA_fuse0" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueA_fuse1" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueB_fuse0" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueB_fuse1" "arcv_rhx100") + +;; Instruction reservation for arithmetic instructions (pipe A, pipe B). +(define_insn_reservation "arcv_rhx100_alu_early_arith" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "unknown,move,const,arith,shift,slt,multi,auipc,nop,logical,\ + bitmanip,min,max,minu,maxu,clz,ctz,atomic,\ + condmove,mvpair,zicond,cpop,clmul")) + "((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early) | (arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early)) | ((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))") + +(define_insn_reservation "arcv_rhx100_jmp_insn" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "branch,jump,call,jalr,ret,trap")) + "arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1") + +(define_insn_reservation "arcv_rhx100_div_insn" 12 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "idiv")) + "arcv_rhx100_issueA_fuse0 + arcv_rhx100_DIV, nothing*11") + +(define_insn_reservation "arcv_rhx100_mpy32_insn" 4 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "imul")) + "arcv_rhx100_issueA_fuse0 + arcv_rhx100_MPY32, nothing*3") + +(define_insn_reservation "arcv_rhx100_load_insn" 3 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "load,fpload")) + "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)") + +(define_insn_reservation "arcv_rhx100_store_insn" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "store,fpstore")) + "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)") + +;; (soft) floating points +(define_insn_reservation "arcv_rhx100_xfer" 3 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "mfc,mtc,fcvt,fcvt_i2f,fcvt_f2i,fmove,fcmp")) + "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early), nothing*2") + +(define_insn_reservation "arcv_rhx100_fmul" 5 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fadd,fmul,fmadd")) + "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early)") + +(define_insn_reservation "arcv_rhx100_fdiv" 20 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fdiv,fsqrt")) + "arcv_rhx100_fdivsqrt*20") + +;(final_presence_set "arcv_rhx100_issueA_fuse1" "arcv_rhx100_issueA_fuse0") +;(final_presence_set "arcv_rhx100_issueB_fuse1" "arcv_rhx100_issueB_fuse0") +;(final_presence_set "arcv_rhx100_ALU_A_fuse1_early" "arcv_rhx100_ALU_A_fuse0_early") +;(final_presence_set "arcv_rhx100_ALU_B_fuse1_early" "arcv_rhx100_ALU_B_fuse0_early") + +;; Bypasses +;(define_bypass 0 
"arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") +(define_bypass 1 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") + +;(define_bypass 0 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_alu_early_arith") +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_mpy*_insn") +(define_bypass 2 "arcv_rhx100_load_insn" "arcv_rhx100_load_insn") +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_div_insn") + +(define_bypass 3 "arcv_rhx100_mpy32_insn" "arcv_rhx100_mpy*_insn") +(define_bypass 3 "arcv_rhx100_mpy32_insn" "arcv_rhx100_div_insn") diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index d1708f3785b6..3b5da61d0bc9 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -51,6 +51,7 @@ RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info) RISCV_TUNE("arc-v-rmx-100-series", arcv_rmx100, arcv_rmx100_tune_info) +RISCV_TUNE("arc-v-rhx-100-series", arcv_rhx100, arcv_rhx100_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 7be10413b4d9..632d426503be 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -62,6 +62,7 @@ enum riscv_microarchitecture_type { mips_p8700, tt_ascalon_d8, arcv_rmx100, + arcv_rhx100, }; extern enum riscv_microarchitecture_type riscv_microarchitecture; diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f789e129f5b7..8001e7dae35a 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -291,6 +291,7 @@ enum riscv_fusion_pairs RISCV_FUSE_BFEXT = (1 << 11), RISCV_FUSE_EXPANDED_LD = (1 << 12), RISCV_FUSE_B_ALUI = (1 << 13), + RISCV_FUSE_ARCV = (1 << 14), }; /* Costs of various operations on the different architectures. */ @@ -709,6 +710,30 @@ static const struct riscv_tune_param arcv_rmx100_tune_info = { NULL, /* loop_align */ }; +/* Costs to use when optimizing for Synopsys RHX-100. */ +static const struct riscv_tune_param arcv_rhx100_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */ + {COSTS_N_INSNS (27), COSTS_N_INSNS (43)}, /* int_div */ + 4, /* issue_rate */ + 9, /* branch_cost */ + 2, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_ARCV, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + /* Costs to use when optimizing for size. 
*/ static const struct riscv_tune_param optimize_size_tune_info = { {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */ @@ -10743,6 +10768,91 @@ riscv_set_is_shNadduw (rtx set) && REG_P (SET_DEST (set))); } +/* Return TRUE if two addresses can be fused. */ + +static bool +arcv_fused_addr_p (rtx addr0, rtx addr1) +{ + rtx base0, base1, tmp; + HOST_WIDE_INT off0 = 0, off1 = 0; + + if (GET_CODE (addr0) == PLUS) + { + base0 = XEXP (addr0, 0); + tmp = XEXP (addr0, 1); + if (!CONST_INT_P (tmp)) + return false; + off0 = INTVAL (tmp); + } + else if (REG_P (addr0)) + base0 = addr0; + else + return false; + + if (GET_CODE (addr1) == PLUS) + { + base1 = XEXP (addr1, 0); + tmp = XEXP (addr1, 1); + if (!CONST_INT_P (tmp)) + return false; + off1 = INTVAL (tmp); + } + else if (REG_P (addr1)) + base1 = addr1; + else + return false; + + /* Check if we have the same base. */ + gcc_assert (REG_P (base0) && REG_P (base1)); + if (REGNO (base0) != REGNO (base1)) + return false; + + /* Offsets have to be aligned to word boundary and adjacent in memory, + but the memory operations can be narrower. */ + if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD)) + return true; + + return false; +} + +/* Return true if PREV and CURR should be kept together during scheduling. */ + +static bool +arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + /* prev and curr are simple SET insns i.e. no flag setting or branching. */ + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + + /* Don't handle anything with a jump. */ + if (!simple_sets_p) + return false; + + /* Fuse adjacent loads and stores. */ + if (get_attr_type (prev) == TYPE_LOAD + && get_attr_type (curr) == TYPE_LOAD) + { + rtx addr0 = XEXP (SET_SRC (prev_set), 0); + rtx addr1 = XEXP (SET_SRC (curr_set), 0); + + if (arcv_fused_addr_p (addr0, addr1)) + return true; + } + + if (get_attr_type (prev) == TYPE_STORE + && get_attr_type (curr) == TYPE_STORE) + { + rtx addr0 = XEXP (SET_DEST (prev_set), 0); + rtx addr1 = XEXP (SET_DEST (curr_set), 0); + + if (arcv_fused_addr_p (addr0, addr1)) + return true; + } + + return false; +} + /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR should be kept together during scheduling. */ @@ -11375,6 +11485,9 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + return arcv_macro_fusion_pair_p (prev, curr); + return false; } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 823f8dda8a30..5779a862743d 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -672,7 +672,8 @@ ;; Microarchitectures we know how to tune for. ;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100,arcv_rhx100" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,arcv_rhx100,generic_ooo" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. 
@@ -4967,3 +4968,4 @@ (include "generic-ooo.md") (include "tt-ascalon-d8.md") (include "arcv-rmx100.md") +(include "arcv-rhx100.md") From d5df53d30167f429c193145269034566dc155c3c Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Wed, 11 Oct 2023 10:26:20 +0200 Subject: [PATCH 05/47] arcv: fuse load-immediate with store For the RMX-500 and RHX cores, the sequence "load-immediate + store" (that is used to store a constant value) can be executed in 1 cycle, provided the two instructions are kept next to one another. This patch handles this case in riscv_macro_fusion_pair_p(). Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 8001e7dae35a..80d4e537cdfb 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10850,6 +10850,16 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return true; } + /* Fuse load-immediate with a store of the destination register. */ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && get_attr_type (curr) == TYPE_STORE + && ((REG_P (SET_SRC (curr_set)) + && SET_DEST (prev_set) == SET_SRC (curr_set)) + || (SUBREG_P (SET_SRC (curr_set)) + && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set))))) + return true; + return false; } From 3690189a37bd962484ad3215be5b83572df25053 Mon Sep 17 00:00:00 2001 From: Shahab Vahedi Date: Thu, 19 Oct 2023 13:28:17 +0200 Subject: [PATCH 06/47] arcv: Introduce riscv_is_micro_arch () ARC-V related optimisations must be guarded like: if (riscv_microarchitecture == ) { ... } Introduce an inline function that encapsulates this: static inline bool riscv_is_micro_arch () Use it to define __riscv_rhx whenever compiling for the RHX microarchitecture. Signed-off-by: Shahab Vahedi --- gcc/config/riscv/riscv-c.cc | 3 +++ gcc/config/riscv/riscv-protos.h | 3 +++ gcc/config/riscv/riscv.cc | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index d497326e0611..52d240ceb89f 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -149,6 +149,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) builtin_define_with_int_value ("__riscv_th_v_intrinsic", riscv_ext_version_value (0, 11)); + if (riscv_is_micro_arch (arcv_rhx100)) + builtin_define ("__riscv_rhx"); + /* Define architecture extension test macros. 
*/ builtin_define_with_int_value ("__riscv_arch_test", 1); diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 6c20eb4e2140..83792bae8633 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -821,6 +821,9 @@ extern unsigned int th_int_get_mask (unsigned int); extern unsigned int th_int_get_save_adjustment (void); extern rtx th_int_adjust_cfi_prologue (unsigned int); extern const char *th_asm_output_opcode (FILE *asm_out_file, const char *p); + +extern bool riscv_is_micro_arch (enum riscv_microarchitecture_type); + #ifdef RTX_CODE extern const char* th_mempair_output_move (rtx[4], bool, machine_mode, RTX_CODE); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 80d4e537cdfb..87e2e6f33bfa 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -903,6 +903,12 @@ typedef enum typedef insn_code (*code_for_push_pop_t) (machine_mode); +bool +riscv_is_micro_arch (enum riscv_microarchitecture_type arch) +{ + return (riscv_microarchitecture == arch); +} + void riscv_frame_info::reset(void) { total_size = 0; From ec76bcca47840a84ac48dce86be36105d2d8e30b Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 19 Mar 2024 19:27:03 -0700 Subject: [PATCH 07/47] arcv: fuse load-immediate with dependent branch With this commit, we allow a load-immediate to be macro-op fused with a successive conditional branch that is dependent on it, e.g.: li t0, #imm bge t1, t0, .label Additionally, add a new testcase to check that this fusion type is handled correctly. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv.cc | 15 +++++++++++++-- .../gcc.target/riscv/arcv-fusion-limm-condbr.c | 12 ++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 83792bae8633..0260e7b5acc6 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -822,6 +822,7 @@ extern unsigned int th_int_get_save_adjustment (void); extern rtx th_int_adjust_cfi_prologue (unsigned int); extern const char *th_asm_output_opcode (FILE *asm_out_file, const char *p); +extern bool riscv_macro_fusion_p (); extern bool riscv_is_micro_arch (enum riscv_microarchitecture_type); #ifdef RTX_CODE diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 87e2e6f33bfa..84a90e893a9b 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10685,7 +10685,7 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports instruction fusion of some sort. */ -static bool +bool riscv_macro_fusion_p (void) { return tune_param->fusible_ops != RISCV_FUSE_NOTHING; @@ -10831,7 +10831,18 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) /* prev and curr are simple SET insns i.e. no flag setting or branching. */ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); - /* Don't handle anything with a jump. */ + /* Fuse load-immediate with a dependent conditional branch. 
*/ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && any_condjump_p (curr)) + { + rtx comp = XEXP (SET_SRC (curr_set), 0); + + return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set)) + || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set)); + } + + /* Don't handle anything with a jump past this point. */ if (!simple_sets_p) return false; diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c new file mode 100644 index 000000000000..cc2a56a2e086 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=arc-v-rhx-100-series" } */ + +int +f (int x) +{ + begin: + if (x <= 3) + goto begin; +} + +/* { dg-final { scan-assembler "\\sli\\sa5,3\n\\sble\\sa0,a5,.L\[0-9\]+\n" } } */ From ea09fb490a4cd4e98492efdfa9d42fdfe230fec2 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 4 Jun 2024 04:58:46 -0700 Subject: [PATCH 08/47] arcv: implement TARGET_SCHED_FUSION_PRIORITY To take better advantage of double load/store fusion, make use of the sched_fusion pass that assigns unique "fusion priorities" to load/store instructions and schedules operations on adjacent addresses together. This maximizes the probability that loads/stores are fused between each other instead of with other instructions. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 75 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 84a90e893a9b..c6abc60cb2bb 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11518,6 +11518,78 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return false; } +/* If INSN is a load or store of address in the form of [base+offset], + extract the two parts and set to BASE and OFFSET. IS_LOAD is set + to TRUE if it's a load. Return TRUE if INSN is such an instruction, + otherwise return FALSE. */ + +static bool +fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load) +{ + rtx x, dest, src; + + gcc_assert (INSN_P (insn)); + x = PATTERN (insn); + if (GET_CODE (x) != SET) + return false; + + src = SET_SRC (x); + dest = SET_DEST (x); + if (REG_P (src) && MEM_P (dest)) + { + *is_load = false; + extract_base_offset_in_addr (dest, base, offset); + } + else if (MEM_P (src) && REG_P (dest)) + { + *is_load = true; + extract_base_offset_in_addr (src, base, offset); + } + else + return false; + + return (*base != NULL_RTX && *offset != NULL_RTX); +} + +static void +riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, + int *pri) +{ + int tmp, off_val; + bool is_load; + rtx base, offset; + + gcc_assert (INSN_P (insn)); + + tmp = max_pri - 1; + if (!fusion_load_store (insn, &base, &offset, &is_load)) + { + *pri = tmp; + *fusion_pri = tmp; + return; + } + + tmp /= 2; + + /* INSN with smaller base register goes first. */ + tmp -= ((REGNO (base) & 0xff) << 20); + + /* INSN with smaller offset goes first. */ + off_val = (int)(INTVAL (offset)); + + /* Put loads/stores operating on adjacent words into the same + * scheduling group. */ + *fusion_pri = tmp - ((off_val / (UNITS_PER_WORD * 2)) << 1) + is_load; + + if (off_val >= 0) + tmp -= (off_val & 0xfffff); + else + tmp += ((- off_val) & 0xfffff); + + *pri = tmp; + return; +} + /* Adjust the cost/latency of instructions for scheduling. 
For now this is just used to change the latency of vector instructions according to their LMUL. We assume that an insn with LMUL == 8 requires @@ -16199,6 +16271,9 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_MACRO_FUSION_PAIR_P #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p +#undef TARGET_SCHED_FUSION_PRIORITY +#define TARGET_SCHED_FUSION_PRIORITY riscv_sched_fusion_priority + #undef TARGET_SCHED_VARIABLE_ISSUE #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue From 2767e2e06b66dc03bee0db1ab2230e61e53c9116 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Wed, 12 Jun 2024 02:06:43 -0700 Subject: [PATCH 09/47] arcv: fuse load/store + register post-{inc,dec}rement With this patch, arcv_macro_fusion_pair_p () recognizes instruction pairs like: LOAD rd1, [rs1,offset] add/sub rd2, rs1, rs2/imm (where all regs are distinct) and: STORE rs2, [rs1,offset] add/sub rd, rs1, rs2/imm as fused macro-op pairs. In the case of a load, rd1 being equal to rd2, rs1, or rs2 would lead to data hazards, hence this is disallowed; for stores, rs1 and rs2 of the two instructions must match. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index c6abc60cb2bb..63db9c6f375a 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10867,6 +10867,38 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return true; } + /* Fuse load/store + register post-{inc,dec}rement: + * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X rs1) (const_int)))) + * or + * prev (st) == (set (mem:X (plus:X (reg:X rs1) (const_int))) (reg:X rs2)) + * ... + */ + if ((GET_CODE (SET_SRC (curr_set)) == PLUS + || GET_CODE (SET_SRC (curr_set)) == MINUS) + && REG_P (XEXP (SET_SRC (curr_set), 0)) + && ((get_attr_type (prev) == TYPE_LOAD + && REG_P (XEXP (SET_SRC (prev_set), 0)) + && REGNO (XEXP (SET_SRC (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (prev_set), 0)) + != REGNO (SET_DEST (prev_set)) + && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)) + /* curr (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ + && (CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + /* or curr (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ + || REGNO (SET_DEST (prev_set)) + != REGNO (XEXP (SET_SRC (curr_set), 1)))) + || (get_attr_type (prev) == TYPE_STORE + && REG_P (XEXP (SET_DEST (prev_set), 0)) + && REGNO (XEXP (SET_DEST (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 0)) + /* curr (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ + && (CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + /* or curr (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ + || REGNO (XEXP (SET_DEST (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 1)))))) + return true; + /* Fuse load-immediate with a store of the destination register. 
*/ if (get_attr_type (prev) == TYPE_MOVE && get_attr_move_type (prev) == MOVE_TYPE_CONST From 55bba898bc4800adcdcc6df0693afd7287fc183e Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Mon, 17 Jun 2024 01:23:28 -0700 Subject: [PATCH 10/47] arcv: fuse load/store with lui Fuse together instruction pairs such as: LOAD rd1, [rs1,offset] lui rd2, imm (where rd1 and rd2 are distinct) and: STORE rs2, [rs1,offset] lui rd, imm Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 63db9c6f375a..d78b98a76903 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10899,6 +10899,22 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) == REGNO (XEXP (SET_SRC (curr_set), 1)))))) return true; + /* Fuse load/store with lui: + * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X) (const_int)))) + * or + * prev (st) == (set (mem:X (plus:X (reg:X) (const_int))) (reg:X rD)) + * + * curr (lui) == (set (reg:X rd2) (const_int UPPER_IMM_20)) + */ + if (((get_attr_type (curr) == TYPE_MOVE + && GET_CODE (SET_SRC (curr_set)) == HIGH) + || (CONST_INT_P (SET_SRC (curr_set)) + && LUI_OPERAND (INTVAL (SET_SRC (curr_set))))) + && ((get_attr_type (prev) == TYPE_LOAD + && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set))) + || get_attr_type (prev) == TYPE_STORE)) + return true; + /* Fuse load-immediate with a store of the destination register. */ if (get_attr_type (prev) == TYPE_MOVE && get_attr_move_type (prev) == MOVE_TYPE_CONST From 76a437094cad4401185c72f6ff93e46f48f49b27 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Fri, 21 Jun 2024 08:19:46 -0700 Subject: [PATCH 11/47] arcv: create a 32-bit integer multiply-add instruction pattern The RHX core executes integer multiply-add sequences of the form: mul r1,r2,r3 add r1,r1,r4 in 1 cycle due to macro-op fusion. This patch adds a define_insn_and_split to recognize the above sequence and preserve it as a single insn up until the post-reload split pass. Since, due to a microarchitectural restriction, the output operand of both instructions must be the same register, the insn_and_split pattern has two alternatives corresponding to the following cases: (0) r1 is different from r4, in which case the insn can be split to the sequence above; (1) r1 and r4 are the same, in which case a temporary register has to be used and there is no fusion. Alternative (1) is discouraged so that reload maximizes the number of instances where MAC fusion can be applied. Since RHX is a rv32im core, the pattern requires that the target is 32-bit and supports multiplication. In addition, the {u,}maddhisi3 expand is implemented for RHX to convert the ( 16-bit x 16-bit + 32_bit ) WIDEN_MULT_PLUS_EXPR GIMPLE operator to the aforementioned madd_split instruction directly. Lastly, a very basic testcase is introduced to make sure that the new patterns are sufficient to produce MAC-fusion-aware code. No new dejagnu failures with RUNTESTFLAGS="CFLAGS_FOR_TARGET=-mtune=rhx dg.exp". 
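For reference, a minimal source pattern (hypothetical example, separate from the testcase added below) that is expected to reach the new 16x16+32 expanders, and from there the fused mul/add sequence, when built with -O2 -march=rv32im -mabi=ilp32 -mtune=arc-v-rhx-100-series:

  int
  mac16 (short a, short b, int acc)
  {
    /* 16-bit x 16-bit + 32-bit accumulate: the middle end should form a
       WIDEN_MULT_PLUS_EXPR here, which the expander lowers to the madd
       pattern so that the mul and the add stay adjacent for fusion.  */
    return acc + a * b;
  }
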
Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.md | 57 ++++++++++++++++++- .../gcc.target/riscv/arcv-fusion-madd.c | 12 ++++ 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 5779a862743d..02a8ff39a59f 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4502,7 +4502,35 @@ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand")) (sign_extend:SI (match_operand:HI 2 "register_operand"))) (match_operand:SI 3 "register_operand")))] - "TARGET_XTHEADMAC" + "TARGET_XTHEADMAC || (riscv_is_micro_arch (arcv_rhx100) + && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL))" + { + if (riscv_is_micro_arch (arcv_rhx100)) + { + rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); + emit_insn (gen_extendhisi2 (tmp0, operands[1])); + emit_insn (gen_extendhisi2 (tmp1, operands[2])); + emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + DONE; + } + } +) + +(define_expand "umaddhisi4" + [(set (match_operand:SI 0 "register_operand") + (plus:SI + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand")) + (zero_extend:SI (match_operand:HI 2 "register_operand"))) + (match_operand:SI 3 "register_operand")))] + "riscv_is_micro_arch (arcv_rhx100) + && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" + { + rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (tmp0, operands[1])); + emit_insn (gen_zero_extendhisi2 (tmp1, operands[2])); + emit_insn (gen_madd_split (operands[0], tmp0, tmp1, operands[3])); + DONE; + } ) (define_expand "msubhisi4" @@ -4514,6 +4542,33 @@ "TARGET_XTHEADMAC" ) +(define_insn_and_split "madd_split" + [(set (match_operand:SI 0 "register_operand" "=&r,r") + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "register_operand" "r,r")) + (match_operand:SI 3 "register_operand" "r,?0"))) + (clobber (match_scratch:SI 4 "=&r,&r"))] + "riscv_is_micro_arch (rhx) && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" + "#" + "&& reload_completed" + [(const_int 0)] + "{ + if (REGNO (operands[0]) == REGNO (operands[3])) + { + emit_insn (gen_mulsi3 (operands[4], operands[1], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[3], operands[4])); + } + else + { + emit_insn (gen_mulsi3 (operands[0], operands[1], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[0], operands[3])); + } + DONE; + }" + [(set_attr "type" "imul")] +) + ;; String compare with length insn. 
;; Argument 0 is the target (result) ;; Argument 1 is the source1 diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c new file mode 100644 index 000000000000..eb8665f576c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv32 } */ +/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */ +/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im -mabi=ilp32" } */ + +int +f (int x, int y, int z, int v, int w) +{ + return x + y * z + v * w; +} + +/* { dg-final { scan-assembler {\smul\s([ast][0-9]+),a1,a2\n\sadd\s\1,\1,a0\n\smul\sa0,a3,a4\n\sadd\sa0,a0,\1\n} } } */ From e8311934da0e7de2721f1fba0b7be918c3ea1130 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Fri, 21 Jun 2024 07:27:33 -0700 Subject: [PATCH 12/47] arcv: fuse integer multiply-add instruction pairs To make sure that the multiply-add pairs (split post-reload from the madd_split instruction) are not broken up by the sched2 pass, designate them as fusable in arcv_macro_fusion_pair_p (). Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rhx100.md | 5 +++++ gcc/config/riscv/riscv.cc | 7 +++++++ gcc/config/riscv/riscv.md | 25 ++++++++++--------------- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md index 256871fc1656..08d468b1288c 100644 --- a/gcc/config/riscv/arcv-rhx100.md +++ b/gcc/config/riscv/arcv-rhx100.md @@ -42,6 +42,11 @@ condmove,mvpair,zicond,cpop,clmul")) "((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early) | (arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early)) | ((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))") +(define_insn_reservation "arcv_rhx100_imul_fused" 4 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "imul_fused")) + "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early + arcv_rhx100_MPY32), nothing*3") + (define_insn_reservation "arcv_rhx100_jmp_insn" 1 (and (eq_attr "tune" "arcv_rhx100") (eq_attr "type" "branch,jump,call,jalr,ret,trap")) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d78b98a76903..4fe451fee286 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10925,6 +10925,13 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set))))) return true; + if (GET_CODE (SET_SRC (prev_set)) == MULT + && GET_CODE (SET_SRC (curr_set)) == PLUS + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) + && (REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)) + || REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1)))) + return true; + return false; } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 02a8ff39a59f..239e8d859811 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -518,7 +518,7 @@ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, - vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16, + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,imul_fused, sf_vc,sf_vc_se" (cond 
[(eq_attr "got" "load") (const_string "load") @@ -4528,7 +4528,7 @@ rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); emit_insn (gen_zero_extendhisi2 (tmp0, operands[1])); emit_insn (gen_zero_extendhisi2 (tmp1, operands[2])); - emit_insn (gen_madd_split (operands[0], tmp0, tmp1, operands[3])); + emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); DONE; } ) @@ -4542,31 +4542,26 @@ "TARGET_XTHEADMAC" ) -(define_insn_and_split "madd_split" +(define_insn "madd_split_fused" [(set (match_operand:SI 0 "register_operand" "=&r,r") (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r,r") (match_operand:SI 2 "register_operand" "r,r")) (match_operand:SI 3 "register_operand" "r,?0"))) (clobber (match_scratch:SI 4 "=&r,&r"))] - "riscv_is_micro_arch (rhx) && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" - "#" - "&& reload_completed" - [(const_int 0)] - "{ + "riscv_is_micro_arch (arcv_rhx100) + && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" + { if (REGNO (operands[0]) == REGNO (operands[3])) { - emit_insn (gen_mulsi3 (operands[4], operands[1], operands[2])); - emit_insn (gen_addsi3 (operands[0], operands[3], operands[4])); + return "mul\t%4,%1,%2\n\tadd\t%4,%3,%4\n\tmv\t%0,%4"; } else { - emit_insn (gen_mulsi3 (operands[0], operands[1], operands[2])); - emit_insn (gen_addsi3 (operands[0], operands[0], operands[3])); + return "mul\t%0,%1,%2\n\tadd\t%0,%0,%3"; } - DONE; - }" - [(set_attr "type" "imul")] + } + [(set_attr "type" "imul_fused")] ) ;; String compare with length insn. From 3cb240ecf94274c8226061e9b91096bd2b7b3b68 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Sun, 7 Jul 2024 21:31:13 +0200 Subject: [PATCH 13/47] arcv: implement bit-extract fusion The bitfield zero_extract operation is normally expanded into an srai followed by an andi. (With the ZBS extension enabled, the special case of 1-bit zero-extract is implemented with the bexti insn.) However, since the RHX core can execute a shift-left and a shift-right of the same register in 1 cycle, we would prefer to emit those two instructions instead, and schedule them together so that macro fusion can take place. The required steps to achieve this are: (1) Create an insn_and_split that handles the zero_extract RTX; (2) Tell the combiner to use that split by lowering the cost of the zero_extract RTX when the target is the RHX core; (3) Designate the resulting slli + srli pair as fusable by the scheduler. Attached is a small testcase demonstrating the split, and that the bexti insn still takes priority over the shift pair. 
Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rhx100.md | 5 +++++ gcc/config/riscv/riscv.cc | 10 +++++++++- gcc/config/riscv/riscv.md | 19 ++++++++++++++++++- .../gcc.target/riscv/arcv-fusion-xbfu.c | 14 ++++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md index 08d468b1288c..398f13131606 100644 --- a/gcc/config/riscv/arcv-rhx100.md +++ b/gcc/config/riscv/arcv-rhx100.md @@ -47,6 +47,11 @@ (eq_attr "type" "imul_fused")) "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early + arcv_rhx100_MPY32), nothing*3") +(define_insn_reservation "arcv_rhx100_alu_fused" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "alu_fused")) + "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early) | (arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse0_early + arcv_rhx100_ALU_B_fuse1_early)") + (define_insn_reservation "arcv_rhx100_jmp_insn" 1 (and (eq_attr "tune" "arcv_rhx100") (eq_attr "type" "branch,jump,call,jalr,ret,trap")) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 4fe451fee286..2806de015d45 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -4337,7 +4337,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN } gcc_fallthrough (); case SIGN_EXTRACT: - if (TARGET_XTHEADBB && outer_code == SET + if ((riscv_is_micro_arch (arcv_rhx100) || TARGET_XTHEADBB) + && outer_code == SET && CONST_INT_P (XEXP (x, 1)) && CONST_INT_P (XEXP (x, 2))) { @@ -10932,6 +10933,13 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) || REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1)))) return true; + /* Fuse logical shift left with logical shift right (bit-extract pattern). 
*/ + if (GET_CODE (SET_SRC (prev_set)) == ASHIFT + && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) + && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) + return true; + return false; } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 239e8d859811..eece6f1c17e7 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -518,7 +518,7 @@ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, - vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,imul_fused, + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,imul_fused,alu_fused, sf_vc,sf_vc_se" (cond [(eq_attr "got" "load") (const_string "load") @@ -4564,6 +4564,23 @@ [(set_attr "type" "imul_fused")] ) +(define_insn "*zero_extract_fused" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")))] + "riscv_is_micro_arch (arcv_rhx100) && !TARGET_64BIT + && (INTVAL (operands[2]) > 1 || !TARGET_ZBS)" + { + int amount = INTVAL (operands[2]); + int end = INTVAL (operands[3]) + amount; + operands[2] = GEN_INT (BITS_PER_WORD - end); + operands[3] = GEN_INT (BITS_PER_WORD - amount); + return "slli\t%0,%1,%2\n\tsrli\t%0,%0,%3"; + } + [(set_attr "type" "alu_fused")] +) + ;; String compare with length insn. ;; Argument 0 is the target (result) ;; Argument 1 is the source1 diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c new file mode 100644 index 000000000000..010038b52c96 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv32 } */ +/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-Oz" "-Os" } } */ +/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32" } */ + +#define bit_extract(x,start,amt) (((x)>>(start)) & (~(0xffffffff << (amt)))) + +int +f (int x) +{ + return bit_extract(x,10,14) + bit_extract(x,1,1); +} + +/* { dg-final { scan-assembler {\sslli\s([ast][0-9]+),a0,8\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1\n\sadd\sa0,\2,a0\n} } } */ From 66e5d1c8cd91431531b5b8ffd5481107e63e25af Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 9 Jul 2024 04:11:09 -0700 Subject: [PATCH 14/47] arcv: allow inverted instruction order for some fusion types Some fusion types (namely, LD/ST-OP/OPIMM and LD/ST-LUI) are available regardless of the order of instructions. To support this, extract the new arcv_memop_arith_pair_p () and arcv_memop_lui_pair_p () functions and call them twice. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 141 ++++++++++++++++++++++++++------------ 1 file changed, 97 insertions(+), 44 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 2806de015d45..805f888619c0 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10822,6 +10822,97 @@ arcv_fused_addr_p (rtx addr0, rtx addr1) return false; } +/* Return true if PREV and CURR constitute an ordered load/store + op/opimm + pair, for the purposes of ARCV-specific macro-op fusion. 
*/ +static bool +arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + gcc_assert (prev_set); + gcc_assert (curr_set); + + /* Fuse load/store + register post-{inc,dec}rement: + * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X rs1) (const_int)))) + * or + * prev (st) == (set (mem:X (plus:X (reg:X rs1) (const_int))) (reg:X rs2)) + * ... + */ + if ((get_attr_type (curr) == TYPE_ARITH + || get_attr_type (curr) == TYPE_LOGICAL + || get_attr_type (curr) == TYPE_SHIFT + || get_attr_type (curr) == TYPE_SLT + || get_attr_type (curr) == TYPE_BITMANIP + || get_attr_type (curr) == TYPE_MIN + || get_attr_type (curr) == TYPE_MAX + || get_attr_type (curr) == TYPE_MINU + || get_attr_type (curr) == TYPE_MAXU + || get_attr_type (curr) == TYPE_CLZ + || get_attr_type (curr) == TYPE_CTZ) + && (CONST_INT_P (SET_SRC (curr_set)) + || REG_P (XEXP (SET_SRC (curr_set), 0))) + && ((get_attr_type (prev) == TYPE_LOAD + && REG_P (XEXP (SET_SRC (prev_set), 0)) + && REGNO (XEXP (SET_SRC (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (prev_set), 0)) + != REGNO (SET_DEST (prev_set)) + && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)) + && (/* (set (reg:X rd1) (not (reg:X rs1))) */ + GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 + /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ + || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ + || REGNO (SET_DEST (prev_set)) + != REGNO (XEXP (SET_SRC (curr_set), 1)))) + || (get_attr_type (prev) == TYPE_STORE + && REG_P (XEXP (SET_DEST (prev_set), 0)) + && REGNO (XEXP (SET_DEST (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 0)) + && (/* (set (reg:X rd1) (not (reg:X rs1))) */ + GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 + /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ + || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ + || REGNO (XEXP (SET_DEST (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 1)))))) + return true; + + return false; +} + +/* Return true if PREV and CURR constitute an ordered load/store + lui pair, for + the purposes of ARCV-specific macro-op fusion. */ +static bool +arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + gcc_assert (prev_set); + gcc_assert (curr_set); + + /* Fuse load/store with lui: + * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X) (const_int)))) + * or + * prev (st) == (set (mem:X (plus:X (reg:X) (const_int))) (reg:X rD)) + * + * curr (lui) == (set (reg:X rd2) (const_int UPPER_IMM_20)) + */ + if (REG_P (curr) + && ((get_attr_type (curr) == TYPE_MOVE + && GET_CODE (SET_SRC (curr_set)) == HIGH) + || (CONST_INT_P (SET_SRC (curr_set)) + && LUI_OPERAND (INTVAL (SET_SRC (curr_set))))) + && ((get_attr_type (prev) == TYPE_LOAD + && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set))) + || get_attr_type (prev) == TYPE_STORE)) + return true; + + return false; +} + /* Return true if PREV and CURR should be kept together during scheduling. 
*/ static bool @@ -10868,52 +10959,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return true; } - /* Fuse load/store + register post-{inc,dec}rement: - * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X rs1) (const_int)))) - * or - * prev (st) == (set (mem:X (plus:X (reg:X rs1) (const_int))) (reg:X rs2)) - * ... - */ - if ((GET_CODE (SET_SRC (curr_set)) == PLUS - || GET_CODE (SET_SRC (curr_set)) == MINUS) - && REG_P (XEXP (SET_SRC (curr_set), 0)) - && ((get_attr_type (prev) == TYPE_LOAD - && REG_P (XEXP (SET_SRC (prev_set), 0)) - && REGNO (XEXP (SET_SRC (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 0)) - && REGNO (XEXP (SET_SRC (prev_set), 0)) - != REGNO (SET_DEST (prev_set)) - && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)) - /* curr (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ - && (CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) - /* or curr (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ - || REGNO (SET_DEST (prev_set)) - != REGNO (XEXP (SET_SRC (curr_set), 1)))) - || (get_attr_type (prev) == TYPE_STORE - && REG_P (XEXP (SET_DEST (prev_set), 0)) - && REGNO (XEXP (SET_DEST (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 0)) - /* curr (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ - && (CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) - /* or curr (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ - || REGNO (XEXP (SET_DEST (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 1)))))) + /* Fuse a pre- or post-update memory operation. */ + if (arcv_memop_arith_pair_p (prev, curr) + || arcv_memop_arith_pair_p (curr, prev)) return true; - /* Fuse load/store with lui: - * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X) (const_int)))) - * or - * prev (st) == (set (mem:X (plus:X (reg:X) (const_int))) (reg:X rD)) - * - * curr (lui) == (set (reg:X rd2) (const_int UPPER_IMM_20)) - */ - if (((get_attr_type (curr) == TYPE_MOVE - && GET_CODE (SET_SRC (curr_set)) == HIGH) - || (CONST_INT_P (SET_SRC (curr_set)) - && LUI_OPERAND (INTVAL (SET_SRC (curr_set))))) - && ((get_attr_type (prev) == TYPE_LOAD - && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set))) - || get_attr_type (prev) == TYPE_STORE)) + /* Fuse a memory operation preceded or followed by a lui. */ + if (arcv_memop_lui_pair_p (prev, curr) + || arcv_memop_lui_pair_p (curr, prev)) return true; /* Fuse load-immediate with a store of the destination register. */ From 381c2ee5aa1b49f3ebd6215c5ecd002f916a7f29 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 1 Oct 2024 01:02:17 -0700 Subject: [PATCH 15/47] arcv: add scheduling implementation for RHX-100 This commit implements the scheduling model for the RHX-100 core. Among notable things are: (1) The arcv_macro_fusion_pair_p () hook has been modified to not create SCHED_GROUP's larger than 2 instructions; also, it gives priority to double load/store fusion, suppressing the other types until sched2. (2) riscv_issue_rate () is set to 4 and the system is modeled as 4 separate pipelines, giving access to as many instructions in ready_list as possible. (3) The rhx.md description puts some initial constraints in place (e.g. memory ops can only go into pipe B), saving some work in the reordering hook. 
(4) The riscv_sched_variable_issue () and riscv_sched_reorder2 () hooks work together to make sure (in order of descending priority) that: (a) the critical path and the instruction priorities are respected; (b) both pipes are filled (taking advantage of parallel dispatch within the microarchitectural constraints); (c) there is as much fusion going on as possible (and the existing fusion pairs are not broken up). There is probably some room for improvement, and some tweaks will probably have to be made in response to HLA changes as the HW development process goes on. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 283 ++++++++++++++++++++++++++++++++++---- 1 file changed, 259 insertions(+), 24 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 805f888619c0..6305e4c9d892 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -340,6 +340,12 @@ unsigned riscv_stack_boundary; /* Whether in riscv_output_mi_thunk. */ static bool riscv_in_thunk_func = false; +static int alu_pipe_scheduled_p; +static int pipeB_scheduled_p; + +static rtx_insn *last_scheduled_insn; +static short cached_can_issue_more; + /* If non-zero, this is an offset to be added to SP to redefine the CFA when restoring the FP register from the stack. Only valid when generating the epilogue. */ @@ -10660,6 +10666,21 @@ riscv_issue_rate (void) static int riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) { + /* Beginning of cycle - reset variables. */ + if (more == tune_param->issue_rate) + { + alu_pipe_scheduled_p = 0; + pipeB_scheduled_p = 0; + } + + if (alu_pipe_scheduled_p && pipeB_scheduled_p) + { + cached_can_issue_more = 0; + return 0; + } + + cached_can_issue_more = more; + if (DEBUG_INSN_P (insn)) return more; @@ -10680,6 +10701,28 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) an assert so we can find and fix this problem. */ gcc_assert (insn_has_dfa_reservation_p (insn)); + if (next_insn (insn) && INSN_P (next_insn (insn)) + && SCHED_GROUP_P (next_insn (insn))) + { + if (get_attr_type (insn) == TYPE_LOAD + || get_attr_type (insn) == TYPE_STORE + || get_attr_type (next_insn (insn)) == TYPE_LOAD + || get_attr_type (next_insn (insn)) == TYPE_STORE) + pipeB_scheduled_p = 1; + else + alu_pipe_scheduled_p = 1; + } + + if (get_attr_type (insn) == TYPE_ALU_FUSED + || get_attr_type (insn) == TYPE_IMUL_FUSED) + { + alu_pipe_scheduled_p = 1; + more -= 1; + } + + last_scheduled_insn = insn; + cached_can_issue_more = more - 1; + return more - 1; } @@ -10918,15 +10961,35 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) static bool arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) { + /* Never create sched groups with more than 2 members. */ + if (SCHED_GROUP_P (prev)) + return false; + rtx prev_set = single_set (prev); rtx curr_set = single_set (curr); - /* prev and curr are simple SET insns i.e. no flag setting or branching. */ - bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); - /* Fuse load-immediate with a dependent conditional branch. */ - if (get_attr_type (prev) == TYPE_MOVE - && get_attr_move_type (prev) == MOVE_TYPE_CONST - && any_condjump_p (curr)) + /* Fuse multiply-add pair. 
*/ + if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT + && GET_CODE (SET_SRC (curr_set)) == PLUS + && (REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (SET_DEST (prev_set)) == + REGNO (XEXP (SET_SRC (curr_set), 0)) + || (REG_P (XEXP (SET_SRC (curr_set), 1)) + && REGNO (SET_DEST (prev_set)) == + REGNO (XEXP (SET_SRC (curr_set), 1))))) + return true; + + /* Fuse logical shift left with logical shift right (bit-extract pattern). */ + if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT + && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) + && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) + return true; + + /* Fuse load-immediate with a dependent conditional branch. */ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && any_condjump_p (curr)) { rtx comp = XEXP (SET_SRC (curr_set), 0); @@ -10934,6 +10997,13 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set)); } + /* Do not fuse loads/stores before sched2. */ + if (!reload_completed || sched_fusion) + return false; + + /* prev and curr are simple SET insns i.e. no flag setting or branching. */ + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + /* Don't handle anything with a jump past this point. */ if (!simple_sets_p) return false; @@ -10959,6 +11029,30 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return true; } + /* Look ahead 1 insn to make sure double loads/stores are always + fused together, even in the presence of other opportunities. */ + if (next_insn (curr) && single_set (next_insn (curr)) + && get_attr_type (curr) == TYPE_LOAD + && get_attr_type (next_insn (curr)) == TYPE_LOAD) + { + rtx addr0 = XEXP (SET_SRC (curr_set), 0); + rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0); + + if (arcv_fused_addr_p (addr0, addr1)) + return false; + } + + if (next_insn (curr) && single_set (next_insn (curr)) + && get_attr_type (curr) == TYPE_STORE + && get_attr_type (next_insn (curr)) == TYPE_STORE) + { + rtx addr0 = XEXP (SET_DEST (curr_set), 0); + rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0); + + if (arcv_fused_addr_p (addr0, addr1)) + return false; + } + /* Fuse a pre- or post-update memory operation. */ if (arcv_memop_arith_pair_p (prev, curr) || arcv_memop_arith_pair_p (curr, prev)) @@ -10979,20 +11073,6 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set))))) return true; - if (GET_CODE (SET_SRC (prev_set)) == MULT - && GET_CODE (SET_SRC (curr_set)) == PLUS - && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) - && (REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)) - || REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1)))) - return true; - - /* Fuse logical shift left with logical shift right (bit-extract pattern). */ - if (GET_CODE (SET_SRC (prev_set)) == ASHIFT - && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT - && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) - && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) - return true; - return false; } @@ -11714,17 +11794,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, we currently only perform the adjustment when -madjust-lmul-cost is given. 
*/ static int -riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost, - unsigned int) +riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, + int cost, unsigned int) { + if (riscv_is_micro_arch (arcv_rhx100) && dep_type == REG_DEP_ANTI + && !SCHED_GROUP_P (insn)) + return cost + 1; + /* Only do adjustments for the generic out-of-order scheduling model. */ if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo) return cost; - if (recog_memoized (insn) < 0) + if (recog_memoized (dep_insn) < 0) return cost; - enum attr_type type = get_attr_type (insn); + enum attr_type type = get_attr_type (dep_insn); if (type == TYPE_VFREDO || type == TYPE_VFWREDO) { @@ -11775,6 +11859,7 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost, return new_cost; } + /* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can can be scheduled for speculative execution. Reject vsetvl instructions to prevent the scheduler from hoisting them out of basic blocks without @@ -11796,6 +11881,149 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) } } +static void +riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, + int verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + last_scheduled_insn = 0; +} + +static int +riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, + int verbose ATTRIBUTE_UNUSED, + rtx_insn **ready, + int *n_readyp, + int clock ATTRIBUTE_UNUSED) +{ + if (sched_fusion) + return cached_can_issue_more; + + if (!cached_can_issue_more) + return 0; + + /* Fuse double load/store instances missed by sched_fusion. */ + if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (last_scheduled_insn) + && (get_attr_type (last_scheduled_insn) == TYPE_LOAD + || get_attr_type (last_scheduled_insn) == TYPE_STORE)) + { + for (int i = 1; i <= *n_readyp; i++) + { + if (NONDEBUG_INSN_P (ready[*n_readyp - i]) + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && (!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) + && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) + { + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; + pipeB_scheduled_p = 1; + return cached_can_issue_more; + } + } + pipeB_scheduled_p = 1; + } + + /* Try to fuse a non-memory last_scheduled_insn. 
*/ + if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p) + && last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (last_scheduled_insn) + && (get_attr_type (last_scheduled_insn) != TYPE_LOAD + && get_attr_type (last_scheduled_insn) != TYPE_STORE)) + { + for (int i = 1; i <= *n_readyp; i++) + { + if (NONDEBUG_INSN_P (ready[*n_readyp - i]) + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && (!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) + && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) + { + if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) + if (pipeB_scheduled_p) + continue; + else + pipeB_scheduled_p = 1; + else if (!alu_pipe_scheduled_p) + alu_pipe_scheduled_p = 1; + else + pipeB_scheduled_p = 1; + + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; + return cached_can_issue_more; + } + } + alu_pipe_scheduled_p = 1; + } + + /* When pipe B is scheduled, we can have no more memops this cycle. */ + if (pipeB_scheduled_p && *n_readyp > 0 + && NONDEBUG_INSN_P (ready[*n_readyp - 1]) + && recog_memoized (ready[*n_readyp - 1]) >= 0 + && !SCHED_GROUP_P (ready[*n_readyp - 1]) + && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + { + if (alu_pipe_scheduled_p) + return 0; + + for (int i = 2; i <= *n_readyp; i++) + { + if ((NONDEBUG_INSN_P (ready[*n_readyp - i]) + && recog_memoized (ready[*n_readyp - i]) >= 0 + && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD + && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && ((!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))))) + || ((next_insn (ready[*n_readyp - i]) + && NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + && recog_memoized (next_insn (ready[*n_readyp - i])) >= 0 + && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD + && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE))) + { + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + alu_pipe_scheduled_p = 1; + cached_can_issue_more = 1; + return 1; + } + } + return 0; + } + + /* If all else fails, schedule a single instruction. */ + if (ready && *n_readyp > 0 + && NONDEBUG_INSN_P (ready[*n_readyp - 1]) + && recog_memoized (ready[*n_readyp - 1]) >= 0 + && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD + && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + { + if (!pipeB_scheduled_p + && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + { + alu_pipe_scheduled_p = pipeB_scheduled_p = 1; + cached_can_issue_more = 1; + return 1; + } + else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + { + alu_pipe_scheduled_p = pipeB_scheduled_p = 1; + cached_can_issue_more = 1; + return 1; + } + } + + return cached_can_issue_more; +} + /* Auxiliary function to emit RISC-V ELF attribute. 
*/ static void riscv_emit_attribute () @@ -16396,9 +16624,16 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost + #undef TARGET_SCHED_CAN_SPECULATE_INSN #define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 riscv_sched_reorder2 + +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT riscv_sched_init + #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall From ee9edc645b898523d4aa2a7168f3917d59879ba1 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 3 Dec 2024 00:49:30 -0800 Subject: [PATCH 16/47] arcv: adjust scheduling priority of memop pairs for RHX-100 This patch implements riscv_sched_adjust_priority () for the RHX-100 microarchitecture by slightly bumping the priority of load/store pairs. As a consequence of this change, it becomes easier for riscv_sched_reorder2 () to schedule instructions in the memory pipe. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 6305e4c9d892..58343c77dcfc 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11881,6 +11881,31 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) } } +static int +riscv_sched_adjust_priority (rtx_insn *insn, int priority) +{ + if (!riscv_is_micro_arch (arcv_rhx100)) + return priority; + + if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return priority; + + /* Bump the priority of fused load-store pairs for easier + scheduling of the memory pipe. The specific increase + value is determined empirically. */ + if (next_insn (insn) && INSN_P (next_insn (insn)) + && SCHED_GROUP_P (next_insn (insn)) + && ((get_attr_type (insn) == TYPE_STORE + && get_attr_type (next_insn (insn)) == TYPE_STORE) + || (get_attr_type (insn) == TYPE_LOAD + && get_attr_type (next_insn (insn)) == TYPE_LOAD))) + return priority + 1; + + return priority; +} + + static void riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, @@ -16624,10 +16649,12 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost - #undef TARGET_SCHED_CAN_SPECULATE_INSN #define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY riscv_sched_adjust_priority + #undef TARGET_SCHED_REORDER2 #define TARGET_SCHED_REORDER2 riscv_sched_reorder2 From 5af3f92fa6fd9cb433f34e17d967fcab9a0a28b9 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Thu, 28 Nov 2024 02:08:08 -0800 Subject: [PATCH 17/47] arcv: fuse LH+LH and LB+LB instruction pairs In addition to the LW+LW and SW+SW pairs that are already being recognized as macro-op-fusable, add support for 8-bit and naturally aligned 16-bit loads operating on adjacent memory locations. To that end, introduce the new microarch-specific pair_fusion_mode_allowed_p () predicate, and call it from fusion_load_store () during sched_fusion, and from arcv_macro_fusion_pair_p () during regular scheduling passes. 
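For illustration only (this snippet is not part of the patch, and the struct and function names are made up), the kind of code the new predicate targets is a pair of naturally aligned halfword loads at adjacent offsets from the same base register:

  struct pair { short lo; short hi; };

  int
  sum_pair (const struct pair *p)
  {
    /* With optimization this is expected to become two lh loads from
       0(a0) and 2(a0), which may now be placed in the same scheduling
       group and fused, analogous to the existing LW+LW case.  */
    return p->lo + p->hi;
  }

The same reasoning applies to LB/LBU pairs on adjacent bytes; store pairs remain limited to the SW+SW case, as pair_fusion_mode_allowed_p () only accepts SImode stores.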
Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.cc | 115 ++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 35 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 58343c77dcfc..be29da213a17 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10818,37 +10818,73 @@ riscv_set_is_shNadduw (rtx set) && REG_P (SET_DEST (set))); } +/* Return TRUE if the target microarchitecture supports macro-op + fusion for two memory operations of mode MODE (the direction + of transfer is determined by the IS_LOAD parameter). */ + +static bool +pair_fusion_mode_allowed_p (machine_mode mode, bool is_load) +{ + if (!riscv_is_micro_arch (arcv_rhx100)) + return true; + + return ((is_load && (mode == SImode + || mode == HImode + || mode == QImode)) + || (!is_load && mode == SImode)); +} + /* Return TRUE if two addresses can be fused. */ static bool -arcv_fused_addr_p (rtx addr0, rtx addr1) +arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load) { rtx base0, base1, tmp; HOST_WIDE_INT off0 = 0, off1 = 0; - if (GET_CODE (addr0) == PLUS) + if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND) + addr0 = XEXP (addr0, 0); + + if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND) + addr1 = XEXP (addr1, 0); + + if (!MEM_P (addr0) || !MEM_P (addr1)) + return false; + + /* Require the accesses to have the same mode. */ + if (GET_MODE (addr0) != GET_MODE (addr1)) + return false; + + /* Check if the mode is allowed. */ + if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load)) + return false; + + rtx reg0 = XEXP (addr0, 0); + rtx reg1 = XEXP (addr1, 0); + + if (GET_CODE (reg0) == PLUS) { - base0 = XEXP (addr0, 0); - tmp = XEXP (addr0, 1); + base0 = XEXP (reg0, 0); + tmp = XEXP (reg0, 1); if (!CONST_INT_P (tmp)) return false; off0 = INTVAL (tmp); } - else if (REG_P (addr0)) - base0 = addr0; + else if (REG_P (reg0)) + base0 = reg0; else return false; - if (GET_CODE (addr1) == PLUS) + if (GET_CODE (reg1) == PLUS) { - base1 = XEXP (addr1, 0); - tmp = XEXP (addr1, 1); + base1 = XEXP (reg1, 0); + tmp = XEXP (reg1, 1); if (!CONST_INT_P (tmp)) return false; off1 = INTVAL (tmp); } - else if (REG_P (addr1)) - base1 = addr1; + else if (REG_P (reg1)) + base1 = reg1; else return false; @@ -10857,9 +10893,9 @@ arcv_fused_addr_p (rtx addr0, rtx addr1) if (REGNO (base0) != REGNO (base1)) return false; - /* Offsets have to be aligned to word boundary and adjacent in memory, - but the memory operations can be narrower. */ - if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD)) + /* Fuse adjacent aligned addresses. 
*/ + if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0) + && (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant ())) return true; return false; @@ -11012,20 +11048,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) if (get_attr_type (prev) == TYPE_LOAD && get_attr_type (curr) == TYPE_LOAD) { - rtx addr0 = XEXP (SET_SRC (prev_set), 0); - rtx addr1 = XEXP (SET_SRC (curr_set), 0); - - if (arcv_fused_addr_p (addr0, addr1)) + if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true)) return true; } if (get_attr_type (prev) == TYPE_STORE && get_attr_type (curr) == TYPE_STORE) { - rtx addr0 = XEXP (SET_DEST (prev_set), 0); - rtx addr1 = XEXP (SET_DEST (curr_set), 0); - - if (arcv_fused_addr_p (addr0, addr1)) + if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false)) return true; } @@ -11035,10 +11065,9 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && get_attr_type (curr) == TYPE_LOAD && get_attr_type (next_insn (curr)) == TYPE_LOAD) { - rtx addr0 = XEXP (SET_SRC (curr_set), 0); - rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0); - - if (arcv_fused_addr_p (addr0, addr1)) + if (arcv_fused_addr_p (SET_SRC (curr_set), + SET_SRC (single_set (next_insn (curr))), + true)) return false; } @@ -11046,10 +11075,9 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && get_attr_type (curr) == TYPE_STORE && get_attr_type (next_insn (curr)) == TYPE_STORE) { - rtx addr0 = XEXP (SET_DEST (curr_set), 0); - rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0); - - if (arcv_fused_addr_p (addr0, addr1)) + if (arcv_fused_addr_p (SET_DEST (curr_set), + SET_DEST (single_set (next_insn (curr))), + false)) return false; } @@ -11720,7 +11748,8 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) otherwise return FALSE. */ static bool -fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load) +fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode, + bool *is_load) { rtx x, dest, src; @@ -11731,15 +11760,22 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load) src = SET_SRC (x); dest = SET_DEST (x); + + if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) + && MEM_P (XEXP (src, 0))) + src = XEXP (src, 0); + if (REG_P (src) && MEM_P (dest)) { *is_load = false; - extract_base_offset_in_addr (dest, base, offset); + if (extract_base_offset_in_addr (dest, base, offset)) + *mode = GET_MODE (dest); } else if (MEM_P (src) && REG_P (dest)) { *is_load = true; - extract_base_offset_in_addr (src, base, offset); + if (extract_base_offset_in_addr (src, base, offset)) + *mode = GET_MODE (src); } else return false; @@ -11754,11 +11790,13 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, int tmp, off_val; bool is_load; rtx base, offset; + machine_mode mode = SImode; gcc_assert (INSN_P (insn)); tmp = max_pri - 1; - if (!fusion_load_store (insn, &base, &offset, &is_load)) + if (!fusion_load_store (insn, &base, &offset, &mode, &is_load) + || !pair_fusion_mode_allowed_p (mode, is_load)) { *pri = tmp; *fusion_pri = tmp; @@ -11767,6 +11805,11 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, tmp /= 2; + if (mode == HImode) + tmp /= 2; + else if (mode == QImode) + tmp /= 4; + /* INSN with smaller base register goes first. 
*/ tmp -= ((REGNO (base) & 0xff) << 20); @@ -11775,7 +11818,9 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, /* Put loads/stores operating on adjacent words into the same * scheduling group. */ - *fusion_pri = tmp - ((off_val / (UNITS_PER_WORD * 2)) << 1) + is_load; + *fusion_pri = tmp + - ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1) + + is_load; if (off_val >= 0) tmp -= (off_val & 0xfffff); From 5ea894c26f7570e8e95cc8d574ba5fdc8c26deea Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 6 May 2025 00:06:35 -0700 Subject: [PATCH 18/47] arcv: do not emit 64-bit MAC pairs for 32-bit data Currently on ARC-V, the maddhisi3 pattern always expands to the madd_split_fused instruction regardless of the target word size, which leads to the full-width mul and add instructions being emitted for 32-bit data even on riscv64: mul a6,a4,s6 add a6,a6,s7 sext.w s7,a6 To fix this, add another define_insn (madd_split_fused_extended) pattern wrapping the result of a MAC operation into a sign-extension from 32 to 64 bits, and use it in the (u)maddhisi3 expander in case of a 64-bit target. The assembly code after this change is more efficient, viz.: mulw a6,a4,s6 addw a6,a6,s7 Signed-off-by: Artemiy Volkov --- gcc/config/riscv/riscv.md | 55 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index eece6f1c17e7..3987823be2d3 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4510,7 +4510,21 @@ rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); emit_insn (gen_extendhisi2 (tmp0, operands[1])); emit_insn (gen_extendhisi2 (tmp1, operands[2])); - emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + + if (TARGET_64BIT) + { + rtx op0 = gen_reg_rtx (DImode); + emit_insn (gen_madd_split_fused_extended (op0, tmp0, tmp1, operands[3])); + op0 = gen_lowpart (SImode, op0); + SUBREG_PROMOTED_VAR_P (op0) = 1; + SUBREG_PROMOTED_SET (op0, SRP_SIGNED); + emit_move_insn (operands[0], op0); + } + else + { + emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + } + DONE; } } @@ -4528,7 +4542,21 @@ rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); emit_insn (gen_zero_extendhisi2 (tmp0, operands[1])); emit_insn (gen_zero_extendhisi2 (tmp1, operands[2])); - emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + + if (TARGET_64BIT) + { + rtx op0 = gen_reg_rtx (DImode); + emit_insn (gen_madd_split_fused_extended (op0, tmp0, tmp1, operands[3])); + op0 = gen_lowpart (SImode, op0); + SUBREG_PROMOTED_VAR_P (op0) = 1; + SUBREG_PROMOTED_SET (op0, SRP_SIGNED); + emit_move_insn (operands[0], op0); + } + else + { + emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + } + DONE; } ) @@ -4564,6 +4592,29 @@ [(set_attr "type" "imul_fused")] ) +(define_insn "madd_split_fused_extended" + [(set (match_operand:DI 0 "register_operand" "=&r,r") + (sign_extend:DI + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "register_operand" "r,r")) + (match_operand:SI 3 "register_operand" "r,?0")))) + (clobber (match_scratch:SI 4 "=&r,&r"))] + "arcv_micro_arch_supports_fusion_p () + && (TARGET_ZMMUL || TARGET_MUL)" + { + if (REGNO (operands[0]) == REGNO (operands[3])) + { + return "mulw\t%4,%1,%2\n\taddw\t%4,%3,%4\n\tmv\t%0,%4"; + } + else + { + return "mulw\t%0,%1,%2\n\taddw\t%0,%0,%3"; + } + } + [(set_attr "type" "imul_fused")] +) + (define_insn 
"*zero_extract_fused" [(set (match_operand:SI 0 "register_operand" "=r") (zero_extract:SI (match_operand:SI 1 "register_operand" "r") From 6091a48da01dd7a60e868cf9be9a7f1139c39369 Mon Sep 17 00:00:00 2001 From: Michiel Derhaeg Date: Thu, 17 Jul 2025 05:45:37 -0700 Subject: [PATCH 19/47] arcv: Disable *3 when fusion is available This define_insn_and_split prevents *zero_extract_fused from being selected. Updated the test. It succeeded despite the fused case not being selected because the right instructions were produced still. Signed-off-by: Michiel Derhaeg --- gcc/config/riscv/iterators.md | 2 ++ gcc/config/riscv/riscv.md | 5 ++++- gcc/doc/riscv-mtune.texi | 2 ++ gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 35de17f76cd9..df979031cd88 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -218,6 +218,8 @@ (zero_extract "srliw")]) (define_code_attr extract_shift [(sign_extract "ashiftrt") (zero_extract "lshiftrt")]) +(define_code_attr is_zero_extract [(sign_extract "false") + (zero_extract "true")]) ;; This code iterator allows the two right shift instructions to be ;; generated from the same template. diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 3987823be2d3..b60daf217d2c 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3101,6 +3101,7 @@ ;; * Single-bit extraction (SFB) ;; * Extraction instruction th.ext(u) (XTheadBb) ;; * lshrsi3_extend_2 (see above) +;; * Zero extraction fusion (ARC-V) (define_insn_and_split "*3" [(set (match_operand:GPR 0 "register_operand" "=r") (any_extract:GPR @@ -3113,6 +3114,8 @@ && (INTVAL (operands[2]) == 1)) && !TARGET_XTHEADBB && !TARGET_XANDESPERF + && !(riscv_is_micro_arch (arcv_rhx100) + && ) && !(TARGET_64BIT && (INTVAL (operands[3]) > 0) && (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))" @@ -4600,7 +4603,7 @@ (match_operand:SI 2 "register_operand" "r,r")) (match_operand:SI 3 "register_operand" "r,?0")))) (clobber (match_scratch:SI 4 "=&r,&r"))] - "arcv_micro_arch_supports_fusion_p () + "riscv_is_micro_arch (arcv_rhx100) && (TARGET_ZMMUL || TARGET_MUL)" { if (REGNO (operands[0]) == REGNO (operands[3])) diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi index 63a01db67726..8ffb3db906fe 100644 --- a/gcc/doc/riscv-mtune.texi +++ b/gcc/doc/riscv-mtune.texi @@ -52,6 +52,8 @@ particular CPU name. 
Permissible values for this option are: @samp{arc-v-rmx-100-series}, +@samp{arc-v-rhx-100-series}, + @samp{generic-ooo}, @samp{size}, diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c index 010038b52c96..7abf54ec1448 100644 --- a/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target rv32 } */ /* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-Oz" "-Os" } } */ -/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32" } */ +/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32 -dp" } */ #define bit_extract(x,start,amt) (((x)>>(start)) & (~(0xffffffff << (amt)))) @@ -11,4 +11,4 @@ f (int x) return bit_extract(x,10,14) + bit_extract(x,1,1); } -/* { dg-final { scan-assembler {\sslli\s([ast][0-9]+),a0,8\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1\n\sadd\sa0,\2,a0\n} } } */ +/* { dg-final { scan-assembler {\sslli\s([ast][0-9]+),a0,8.*zero_extract_fused\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1.*\n\sadd\sa0,\2,a0.*\n} } } */ From 0912ccf068050076a15ee33079dbae563744746b Mon Sep 17 00:00:00 2001 From: Michiel Derhaeg Date: Wed, 15 Oct 2025 10:10:21 +0200 Subject: [PATCH 20/47] fixup! arcv: Add initial scheduling scheme. --- gcc/config/riscv/riscv.md | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index b60daf217d2c..67e8e76d725b 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -673,7 +673,6 @@ ;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100,arcv_rhx100" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,arcv_rhx100,generic_ooo" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. From 1698738dab5f1fb0fc355ebfcf6b74ff63eb62a4 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 12 Nov 2025 16:13:18 +0000 Subject: [PATCH 21/47] tmp: fusion: add arcv.{cc,h} files. 
Signed-off-by: Luis Silva --- gcc/config.gcc | 2 +- gcc/config/riscv/arcv.cc | 690 ++++++++++++++++++++++++++++++++ gcc/config/riscv/arcv.h | 34 ++ gcc/config/riscv/riscv-protos.h | 3 + gcc/config/riscv/riscv.cc | 600 ++------------------------- gcc/config/riscv/t-riscv | 10 + 6 files changed, 764 insertions(+), 575 deletions(-) create mode 100644 gcc/config/riscv/arcv.cc create mode 100644 gcc/config/riscv/arcv.h diff --git a/gcc/config.gcc b/gcc/config.gcc index c678b801f705..2c568d05077c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -559,7 +559,7 @@ riscv*) extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o" extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o riscv-avlprop.o riscv-vect-permconst.o" extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o sifive-vector-builtins-bases.o andes-vector-builtins-bases.o" - extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o" + extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o arcv.o" d_target_objs="riscv-d.o" extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h riscv_th_vector.h sifive_vector.h andes_vector.h" target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc" diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc new file mode 100644 index 000000000000..8674ab1cb674 --- /dev/null +++ b/gcc/config/riscv/arcv.cc @@ -0,0 +1,690 @@ +/* ARCV-specific macro-op fusion for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "tm.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "stor-layout.h" +#include "alias.h" +#include "fold-const.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "explow.h" +#include "calls.h" +#include "varasm.h" +#include "expr.h" +#include "tm_p.h" +#include "df.h" +#include "reload.h" +#include "sched-int.h" +#include "tm-constrs.h" +#include "arcv.h" + +/* Scheduler state tracking for dual-pipe ARCV architectures. */ +static int alu_pipe_scheduled_p; +static int pipeB_scheduled_p; +static rtx_insn *last_scheduled_insn; +static short cached_can_issue_more; + +/* Implement one boolean function for each of the values of the + arcv_mpy_option enum, for the needs of rhx100.md. 
*/ + +bool +arcv_mpy_1c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_1C; +} + +bool +arcv_mpy_2c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_2C; +} + +bool +arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_10C; +} + +/* Return TRUE if the target microarchitecture supports macro-op + fusion for two memory operations of mode MODE (the direction + of transfer is determined by the IS_LOAD parameter). */ + +static bool +pair_fusion_mode_allowed_p (machine_mode mode, bool is_load) +{ + if (!riscv_is_micro_arch (arcv_rhx100)) + return true; + + return ((is_load && (mode == SImode + || mode == HImode + || mode == QImode)) + || (!is_load && mode == SImode)); +} + +/* Return TRUE if two addresses can be fused. */ + +static bool +arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load) +{ + rtx base0, base1, tmp; + HOST_WIDE_INT off0 = 0, off1 = 0; + + if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND) + addr0 = XEXP (addr0, 0); + + if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND) + addr1 = XEXP (addr1, 0); + + if (!MEM_P (addr0) || !MEM_P (addr1)) + return false; + + /* Require the accesses to have the same mode. */ + if (GET_MODE (addr0) != GET_MODE (addr1)) + return false; + + /* Check if the mode is allowed. */ + if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load)) + return false; + + rtx reg0 = XEXP (addr0, 0); + rtx reg1 = XEXP (addr1, 0); + + if (GET_CODE (reg0) == PLUS) + { + base0 = XEXP (reg0, 0); + tmp = XEXP (reg0, 1); + if (!CONST_INT_P (tmp)) + return false; + off0 = INTVAL (tmp); + } + else if (REG_P (reg0)) + base0 = reg0; + else + return false; + + if (GET_CODE (reg1) == PLUS) + { + base1 = XEXP (reg1, 0); + tmp = XEXP (reg1, 1); + if (!CONST_INT_P (tmp)) + return false; + off1 = INTVAL (tmp); + } + else if (REG_P (reg1)) + base1 = reg1; + else + return false; + + /* Check if we have the same base. */ + gcc_assert (REG_P (base0) && REG_P (base1)); + if (REGNO (base0) != REGNO (base1)) + return false; + + /* Fuse adjacent aligned addresses. */ + if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0) + && (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant ())) + return true; + + return false; +} + + +/* Return true if PREV and CURR constitute an ordered load/store + op/opimm + pair, for the purposes of ARCV-specific macro-op fusion. */ +static bool +arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + gcc_assert (prev_set); + gcc_assert (curr_set); + + /* Fuse load/store + register post-{inc,dec}rement: + * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X rs1) (const_int)))) + * or + * prev (st) == (set (mem:X (plus:X (reg:X rs1) (const_int))) (reg:X rs2)) + * ... 
+ */ + if ((get_attr_type (curr) == TYPE_ARITH + || get_attr_type (curr) == TYPE_LOGICAL + || get_attr_type (curr) == TYPE_SHIFT + || get_attr_type (curr) == TYPE_SLT + || get_attr_type (curr) == TYPE_BITMANIP + || get_attr_type (curr) == TYPE_MIN + || get_attr_type (curr) == TYPE_MAX + || get_attr_type (curr) == TYPE_MINU + || get_attr_type (curr) == TYPE_MAXU + || get_attr_type (curr) == TYPE_CLZ + || get_attr_type (curr) == TYPE_CTZ) + && (CONST_INT_P (SET_SRC (curr_set)) + || REG_P (XEXP (SET_SRC (curr_set), 0))) + && ((get_attr_type (prev) == TYPE_LOAD + && REG_P (XEXP (SET_SRC (prev_set), 0)) + && REGNO (XEXP (SET_SRC (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (prev_set), 0)) + != REGNO (SET_DEST (prev_set)) + && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)) + && (/* (set (reg:X rd1) (not (reg:X rs1))) */ + GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 + /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ + || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ + || REGNO (SET_DEST (prev_set)) + != REGNO (XEXP (SET_SRC (curr_set), 1)))) + || (get_attr_type (prev) == TYPE_STORE + && REG_P (XEXP (SET_DEST (prev_set), 0)) + && REGNO (XEXP (SET_DEST (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 0)) + && (/* (set (reg:X rd1) (not (reg:X rs1))) */ + GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 + /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ + || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ + || REGNO (XEXP (SET_DEST (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 1)))))) + return true; + + return false; +} + + +/* Return true if PREV and CURR constitute an ordered load/store + lui pair, for + the purposes of ARCV-specific macro-op fusion. */ +static bool +arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + gcc_assert (prev_set); + gcc_assert (curr_set); + + /* Fuse load/store with lui: + * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X) (const_int)))) + * or + * prev (st) == (set (mem:X (plus:X (reg:X) (const_int))) (reg:X rD)) + * + * curr (lui) == (set (reg:X rd2) (const_int UPPER_IMM_20)) + */ + if (REG_P (curr) + && ((get_attr_type (curr) == TYPE_MOVE + && GET_CODE (SET_SRC (curr_set)) == HIGH) + || (CONST_INT_P (SET_SRC (curr_set)) + && LUI_OPERAND (INTVAL (SET_SRC (curr_set))))) + && ((get_attr_type (prev) == TYPE_LOAD + && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set))) + || get_attr_type (prev) == TYPE_STORE)) + return true; + + return false; +} + + +/* Return true if PREV and CURR should be kept together during scheduling. */ + +bool +arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + /* Never create sched groups with more than 2 members. */ + if (SCHED_GROUP_P (prev)) + return false; + + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + /* Fuse multiply-add pair. 
*/ + if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT + && GET_CODE (SET_SRC (curr_set)) == PLUS + && (REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (SET_DEST (prev_set)) == + REGNO (XEXP (SET_SRC (curr_set), 0)) + || (REG_P (XEXP (SET_SRC (curr_set), 1)) + && REGNO (SET_DEST (prev_set)) == + REGNO (XEXP (SET_SRC (curr_set), 1))))) + return true; + + /* Fuse logical shift left with logical shift right (bit-extract pattern). */ + if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT + && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) + && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) + return true; + + /* Fuse load-immediate with a dependent conditional branch. */ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && any_condjump_p (curr)) + { + rtx comp = XEXP (SET_SRC (curr_set), 0); + + return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set)) + || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set)); + } + + /* Do not fuse loads/stores before sched2. */ + if (!reload_completed || sched_fusion) + return false; + + /* prev and curr are simple SET insns i.e. no flag setting or branching. */ + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + + /* Don't handle anything with a jump past this point. */ + if (!simple_sets_p) + return false; + + /* Fuse adjacent loads and stores. */ + if (get_attr_type (prev) == TYPE_LOAD + && get_attr_type (curr) == TYPE_LOAD) + { + if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true)) + return true; + } + + if (get_attr_type (prev) == TYPE_STORE + && get_attr_type (curr) == TYPE_STORE) + { + if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false)) + return true; + } + + /* Look ahead 1 insn to make sure double loads/stores are always + fused together, even in the presence of other opportunities. */ + if (next_insn (curr) && single_set (next_insn (curr)) + && get_attr_type (curr) == TYPE_LOAD + && get_attr_type (next_insn (curr)) == TYPE_LOAD) + { + if (arcv_fused_addr_p (SET_SRC (curr_set), + SET_SRC (single_set (next_insn (curr))), + true)) + return false; + } + + if (next_insn (curr) && single_set (next_insn (curr)) + && get_attr_type (curr) == TYPE_STORE + && get_attr_type (next_insn (curr)) == TYPE_STORE) + { + if (arcv_fused_addr_p (SET_DEST (curr_set), + SET_DEST (single_set (next_insn (curr))), + false)) + return false; + } + + /* Fuse a pre- or post-update memory operation. */ + if (arcv_memop_arith_pair_p (prev, curr) + || arcv_memop_arith_pair_p (curr, prev)) + return true; + + /* Fuse a memory operation preceded or followed by a lui. */ + if (arcv_memop_lui_pair_p (prev, curr) + || arcv_memop_lui_pair_p (curr, prev)) + return true; + + /* Fuse load-immediate with a store of the destination register. */ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && get_attr_type (curr) == TYPE_STORE + && ((REG_P (SET_SRC (curr_set)) + && SET_DEST (prev_set) == SET_SRC (curr_set)) + || (SUBREG_P (SET_SRC (curr_set)) + && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set))))) + return true; + + return false; +} + +/* Initialize ARCV scheduler state at the beginning of scheduling. */ + +void +arcv_sched_init (void) +{ + last_scheduled_insn = 0; +} + + +/* Try to reorder ready queue to promote ARCV fusion opportunities. + Returns the number of instructions that can be issued this cycle. 
*/ + + +int +arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) +{ + if (sched_fusion) + return cached_can_issue_more; + + if (!cached_can_issue_more) + return 0; + + /* Fuse double load/store instances missed by sched_fusion. */ + if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (last_scheduled_insn) + && (get_attr_type (last_scheduled_insn) == TYPE_LOAD + || get_attr_type (last_scheduled_insn) == TYPE_STORE)) + { + for (int i = 1; i <= *n_readyp; i++) + { + if (NONDEBUG_INSN_P (ready[*n_readyp - i]) + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && (!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) + && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) + { + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; + pipeB_scheduled_p = 1; + return cached_can_issue_more; + } + } + pipeB_scheduled_p = 1; + } + + /* Try to fuse a non-memory last_scheduled_insn. */ + if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p) + && last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (last_scheduled_insn) + && (get_attr_type (last_scheduled_insn) != TYPE_LOAD + && get_attr_type (last_scheduled_insn) != TYPE_STORE)) + { + for (int i = 1; i <= *n_readyp; i++) + { + if (NONDEBUG_INSN_P (ready[*n_readyp - i]) + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && (!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) + && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) + { + if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) + if (pipeB_scheduled_p) + continue; + else + pipeB_scheduled_p = 1; + else if (!alu_pipe_scheduled_p) + alu_pipe_scheduled_p = 1; + else + pipeB_scheduled_p = 1; + + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; + return cached_can_issue_more; + } + } + alu_pipe_scheduled_p = 1; + } + + /* When pipe B is scheduled, we can have no more memops this cycle. */ + if (pipeB_scheduled_p && *n_readyp > 0 + && NONDEBUG_INSN_P (ready[*n_readyp - 1]) + && recog_memoized (ready[*n_readyp - 1]) >= 0 + && !SCHED_GROUP_P (ready[*n_readyp - 1]) + && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + { + if (alu_pipe_scheduled_p) + return 0; + + for (int i = 2; i <= *n_readyp; i++) + { + if ((NONDEBUG_INSN_P (ready[*n_readyp - i]) + && recog_memoized (ready[*n_readyp - i]) >= 0 + && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD + && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && ((!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))))) + || ((next_insn (ready[*n_readyp - i]) + && NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + && recog_memoized (next_insn (ready[*n_readyp - i])) >= 0 + && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD + && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE))) + { + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + alu_pipe_scheduled_p = 1; + cached_can_issue_more = 1; + return 1; + } + } + return 0; + } + + /* If all else fails, schedule a single instruction. 
*/ + if (ready && *n_readyp > 0 + && NONDEBUG_INSN_P (ready[*n_readyp - 1]) + && recog_memoized (ready[*n_readyp - 1]) >= 0 + && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD + && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + { + if (!pipeB_scheduled_p + && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + { + alu_pipe_scheduled_p = pipeB_scheduled_p = 1; + cached_can_issue_more = 1; + return 1; + } + else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + { + alu_pipe_scheduled_p = pipeB_scheduled_p = 1; + cached_can_issue_more = 1; + return 1; + } + } + + return cached_can_issue_more; +} + +int +arcv_sched_adjust_priority (rtx_insn *insn, int priority) +{ + if (!riscv_is_micro_arch (arcv_rhx100)) + return priority; + + if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return priority; + + /* Bump the priority of fused load-store pairs for easier + scheduling of the memory pipe. The specific increase + value is determined empirically. */ + if (next_insn (insn) && INSN_P (next_insn (insn)) + && SCHED_GROUP_P (next_insn (insn)) + && ((get_attr_type (insn) == TYPE_STORE + && get_attr_type (next_insn (insn)) == TYPE_STORE) + || (get_attr_type (insn) == TYPE_LOAD + && get_attr_type (next_insn (insn)) == TYPE_LOAD))) + return priority + 1; + + return priority; +} + +/* Adjust scheduling cost for ARCV fusion. */ + +int +arcv_sched_adjust_cost (rtx_insn *insn, int dep_type, int cost) +{ + if (dep_type == REG_DEP_ANTI && !SCHED_GROUP_P (insn)) + return cost + 1; + + return cost; +} + + +/* If INSN is a load or store of address in the form of [base+offset], + extract the two parts and set to BASE and OFFSET. IS_LOAD is set + to TRUE if it's a load. Return TRUE if INSN is such an instruction, + otherwise return FALSE. */ + +static bool +fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode, + bool *is_load) +{ + rtx x, dest, src; + + gcc_assert (INSN_P (insn)); + x = PATTERN (insn); + if (GET_CODE (x) != SET) + return false; + + src = SET_SRC (x); + dest = SET_DEST (x); + + if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) + && MEM_P (XEXP (src, 0))) + src = XEXP (src, 0); + + if (REG_P (src) && MEM_P (dest)) + { + *is_load = false; + if (extract_base_offset_in_addr (dest, base, offset)) + *mode = GET_MODE (dest); + } + else if (MEM_P (src) && REG_P (dest)) + { + *is_load = true; + if (extract_base_offset_in_addr (src, base, offset)) + *mode = GET_MODE (src); + } + else + return false; + + return (*base != NULL_RTX && *offset != NULL_RTX); +} + + +void +arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, + int *pri) +{ + int tmp, off_val; + bool is_load; + rtx base, offset; + machine_mode mode = SImode; + + gcc_assert (INSN_P (insn)); + + tmp = max_pri - 1; + if (!fusion_load_store (insn, &base, &offset, &mode, &is_load) + || !pair_fusion_mode_allowed_p (mode, is_load)) + { + *pri = tmp; + *fusion_pri = tmp; + return; + } + + tmp /= 2; + + if (mode == HImode) + tmp /= 2; + else if (mode == QImode) + tmp /= 4; + + /* INSN with smaller base register goes first. */ + tmp -= ((REGNO (base) & 0xff) << 20); + + /* INSN with smaller offset goes first. */ + off_val = (int)(INTVAL (offset)); + + /* Put loads/stores operating on adjacent words into the same + * scheduling group. 
*/ + *fusion_pri = tmp + - ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1) + + is_load; + + if (off_val >= 0) + tmp -= (off_val & 0xfffff); + else + tmp += ((- off_val) & 0xfffff); + + *pri = tmp; + return; +} + + +bool +arcv_can_issue_more_p (rtx_insn *insn, int more) +{ + /* Beginning of cycle - reset variables. */ + if (more == riscv_get_tune_param_issue_rate ()) + { + alu_pipe_scheduled_p = 0; + pipeB_scheduled_p = 0; + } + + if (alu_pipe_scheduled_p && pipeB_scheduled_p) + { + cached_can_issue_more = 0; + return false; + } + + cached_can_issue_more = more; + + return true; +} + +int +arcv_sched_variable_issue (rtx_insn *insn, int more) +{ + if (next_insn (insn) && INSN_P (next_insn (insn)) + && SCHED_GROUP_P (next_insn (insn))) + { + if (get_attr_type (insn) == TYPE_LOAD + || get_attr_type (insn) == TYPE_STORE + || get_attr_type (next_insn (insn)) == TYPE_LOAD + || get_attr_type (next_insn (insn)) == TYPE_STORE) + pipeB_scheduled_p = 1; + else + alu_pipe_scheduled_p = 1; + } + + if (get_attr_type (insn) == TYPE_ALU_FUSED + || get_attr_type (insn) == TYPE_IMUL_FUSED) + { + alu_pipe_scheduled_p = 1; + more -= 1; + } + + last_scheduled_insn = insn; + cached_can_issue_more = more - 1; + + return cached_can_issue_more; +} diff --git a/gcc/config/riscv/arcv.h b/gcc/config/riscv/arcv.h new file mode 100644 index 000000000000..01b82b9065f7 --- /dev/null +++ b/gcc/config/riscv/arcv.h @@ -0,0 +1,34 @@ +/* ARCV-specific macro-op fusion for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_RISCV_ARCV_H +#define GCC_RISCV_ARCV_H + +/* ARCV scheduler interface functions. */ +extern bool arcv_can_issue_more_p (rtx_insn *, int); +extern int arcv_sched_variable_issue (rtx_insn *, int); +extern bool arcv_macro_fusion_pair_p (rtx_insn *, rtx_insn *); +extern void arcv_sched_init (void); +extern int arcv_sched_reorder2 (rtx_insn **, int *); +extern int arcv_sched_adjust_priority (rtx_insn *, int); +extern int arcv_sched_adjust_cost (rtx_insn *, int, int); +extern void arcv_sched_fusion_priority (rtx_insn *, int, int *, int *); + +#endif /* GCC_RISCV_ARCV_H */ + diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 0260e7b5acc6..91aafdbd6848 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -858,6 +858,9 @@ extern rtx riscv_prefetch_cookie (rtx, rtx); extern bool riscv_prefetch_offset_address_p (rtx, machine_mode); struct riscv_tune_param; + +extern int riscv_get_tune_param_issue_rate (void); + /* Information about one micro-arch we know about. */ struct riscv_tune_info { /* This micro-arch canonical name. */ diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index be29da213a17..288fdd2b75e1 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. 
If not see #include "target-def.h" #include "riscv-vector-costs.h" #include "riscv-subset.h" +#include "arcv.h" /* Target variants that support full conditional move. */ #define TARGET_COND_MOV \ @@ -340,12 +341,6 @@ unsigned riscv_stack_boundary; /* Whether in riscv_output_mi_thunk. */ static bool riscv_in_thunk_func = false; -static int alu_pipe_scheduled_p; -static int pipeB_scheduled_p; - -static rtx_insn *last_scheduled_insn; -static short cached_can_issue_more; - /* If non-zero, this is an offset to be added to SP to redefine the CFA when restoring the FP register from the stack. Only valid when generating the epilogue. */ @@ -915,6 +910,12 @@ riscv_is_micro_arch (enum riscv_microarchitecture_type arch) return (riscv_microarchitecture == arch); } +int +riscv_get_tune_param_issue_rate (void) +{ + return tune_param->issue_rate; +} + void riscv_frame_info::reset(void) { total_size = 0; @@ -10406,30 +10407,6 @@ riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) return store_data_bypass_p (out_insn, in_insn); } -/* Implement one boolean function for each of the values of the - arcv_mpy_option enum, for the needs of rhx100.md. */ - -bool -arcv_mpy_1c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, - rtx_insn *in_insn ATTRIBUTE_UNUSED) -{ - return arcv_mpy_option == ARCV_MPY_OPTION_1C; -} - -bool -arcv_mpy_2c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, - rtx_insn *in_insn ATTRIBUTE_UNUSED) -{ - return arcv_mpy_option == ARCV_MPY_OPTION_2C; -} - -bool -arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, - rtx_insn *in_insn ATTRIBUTE_UNUSED) -{ - return arcv_mpy_option == ARCV_MPY_OPTION_10C; -} - /* Implement TARGET_SECONDARY_MEMORY_NEEDED. When floating-point registers are wider than integer ones, moves between @@ -10666,20 +10643,10 @@ riscv_issue_rate (void) static int riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) { - /* Beginning of cycle - reset variables. */ - if (more == tune_param->issue_rate) - { - alu_pipe_scheduled_p = 0; - pipeB_scheduled_p = 0; - } - if (alu_pipe_scheduled_p && pipeB_scheduled_p) - { - cached_can_issue_more = 0; + if (riscv_is_micro_arch (arcv_rhx100)) + if (!arcv_can_issue_more_p (insn, more)) return 0; - } - - cached_can_issue_more = more; if (DEBUG_INSN_P (insn)) return more; @@ -10701,27 +10668,8 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) an assert so we can find and fix this problem. */ gcc_assert (insn_has_dfa_reservation_p (insn)); - if (next_insn (insn) && INSN_P (next_insn (insn)) - && SCHED_GROUP_P (next_insn (insn))) - { - if (get_attr_type (insn) == TYPE_LOAD - || get_attr_type (insn) == TYPE_STORE - || get_attr_type (next_insn (insn)) == TYPE_LOAD - || get_attr_type (next_insn (insn)) == TYPE_STORE) - pipeB_scheduled_p = 1; - else - alu_pipe_scheduled_p = 1; - } - - if (get_attr_type (insn) == TYPE_ALU_FUSED - || get_attr_type (insn) == TYPE_IMUL_FUSED) - { - alu_pipe_scheduled_p = 1; - more -= 1; - } - - last_scheduled_insn = insn; - cached_can_issue_more = more - 1; + if (riscv_is_micro_arch (arcv_rhx100)) + return arcv_sched_variable_issue (insn, more); return more - 1; } @@ -10818,292 +10766,6 @@ riscv_set_is_shNadduw (rtx set) && REG_P (SET_DEST (set))); } -/* Return TRUE if the target microarchitecture supports macro-op - fusion for two memory operations of mode MODE (the direction - of transfer is determined by the IS_LOAD parameter). 
*/ - -static bool -pair_fusion_mode_allowed_p (machine_mode mode, bool is_load) -{ - if (!riscv_is_micro_arch (arcv_rhx100)) - return true; - - return ((is_load && (mode == SImode - || mode == HImode - || mode == QImode)) - || (!is_load && mode == SImode)); -} - -/* Return TRUE if two addresses can be fused. */ - -static bool -arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load) -{ - rtx base0, base1, tmp; - HOST_WIDE_INT off0 = 0, off1 = 0; - - if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND) - addr0 = XEXP (addr0, 0); - - if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND) - addr1 = XEXP (addr1, 0); - - if (!MEM_P (addr0) || !MEM_P (addr1)) - return false; - - /* Require the accesses to have the same mode. */ - if (GET_MODE (addr0) != GET_MODE (addr1)) - return false; - - /* Check if the mode is allowed. */ - if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load)) - return false; - - rtx reg0 = XEXP (addr0, 0); - rtx reg1 = XEXP (addr1, 0); - - if (GET_CODE (reg0) == PLUS) - { - base0 = XEXP (reg0, 0); - tmp = XEXP (reg0, 1); - if (!CONST_INT_P (tmp)) - return false; - off0 = INTVAL (tmp); - } - else if (REG_P (reg0)) - base0 = reg0; - else - return false; - - if (GET_CODE (reg1) == PLUS) - { - base1 = XEXP (reg1, 0); - tmp = XEXP (reg1, 1); - if (!CONST_INT_P (tmp)) - return false; - off1 = INTVAL (tmp); - } - else if (REG_P (reg1)) - base1 = reg1; - else - return false; - - /* Check if we have the same base. */ - gcc_assert (REG_P (base0) && REG_P (base1)); - if (REGNO (base0) != REGNO (base1)) - return false; - - /* Fuse adjacent aligned addresses. */ - if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0) - && (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant ())) - return true; - - return false; -} - -/* Return true if PREV and CURR constitute an ordered load/store + op/opimm - pair, for the purposes of ARCV-specific macro-op fusion. */ -static bool -arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) -{ - rtx prev_set = single_set (prev); - rtx curr_set = single_set (curr); - - gcc_assert (prev_set); - gcc_assert (curr_set); - - /* Fuse load/store + register post-{inc,dec}rement: - * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X rs1) (const_int)))) - * or - * prev (st) == (set (mem:X (plus:X (reg:X rs1) (const_int))) (reg:X rs2)) - * ... 
- */ - if ((get_attr_type (curr) == TYPE_ARITH - || get_attr_type (curr) == TYPE_LOGICAL - || get_attr_type (curr) == TYPE_SHIFT - || get_attr_type (curr) == TYPE_SLT - || get_attr_type (curr) == TYPE_BITMANIP - || get_attr_type (curr) == TYPE_MIN - || get_attr_type (curr) == TYPE_MAX - || get_attr_type (curr) == TYPE_MINU - || get_attr_type (curr) == TYPE_MAXU - || get_attr_type (curr) == TYPE_CLZ - || get_attr_type (curr) == TYPE_CTZ) - && (CONST_INT_P (SET_SRC (curr_set)) - || REG_P (XEXP (SET_SRC (curr_set), 0))) - && ((get_attr_type (prev) == TYPE_LOAD - && REG_P (XEXP (SET_SRC (prev_set), 0)) - && REGNO (XEXP (SET_SRC (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 0)) - && REGNO (XEXP (SET_SRC (prev_set), 0)) - != REGNO (SET_DEST (prev_set)) - && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)) - && (/* (set (reg:X rd1) (not (reg:X rs1))) */ - GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 - /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ - || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) - /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ - || REGNO (SET_DEST (prev_set)) - != REGNO (XEXP (SET_SRC (curr_set), 1)))) - || (get_attr_type (prev) == TYPE_STORE - && REG_P (XEXP (SET_DEST (prev_set), 0)) - && REGNO (XEXP (SET_DEST (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 0)) - && (/* (set (reg:X rd1) (not (reg:X rs1))) */ - GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 - /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ - || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) - /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ - || REGNO (XEXP (SET_DEST (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 1)))))) - return true; - - return false; -} - -/* Return true if PREV and CURR constitute an ordered load/store + lui pair, for - the purposes of ARCV-specific macro-op fusion. */ -static bool -arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) -{ - rtx prev_set = single_set (prev); - rtx curr_set = single_set (curr); - - gcc_assert (prev_set); - gcc_assert (curr_set); - - /* Fuse load/store with lui: - * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X) (const_int)))) - * or - * prev (st) == (set (mem:X (plus:X (reg:X) (const_int))) (reg:X rD)) - * - * curr (lui) == (set (reg:X rd2) (const_int UPPER_IMM_20)) - */ - if (REG_P (curr) - && ((get_attr_type (curr) == TYPE_MOVE - && GET_CODE (SET_SRC (curr_set)) == HIGH) - || (CONST_INT_P (SET_SRC (curr_set)) - && LUI_OPERAND (INTVAL (SET_SRC (curr_set))))) - && ((get_attr_type (prev) == TYPE_LOAD - && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set))) - || get_attr_type (prev) == TYPE_STORE)) - return true; - - return false; -} - -/* Return true if PREV and CURR should be kept together during scheduling. */ - -static bool -arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) -{ - /* Never create sched groups with more than 2 members. */ - if (SCHED_GROUP_P (prev)) - return false; - - rtx prev_set = single_set (prev); - rtx curr_set = single_set (curr); - - /* Fuse multiply-add pair. 
*/ - if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT - && GET_CODE (SET_SRC (curr_set)) == PLUS - && (REG_P (XEXP (SET_SRC (curr_set), 0)) - && REGNO (SET_DEST (prev_set)) == - REGNO (XEXP (SET_SRC (curr_set), 0)) - || (REG_P (XEXP (SET_SRC (curr_set), 1)) - && REGNO (SET_DEST (prev_set)) == - REGNO (XEXP (SET_SRC (curr_set), 1))))) - return true; - - /* Fuse logical shift left with logical shift right (bit-extract pattern). */ - if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT - && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT - && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) - && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) - return true; - - /* Fuse load-immediate with a dependent conditional branch. */ - if (get_attr_type (prev) == TYPE_MOVE - && get_attr_move_type (prev) == MOVE_TYPE_CONST - && any_condjump_p (curr)) - { - rtx comp = XEXP (SET_SRC (curr_set), 0); - - return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set)) - || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set)); - } - - /* Do not fuse loads/stores before sched2. */ - if (!reload_completed || sched_fusion) - return false; - - /* prev and curr are simple SET insns i.e. no flag setting or branching. */ - bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); - - /* Don't handle anything with a jump past this point. */ - if (!simple_sets_p) - return false; - - /* Fuse adjacent loads and stores. */ - if (get_attr_type (prev) == TYPE_LOAD - && get_attr_type (curr) == TYPE_LOAD) - { - if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true)) - return true; - } - - if (get_attr_type (prev) == TYPE_STORE - && get_attr_type (curr) == TYPE_STORE) - { - if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false)) - return true; - } - - /* Look ahead 1 insn to make sure double loads/stores are always - fused together, even in the presence of other opportunities. */ - if (next_insn (curr) && single_set (next_insn (curr)) - && get_attr_type (curr) == TYPE_LOAD - && get_attr_type (next_insn (curr)) == TYPE_LOAD) - { - if (arcv_fused_addr_p (SET_SRC (curr_set), - SET_SRC (single_set (next_insn (curr))), - true)) - return false; - } - - if (next_insn (curr) && single_set (next_insn (curr)) - && get_attr_type (curr) == TYPE_STORE - && get_attr_type (next_insn (curr)) == TYPE_STORE) - { - if (arcv_fused_addr_p (SET_DEST (curr_set), - SET_DEST (single_set (next_insn (curr))), - false)) - return false; - } - - /* Fuse a pre- or post-update memory operation. */ - if (arcv_memop_arith_pair_p (prev, curr) - || arcv_memop_arith_pair_p (curr, prev)) - return true; - - /* Fuse a memory operation preceded or followed by a lui. */ - if (arcv_memop_lui_pair_p (prev, curr) - || arcv_memop_lui_pair_p (curr, prev)) - return true; - - /* Fuse load-immediate with a store of the destination register. */ - if (get_attr_type (prev) == TYPE_MOVE - && get_attr_move_type (prev) == MOVE_TYPE_CONST - && get_attr_type (curr) == TYPE_STORE - && ((REG_P (SET_SRC (curr_set)) - && SET_DEST (prev_set) == SET_SRC (curr_set)) - || (SUBREG_P (SET_SRC (curr_set)) - && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set))))) - return true; - - return false; -} - /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR should be kept together during scheduling. 
*/ @@ -11742,93 +11404,19 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return false; } -/* If INSN is a load or store of address in the form of [base+offset], - extract the two parts and set to BASE and OFFSET. IS_LOAD is set - to TRUE if it's a load. Return TRUE if INSN is such an instruction, - otherwise return FALSE. */ - -static bool -fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode, - bool *is_load) -{ - rtx x, dest, src; - - gcc_assert (INSN_P (insn)); - x = PATTERN (insn); - if (GET_CODE (x) != SET) - return false; - - src = SET_SRC (x); - dest = SET_DEST (x); - - if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) - && MEM_P (XEXP (src, 0))) - src = XEXP (src, 0); - - if (REG_P (src) && MEM_P (dest)) - { - *is_load = false; - if (extract_base_offset_in_addr (dest, base, offset)) - *mode = GET_MODE (dest); - } - else if (MEM_P (src) && REG_P (dest)) - { - *is_load = true; - if (extract_base_offset_in_addr (src, base, offset)) - *mode = GET_MODE (src); - } - else - return false; - - return (*base != NULL_RTX && *offset != NULL_RTX); -} - static void riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, int *pri) { - int tmp, off_val; - bool is_load; - rtx base, offset; - machine_mode mode = SImode; - - gcc_assert (INSN_P (insn)); - - tmp = max_pri - 1; - if (!fusion_load_store (insn, &base, &offset, &mode, &is_load) - || !pair_fusion_mode_allowed_p (mode, is_load)) + if (riscv_is_micro_arch (arcv_rhx100)) { - *pri = tmp; - *fusion_pri = tmp; + arcv_sched_fusion_priority (insn, max_pri, fusion_pri, pri); return; } - tmp /= 2; - - if (mode == HImode) - tmp /= 2; - else if (mode == QImode) - tmp /= 4; - - /* INSN with smaller base register goes first. */ - tmp -= ((REGNO (base) & 0xff) << 20); - - /* INSN with smaller offset goes first. */ - off_val = (int)(INTVAL (offset)); - - /* Put loads/stores operating on adjacent words into the same - * scheduling group. */ - *fusion_pri = tmp - - ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1) - + is_load; - - if (off_val >= 0) - tmp -= (off_val & 0xfffff); - else - tmp += ((- off_val) & 0xfffff); - - *pri = tmp; - return; + /* Default priority for non-ARCV architectures. */ + *pri = max_pri - 1; + *fusion_pri = max_pri - 1; } /* Adjust the cost/latency of instructions for scheduling. @@ -11842,9 +11430,9 @@ static int riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, unsigned int) { - if (riscv_is_micro_arch (arcv_rhx100) && dep_type == REG_DEP_ANTI - && !SCHED_GROUP_P (insn)) - return cost + 1; + /* Use ARCV-specific cost adjustment for RHX-100. */ + if (riscv_is_micro_arch (arcv_rhx100)) + return arcv_sched_adjust_cost (insn, dep_type, cost); /* Only do adjustments for the generic out-of-order scheduling model. */ if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo) @@ -11929,23 +11517,8 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) static int riscv_sched_adjust_priority (rtx_insn *insn, int priority) { - if (!riscv_is_micro_arch (arcv_rhx100)) - return priority; - - if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (insn)) == CLOBBER) - return priority; - - /* Bump the priority of fused load-store pairs for easier - scheduling of the memory pipe. The specific increase - value is determined empirically. 
*/ - if (next_insn (insn) && INSN_P (next_insn (insn)) - && SCHED_GROUP_P (next_insn (insn)) - && ((get_attr_type (insn) == TYPE_STORE - && get_attr_type (next_insn (insn)) == TYPE_STORE) - || (get_attr_type (insn) == TYPE_LOAD - && get_attr_type (next_insn (insn)) == TYPE_LOAD))) - return priority + 1; + if (riscv_is_micro_arch (arcv_rhx100)) + return arcv_sched_adjust_priority (insn, priority); return priority; } @@ -11956,7 +11529,8 @@ riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, int max_ready ATTRIBUTE_UNUSED) { - last_scheduled_insn = 0; + if (riscv_is_micro_arch (arcv_rhx100)) + arcv_sched_init (); } static int @@ -11966,132 +11540,10 @@ riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int *n_readyp, int clock ATTRIBUTE_UNUSED) { - if (sched_fusion) - return cached_can_issue_more; + if (riscv_is_micro_arch (arcv_rhx100)) + return arcv_sched_reorder2 (ready, n_readyp); - if (!cached_can_issue_more) - return 0; - - /* Fuse double load/store instances missed by sched_fusion. */ - if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0 - && !SCHED_GROUP_P (last_scheduled_insn) - && (get_attr_type (last_scheduled_insn) == TYPE_LOAD - || get_attr_type (last_scheduled_insn) == TYPE_STORE)) - { - for (int i = 1; i <= *n_readyp; i++) - { - if (NONDEBUG_INSN_P (ready[*n_readyp - i]) - && !SCHED_GROUP_P (ready[*n_readyp - i]) - && (!next_insn (ready[*n_readyp - i]) - || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) - || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) - && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) - { - std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); - SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; - pipeB_scheduled_p = 1; - return cached_can_issue_more; - } - } - pipeB_scheduled_p = 1; - } - - /* Try to fuse a non-memory last_scheduled_insn. */ - if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p) - && last_scheduled_insn && ready && *n_readyp > 0 - && !SCHED_GROUP_P (last_scheduled_insn) - && (get_attr_type (last_scheduled_insn) != TYPE_LOAD - && get_attr_type (last_scheduled_insn) != TYPE_STORE)) - { - for (int i = 1; i <= *n_readyp; i++) - { - if (NONDEBUG_INSN_P (ready[*n_readyp - i]) - && !SCHED_GROUP_P (ready[*n_readyp - i]) - && (!next_insn (ready[*n_readyp - i]) - || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) - || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) - && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) - { - if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) - if (pipeB_scheduled_p) - continue; - else - pipeB_scheduled_p = 1; - else if (!alu_pipe_scheduled_p) - alu_pipe_scheduled_p = 1; - else - pipeB_scheduled_p = 1; - - std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); - SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; - return cached_can_issue_more; - } - } - alu_pipe_scheduled_p = 1; - } - - /* When pipe B is scheduled, we can have no more memops this cycle. 
*/ - if (pipeB_scheduled_p && *n_readyp > 0 - && NONDEBUG_INSN_P (ready[*n_readyp - 1]) - && recog_memoized (ready[*n_readyp - 1]) >= 0 - && !SCHED_GROUP_P (ready[*n_readyp - 1]) - && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) - { - if (alu_pipe_scheduled_p) - return 0; - - for (int i = 2; i <= *n_readyp; i++) - { - if ((NONDEBUG_INSN_P (ready[*n_readyp - i]) - && recog_memoized (ready[*n_readyp - i]) >= 0 - && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD - && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE - && !SCHED_GROUP_P (ready[*n_readyp - i]) - && ((!next_insn (ready[*n_readyp - i]) - || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) - || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))))) - || ((next_insn (ready[*n_readyp - i]) - && NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) - && recog_memoized (next_insn (ready[*n_readyp - i])) >= 0 - && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD - && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE))) - { - std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); - alu_pipe_scheduled_p = 1; - cached_can_issue_more = 1; - return 1; - } - } - return 0; - } - - /* If all else fails, schedule a single instruction. */ - if (ready && *n_readyp > 0 - && NONDEBUG_INSN_P (ready[*n_readyp - 1]) - && recog_memoized (ready[*n_readyp - 1]) >= 0 - && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD - && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) - { - if (!pipeB_scheduled_p - && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) - { - alu_pipe_scheduled_p = pipeB_scheduled_p = 1; - cached_can_issue_more = 1; - return 1; - } - else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) - { - alu_pipe_scheduled_p = pipeB_scheduled_p = 1; - cached_can_issue_more = 1; - return 1; - } - } - - return cached_can_issue_more; + return 0; } /* Auxiliary function to emit RISC-V ELF attribute. */ diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index b53a2dff2cf7..1a25ef54bdb9 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -181,6 +181,16 @@ riscv-zicfilp.o: $(srcdir)/config/riscv/riscv-zicfilp.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-zicfilp.cc +arcv.o: $(srcdir)/config/riscv/arcv.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(BACKEND_H) $(TARGET_H) $(RTL_H) \ + $(TREE_H) memmodel.h $(TM_H) $(OPTABS_H) $(REGS_H) $(EMIT_RTL_H) \ + $(RECOG_H) $(DIAGNOSTIC_CORE_H) stor-layout.h $(ALIAS_H) fold-const.h \ + output.h $(INSN_ATTR_H) $(FLAGS_H) explow.h $(CALLS_H) varasm.h \ + $(EXPR_H) tm-constrs.h $(TM_P_H) $(DF_H) reload.h sched-int.h \ + $(srcdir)/config/riscv/arcv.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/riscv/arcv.cc + PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \ From 53510e54a28a7c3a2665d18e5a08d587f0254704 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 12 Nov 2025 16:30:18 +0000 Subject: [PATCH 22/47] tmp: arcv: Repalce riscv_is_micro_arch with TARGET_ARCV_RHX100 macro. Align with how other RISC-V arch are checked (e.g., TARGET_SIFIVE_7, TARGET_ROCKET, TARGET_SIFIVE_P400_SERIES). 
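For reference, the check reduces to a plain macro test on the selected
microarchitecture, as defined by the riscv.h hunk below:

    #define TARGET_ARCV_RHX100 \
      (riscv_microarchitecture == arcv_rhx100)

so call sites test TARGET_ARCV_RHX100 directly instead of calling
riscv_is_micro_arch (arcv_rhx100).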
Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 4 ++-- gcc/config/riscv/riscv-c.cc | 2 +- gcc/config/riscv/riscv-protos.h | 1 - gcc/config/riscv/riscv.cc | 22 ++++++++-------------- gcc/config/riscv/riscv.h | 4 ++++ gcc/config/riscv/riscv.md | 14 +++++++------- 6 files changed, 22 insertions(+), 25 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 8674ab1cb674..bb3840a7802b 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -87,7 +87,7 @@ arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, static bool pair_fusion_mode_allowed_p (machine_mode mode, bool is_load) { - if (!riscv_is_micro_arch (arcv_rhx100)) + if (!TARGET_ARCV_RHX100) return true; return ((is_load && (mode == SImode @@ -516,7 +516,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) int arcv_sched_adjust_priority (rtx_insn *insn, int priority) { - if (!riscv_is_micro_arch (arcv_rhx100)) + if (!TARGET_ARCV_RHX100) return priority; if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index 52d240ceb89f..c669e889824b 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -149,7 +149,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) builtin_define_with_int_value ("__riscv_th_v_intrinsic", riscv_ext_version_value (0, 11)); - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) builtin_define ("__riscv_rhx"); /* Define architecture extension test macros. */ diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 91aafdbd6848..5e585148861b 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -823,7 +823,6 @@ extern rtx th_int_adjust_cfi_prologue (unsigned int); extern const char *th_asm_output_opcode (FILE *asm_out_file, const char *p); extern bool riscv_macro_fusion_p (); -extern bool riscv_is_micro_arch (enum riscv_microarchitecture_type); #ifdef RTX_CODE extern const char* diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 288fdd2b75e1..f17d328e0482 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -904,12 +904,6 @@ typedef enum typedef insn_code (*code_for_push_pop_t) (machine_mode); -bool -riscv_is_micro_arch (enum riscv_microarchitecture_type arch) -{ - return (riscv_microarchitecture == arch); -} - int riscv_get_tune_param_issue_rate (void) { @@ -4344,7 +4338,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN } gcc_fallthrough (); case SIGN_EXTRACT: - if ((riscv_is_micro_arch (arcv_rhx100) || TARGET_XTHEADBB) + if ((TARGET_ARCV_RHX100 || TARGET_XTHEADBB) && outer_code == SET && CONST_INT_P (XEXP (x, 1)) && CONST_INT_P (XEXP (x, 2))) @@ -10644,7 +10638,7 @@ static int riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) { - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) if (!arcv_can_issue_more_p (insn, more)) return 0; @@ -10668,7 +10662,7 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) an assert so we can find and fix this problem. 
*/ gcc_assert (insn_has_dfa_reservation_p (insn)); - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) return arcv_sched_variable_issue (insn, more); return more - 1; @@ -11408,7 +11402,7 @@ static void riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, int *pri) { - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) { arcv_sched_fusion_priority (insn, max_pri, fusion_pri, pri); return; @@ -11431,7 +11425,7 @@ riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, unsigned int) { /* Use ARCV-specific cost adjustment for RHX-100. */ - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) return arcv_sched_adjust_cost (insn, dep_type, cost); /* Only do adjustments for the generic out-of-order scheduling model. */ @@ -11517,7 +11511,7 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) static int riscv_sched_adjust_priority (rtx_insn *insn, int priority) { - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) return arcv_sched_adjust_priority (insn, priority); return priority; @@ -11529,7 +11523,7 @@ riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, int max_ready ATTRIBUTE_UNUSED) { - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) arcv_sched_init (); } @@ -11540,7 +11534,7 @@ riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int *n_readyp, int clock ATTRIBUTE_UNUSED) { - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) return arcv_sched_reorder2 (ready, n_readyp); return 0; diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index a0ad75c765a1..f2910877544b 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -971,6 +971,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use); || (riscv_microarchitecture == sifive_p400) \ || (riscv_microarchitecture == sifive_p600)) +/* True if the target is ARC-V RHX100. */ +#define TARGET_ARCV_RHX100 \ + (riscv_microarchitecture == arcv_rhx100) + /* True if the target supports misaligned vector loads and stores. 
*/ #define TARGET_VECTOR_MISALIGN_SUPPORTED \ riscv_vector_unaligned_access_p diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 67e8e76d725b..f52f4eabd6ef 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3113,7 +3113,7 @@ && (INTVAL (operands[2]) == 1)) && !TARGET_XTHEADBB && !TARGET_XANDESPERF - && !(riscv_is_micro_arch (arcv_rhx100) + && !(TARGET_ARCV_RHX100 && ) && !(TARGET_64BIT && (INTVAL (operands[3]) > 0) @@ -4504,10 +4504,10 @@ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand")) (sign_extend:SI (match_operand:HI 2 "register_operand"))) (match_operand:SI 3 "register_operand")))] - "TARGET_XTHEADMAC || (riscv_is_micro_arch (arcv_rhx100) + "TARGET_XTHEADMAC || (TARGET_ARCV_RHX100 && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL))" { - if (riscv_is_micro_arch (arcv_rhx100)) + if (TARGET_ARCV_RHX100) { rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); emit_insn (gen_extendhisi2 (tmp0, operands[1])); @@ -4538,7 +4538,7 @@ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand")) (zero_extend:SI (match_operand:HI 2 "register_operand"))) (match_operand:SI 3 "register_operand")))] - "riscv_is_micro_arch (arcv_rhx100) + "TARGET_ARCV_RHX100 && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" { rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); @@ -4579,7 +4579,7 @@ (match_operand:SI 2 "register_operand" "r,r")) (match_operand:SI 3 "register_operand" "r,?0"))) (clobber (match_scratch:SI 4 "=&r,&r"))] - "riscv_is_micro_arch (arcv_rhx100) + "TARGET_ARCV_RHX100 && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" { if (REGNO (operands[0]) == REGNO (operands[3])) @@ -4602,7 +4602,7 @@ (match_operand:SI 2 "register_operand" "r,r")) (match_operand:SI 3 "register_operand" "r,?0")))) (clobber (match_scratch:SI 4 "=&r,&r"))] - "riscv_is_micro_arch (arcv_rhx100) + "TARGET_ARCV_RHX100 && (TARGET_ZMMUL || TARGET_MUL)" { if (REGNO (operands[0]) == REGNO (operands[3])) @@ -4622,7 +4622,7 @@ (zero_extract:SI (match_operand:SI 1 "register_operand" "r") (match_operand 2 "const_int_operand") (match_operand 3 "const_int_operand")))] - "riscv_is_micro_arch (arcv_rhx100) && !TARGET_64BIT + "TARGET_ARCV_RHX100 && !TARGET_64BIT && (INTVAL (operands[2]) > 1 || !TARGET_ZBS)" { int amount = INTVAL (operands[2]); From 1af0e745d019c26ce28b7f9869a09e4976f49148 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 12 Nov 2025 16:47:39 +0000 Subject: [PATCH 23/47] tmp: arcv: Use riscv_fusion_enabled_p (RISCV_FUSE_ARCV) In these cases, it makes more sense to check if ARCV fusion is enabled rather than checking if a specific mtune is enabled. Signed-off-by: Luis Silva --- gcc/config/riscv/riscv.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f17d328e0482..c01aa5718303 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -341,6 +341,9 @@ unsigned riscv_stack_boundary; /* Whether in riscv_output_mi_thunk. */ static bool riscv_in_thunk_func = false; +/* Forward declaration for scheduler functions. */ +static bool riscv_fusion_enabled_p (enum riscv_fusion_pairs); + /* If non-zero, this is an offset to be added to SP to redefine the CFA when restoring the FP register from the stack. Only valid when generating the epilogue. 
*/ @@ -10638,7 +10641,7 @@ static int riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) { - if (TARGET_ARCV_RHX100) + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) if (!arcv_can_issue_more_p (insn, more)) return 0; @@ -10662,7 +10665,7 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) an assert so we can find and fix this problem. */ gcc_assert (insn_has_dfa_reservation_p (insn)); - if (TARGET_ARCV_RHX100) + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) return arcv_sched_variable_issue (insn, more); return more - 1; @@ -11511,7 +11514,7 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) static int riscv_sched_adjust_priority (rtx_insn *insn, int priority) { - if (TARGET_ARCV_RHX100) + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) return arcv_sched_adjust_priority (insn, priority); return priority; @@ -11523,7 +11526,7 @@ riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, int max_ready ATTRIBUTE_UNUSED) { - if (TARGET_ARCV_RHX100) + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) arcv_sched_init (); } @@ -11534,7 +11537,7 @@ riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int *n_readyp, int clock ATTRIBUTE_UNUSED) { - if (TARGET_ARCV_RHX100) + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) return arcv_sched_reorder2 (ready, n_readyp); return 0; From d8ade8074be9c541555f4b9ad08e4875b76d47d7 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 12 Nov 2025 16:59:08 +0000 Subject: [PATCH 24/47] tmp: arcv: Clean up arcv_memop_arith_pair_p () Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 168 ++++++++++++++++++++++++++++----------- 1 file changed, 123 insertions(+), 45 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index bb3840a7802b..ebb167e802f0 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -163,6 +163,114 @@ arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load) return false; } +/* Helper function to check if instruction type is arithmetic-like. */ + +static bool +is_arith_type_insn (rtx_insn *insn) +{ + enum attr_type type = get_attr_type (insn); + + return (type == TYPE_ARITH + || type == TYPE_LOGICAL + || type == TYPE_SHIFT + || type == TYPE_SLT + || type == TYPE_BITMANIP + || type == TYPE_MIN + || type == TYPE_MAX + || type == TYPE_MINU + || type == TYPE_MAXU + || type == TYPE_CLZ + || type == TYPE_CTZ); +} + +/* Helper to check if curr's source operand is valid for fusion. */ + +static bool +is_valid_arith_src (rtx curr_set) +{ + rtx src = SET_SRC (curr_set); + + /* Immediate operand or register operand. */ + return CONST_INT_P (src) || REG_P (XEXP (src, 0)); +} + +/* Helper to check if curr operation is compatible with load's destination. */ + +static bool +is_valid_load_arith_pair (rtx prev_set, rtx curr_set) +{ + rtx load_addr = XEXP (SET_SRC (prev_set), 0); + rtx load_dest = SET_DEST (prev_set); + rtx arith_src = XEXP (SET_SRC (curr_set), 0); + rtx arith_dest = SET_DEST (curr_set); + + /* Address register must be a register. */ + if (!REG_P (load_addr)) + return false; + + /* Address register must match first source operand of arithmetic op. */ + if (REGNO (load_addr) != REGNO (arith_src)) + return false; + + /* Address register must not be the load destination (no clobber). */ + if (REGNO (load_addr) == REGNO (load_dest)) + return false; + + /* Load and arithmetic destinations must be different. */ + if (REGNO (load_dest) == REGNO (arith_dest)) + return false; + + /* Check operand constraints for different arithmetic formats. 
*/ + rtx src = SET_SRC (curr_set); + + /* Unary operation: (set (reg:X rd1) (not (reg:X rs1))). */ + if (GET_RTX_LENGTH (GET_CODE (src)) == 1) + return true; + + /* Immediate operation: (set (reg:X rd2) (op (reg:X rs1) (const_int))). */ + if (CONST_INT_P (XEXP (src, 1))) + return true; + + /* Binary register operation: ensure load dest != second source register. */ + if (REGNO (load_dest) != REGNO (XEXP (src, 1))) + return true; + + return false; +} + +/* Helper to check if curr operation is compatible with store's address. */ + +static bool +is_valid_store_arith_pair (rtx prev_set, rtx curr_set) +{ + rtx store_addr = XEXP (SET_DEST (prev_set), 0); + rtx arith_src = XEXP (SET_SRC (curr_set), 0); + + /* Address register must be a register. */ + if (!REG_P (store_addr)) + return false; + + /* Address register must match first source operand of arithmetic op. */ + if (REGNO (store_addr) != REGNO (arith_src)) + return false; + + /* Check operand constraints for different arithmetic formats. */ + rtx src = SET_SRC (curr_set); + + /* Unary operation. */ + if (GET_RTX_LENGTH (GET_CODE (src)) == 1) + return true; + + /* Immediate operation. */ + if (CONST_INT_P (XEXP (src, 1))) + return true; + + /* Binary register operation: store addr == second source is OK. */ + if (REGNO (store_addr) == REGNO (XEXP (src, 1))) + return true; + + return false; +} /* Return true if PREV and CURR constitute an ordered load/store + op/opimm pair, for the purposes of ARCV-specific macro-op fusion. */ @@ -175,51 +283,21 @@ arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) gcc_assert (prev_set); gcc_assert (curr_set); - /* Fuse load/store + register post-{inc,dec}rement: - * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X rs1) (const_int)))) - * or - * prev (st) == (set (mem:X (plus:X (reg:X rs1) (const_int))) (reg:X rs2)) - * ... 
- */ - if ((get_attr_type (curr) == TYPE_ARITH - || get_attr_type (curr) == TYPE_LOGICAL - || get_attr_type (curr) == TYPE_SHIFT - || get_attr_type (curr) == TYPE_SLT - || get_attr_type (curr) == TYPE_BITMANIP - || get_attr_type (curr) == TYPE_MIN - || get_attr_type (curr) == TYPE_MAX - || get_attr_type (curr) == TYPE_MINU - || get_attr_type (curr) == TYPE_MAXU - || get_attr_type (curr) == TYPE_CLZ - || get_attr_type (curr) == TYPE_CTZ) - && (CONST_INT_P (SET_SRC (curr_set)) - || REG_P (XEXP (SET_SRC (curr_set), 0))) - && ((get_attr_type (prev) == TYPE_LOAD - && REG_P (XEXP (SET_SRC (prev_set), 0)) - && REGNO (XEXP (SET_SRC (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 0)) - && REGNO (XEXP (SET_SRC (prev_set), 0)) - != REGNO (SET_DEST (prev_set)) - && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)) - && (/* (set (reg:X rd1) (not (reg:X rs1))) */ - GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 - /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ - || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) - /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ - || REGNO (SET_DEST (prev_set)) - != REGNO (XEXP (SET_SRC (curr_set), 1)))) - || (get_attr_type (prev) == TYPE_STORE - && REG_P (XEXP (SET_DEST (prev_set), 0)) - && REGNO (XEXP (SET_DEST (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 0)) - && (/* (set (reg:X rd1) (not (reg:X rs1))) */ - GET_RTX_LENGTH (GET_CODE (SET_SRC (curr_set))) == 1 - /* (op-imm) == (set (reg:X rd2) (plus/minus (reg:X rs1) (const_int))) */ - || CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) - /* (op) == (set (reg:X rd2) (plus/minus (reg:X rs1) (reg:X rs2))) */ - || REGNO (XEXP (SET_DEST (prev_set), 0)) - == REGNO (XEXP (SET_SRC (curr_set), 1)))))) - return true; + /* Check if curr is an arithmetic-type instruction. */ + if (!is_arith_type_insn (curr)) + return false; + + /* Check if curr has valid source operands. */ + if (!is_valid_arith_src (curr_set)) + return false; + + /* Check for load + arithmetic fusion. */ + if (get_attr_type (prev) == TYPE_LOAD) + return is_valid_load_arith_pair (prev_set, curr_set); + + /* Check for store + arithmetic fusion. */ + if (get_attr_type (prev) == TYPE_STORE) + return is_valid_store_arith_pair (prev_set, curr_set); return false; } From 375ea0c5b34dc8fd0149c7a515aadfb98bab5e10 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 12 Nov 2025 17:03:33 +0000 Subject: [PATCH 25/47] tmp: arcv: Clean up arcv_memop_lui_pair_p () Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index ebb167e802f0..2b9b46b0c8ae 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -305,6 +305,7 @@ arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) /* Return true if PREV and CURR constitute an ordered load/store + lui pair, for the purposes of ARCV-specific macro-op fusion. 
*/ + static bool arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) { @@ -314,21 +315,25 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) gcc_assert (prev_set); gcc_assert (curr_set); - /* Fuse load/store with lui: - * prev (ld) == (set (reg:X rd1) (mem:X (plus:X (reg:X) (const_int)))) - * or - * prev (st) == (set (mem:X (plus:X (reg:X) (const_int))) (reg:X rD)) - * - * curr (lui) == (set (reg:X rd2) (const_int UPPER_IMM_20)) - */ - if (REG_P (curr) - && ((get_attr_type (curr) == TYPE_MOVE - && GET_CODE (SET_SRC (curr_set)) == HIGH) - || (CONST_INT_P (SET_SRC (curr_set)) - && LUI_OPERAND (INTVAL (SET_SRC (curr_set))))) - && ((get_attr_type (prev) == TYPE_LOAD - && REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set))) - || get_attr_type (prev) == TYPE_STORE)) + /* Check if curr is a LUI instruction: + - LUI via HIGH: (set (reg:X rd) (high (const_int))) + - LUI via immediate: (set (reg:X rd) (const_int UPPER_IMM_20)) */ + bool is_lui = (REG_P (curr) + && ((get_attr_type (curr) == TYPE_MOVE + && GET_CODE (SET_SRC (curr_set)) == HIGH) + || (CONST_INT_P (SET_SRC (curr_set)) + && LUI_OPERAND (INTVAL (SET_SRC (curr_set)))))); + + if (!is_lui) + return false; + + /* Check for load + LUI fusion: + Load and LUI destinations must be different to avoid hazard. */ + if (get_attr_type (prev) == TYPE_LOAD) + return REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)); + + /* Check for store + LUI fusion (always allowed). */ + if (get_attr_type (prev) == TYPE_STORE) return true; return false; From a0d883241e090179bea443a7bf8f9e77261a4678 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 12 Nov 2025 17:25:17 +0000 Subject: [PATCH 26/47] tmp: arcv: Clean up arcv_macro_fusion_pair_p () Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 115 ++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 43 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 2b9b46b0c8ae..ab1a54083fbc 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -352,33 +352,52 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) rtx prev_set = single_set (prev); rtx curr_set = single_set (curr); - /* Fuse multiply-add pair. */ - if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT - && GET_CODE (SET_SRC (curr_set)) == PLUS - && (REG_P (XEXP (SET_SRC (curr_set), 0)) - && REGNO (SET_DEST (prev_set)) == - REGNO (XEXP (SET_SRC (curr_set), 0)) - || (REG_P (XEXP (SET_SRC (curr_set), 1)) - && REGNO (SET_DEST (prev_set)) == - REGNO (XEXP (SET_SRC (curr_set), 1))))) - return true; + /* Fuse multiply-add pair: + prev: (set rd_mult (mult rs1 rs2)) + curr: (set rd_add (plus rd_mult rs3)) */ + if (prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == MULT + && GET_CODE (SET_SRC (curr_set)) == PLUS) + { + rtx curr_plus = SET_SRC (curr_set); + rtx mult_dest = SET_DEST (prev_set); + unsigned int mult_dest_regno = REGNO (mult_dest); + + /* Check if multiply result is used in either operand of the addition. */ + if (REG_P (XEXP (curr_plus, 0)) + && REGNO (XEXP (curr_plus, 0)) == mult_dest_regno) + return true; + + if (REG_P (XEXP (curr_plus, 1)) + && REGNO (XEXP (curr_plus, 1)) == mult_dest_regno) + return true; + } - /* Fuse logical shift left with logical shift right (bit-extract pattern). 
*/ - if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT + /* Fuse logical shift left with logical shift right (bit-extract pattern): + prev: (set rd (ashift rs imm1)) + curr: (set rd (lshiftrt rd imm2)) */ + if (prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == ASHIFT && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) return true; - /* Fuse load-immediate with a dependent conditional branch. */ + /* Fuse load-immediate with a dependent conditional branch: + prev: (set rd imm) + curr: (if_then_else (cond rd ...) ...) */ if (get_attr_type (prev) == TYPE_MOVE && get_attr_move_type (prev) == MOVE_TYPE_CONST && any_condjump_p (curr)) { + if (!curr_set) + return false; + rtx comp = XEXP (SET_SRC (curr_set), 0); + rtx prev_dest = SET_DEST (prev_set); - return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set)) - || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set)); + return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == prev_dest) + || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == prev_dest); } /* Do not fuse loads/stores before sched2. */ @@ -392,7 +411,7 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) if (!simple_sets_p) return false; - /* Fuse adjacent loads and stores. */ + /* Fuse adjacent loads. */ if (get_attr_type (prev) == TYPE_LOAD && get_attr_type (curr) == TYPE_LOAD) { @@ -400,6 +419,7 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return true; } + /* Fuse adjacent stores. */ if (get_attr_type (prev) == TYPE_STORE && get_attr_type (curr) == TYPE_STORE) { @@ -407,47 +427,56 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) return true; } - /* Look ahead 1 insn to make sure double loads/stores are always - fused together, even in the presence of other opportunities. */ - if (next_insn (curr) && single_set (next_insn (curr)) - && get_attr_type (curr) == TYPE_LOAD - && get_attr_type (next_insn (curr)) == TYPE_LOAD) - { - if (arcv_fused_addr_p (SET_SRC (curr_set), - SET_SRC (single_set (next_insn (curr))), - true)) + /* Look ahead 1 insn to prioritize adjacent load/store pairs. + If curr and next form a better fusion opportunity, defer this fusion. */ + rtx_insn *next = next_insn (curr); + if (next) + { + rtx next_set = single_set (next); + + /* Defer if next instruction forms an adjacent load pair with curr. */ + if (next_set + && get_attr_type (curr) == TYPE_LOAD + && get_attr_type (next) == TYPE_LOAD + && arcv_fused_addr_p (SET_SRC (curr_set), SET_SRC (next_set), true)) return false; - } - if (next_insn (curr) && single_set (next_insn (curr)) - && get_attr_type (curr) == TYPE_STORE - && get_attr_type (next_insn (curr)) == TYPE_STORE) - { - if (arcv_fused_addr_p (SET_DEST (curr_set), - SET_DEST (single_set (next_insn (curr))), - false)) + /* Defer if next instruction forms an adjacent store pair with curr. */ + if (next_set + && get_attr_type (curr) == TYPE_STORE + && get_attr_type (next) == TYPE_STORE + && arcv_fused_addr_p (SET_DEST (curr_set), SET_DEST (next_set), false)) return false; - } + } - /* Fuse a pre- or post-update memory operation. */ + /* Fuse a pre- or post-update memory operation: + Examples: load+add, add+load, store+add, add+store. */ if (arcv_memop_arith_pair_p (prev, curr) || arcv_memop_arith_pair_p (curr, prev)) return true; - /* Fuse a memory operation preceded or followed by a lui. 
*/ + /* Fuse a memory operation preceded or followed by a LUI: + Examples: load+lui, lui+load, store+lui, lui+store. */ if (arcv_memop_lui_pair_p (prev, curr) || arcv_memop_lui_pair_p (curr, prev)) return true; - /* Fuse load-immediate with a store of the destination register. */ + /* Fuse load-immediate with a store of the destination register: + prev: (set rd imm) + curr: (set (mem ...) rd) */ if (get_attr_type (prev) == TYPE_MOVE && get_attr_move_type (prev) == MOVE_TYPE_CONST - && get_attr_type (curr) == TYPE_STORE - && ((REG_P (SET_SRC (curr_set)) - && SET_DEST (prev_set) == SET_SRC (curr_set)) - || (SUBREG_P (SET_SRC (curr_set)) - && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set))))) - return true; + && get_attr_type (curr) == TYPE_STORE) + { + rtx store_src = SET_SRC (curr_set); + rtx load_dest = SET_DEST (prev_set); + + if (REG_P (store_src) && store_src == load_dest) + return true; + + if (SUBREG_P (store_src) && SUBREG_REG (store_src) == load_dest) + return true; + } return false; } From dd9f95b3f420a78b4160c958a7b641a4fa77cea9 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Fri, 14 Nov 2025 10:37:13 +0000 Subject: [PATCH 27/47] tmp: arcv: Refactor scheduler state variables into struct. Group alu_pipe_scheduled_p, pipeB_scheduled_p, last_scheduled_insn, and cached_can_issue_more into arcv_sched_state struct. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 102 ++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index ab1a54083fbc..195affba852f 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -51,10 +51,14 @@ along with GCC; see the file COPYING3. If not see #include "arcv.h" /* Scheduler state tracking for dual-pipe ARCV architectures. */ -static int alu_pipe_scheduled_p; -static int pipeB_scheduled_p; -static rtx_insn *last_scheduled_insn; -static short cached_can_issue_more; +struct arcv_sched_state { + int alu_pipe_scheduled_p; + int pipeB_scheduled_p; + rtx_insn *last_scheduled_insn; + short cached_can_issue_more; +}; + +static struct arcv_sched_state sched_state; /* Implement one boolean function for each of the values of the arcv_mpy_option enum, for the needs of rhx100.md. */ @@ -486,7 +490,7 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) void arcv_sched_init (void) { - last_scheduled_insn = 0; + sched_state.last_scheduled_insn = 0; } @@ -498,16 +502,16 @@ int arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) { if (sched_fusion) - return cached_can_issue_more; + return sched_state.cached_can_issue_more; - if (!cached_can_issue_more) + if (!sched_state.cached_can_issue_more) return 0; /* Fuse double load/store instances missed by sched_fusion. 
*/ - if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0 - && !SCHED_GROUP_P (last_scheduled_insn) - && (get_attr_type (last_scheduled_insn) == TYPE_LOAD - || get_attr_type (last_scheduled_insn) == TYPE_STORE)) + if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (sched_state.last_scheduled_insn) + && (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD + || get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE)) { for (int i = 1; i <= *n_readyp; i++) { @@ -516,23 +520,23 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && (!next_insn (ready[*n_readyp - i]) || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) - && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) + && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, ready[*n_readyp - i])) { std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; - pipeB_scheduled_p = 1; - return cached_can_issue_more; + sched_state.pipeB_scheduled_p = 1; + return sched_state.cached_can_issue_more; } } - pipeB_scheduled_p = 1; + sched_state.pipeB_scheduled_p = 1; } /* Try to fuse a non-memory last_scheduled_insn. */ - if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p) - && last_scheduled_insn && ready && *n_readyp > 0 - && !SCHED_GROUP_P (last_scheduled_insn) - && (get_attr_type (last_scheduled_insn) != TYPE_LOAD - && get_attr_type (last_scheduled_insn) != TYPE_STORE)) + if ((!sched_state.alu_pipe_scheduled_p || !sched_state.pipeB_scheduled_p) + && sched_state.last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (sched_state.last_scheduled_insn) + && (get_attr_type (sched_state.last_scheduled_insn) != TYPE_LOAD + && get_attr_type (sched_state.last_scheduled_insn) != TYPE_STORE)) { for (int i = 1; i <= *n_readyp; i++) { @@ -541,36 +545,36 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && (!next_insn (ready[*n_readyp - i]) || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) - && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i])) + && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, ready[*n_readyp - i])) { if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) - if (pipeB_scheduled_p) + if (sched_state.pipeB_scheduled_p) continue; else - pipeB_scheduled_p = 1; - else if (!alu_pipe_scheduled_p) - alu_pipe_scheduled_p = 1; + sched_state.pipeB_scheduled_p = 1; + else if (!sched_state.alu_pipe_scheduled_p) + sched_state.alu_pipe_scheduled_p = 1; else - pipeB_scheduled_p = 1; + sched_state.pipeB_scheduled_p = 1; std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; - return cached_can_issue_more; + return sched_state.cached_can_issue_more; } } - alu_pipe_scheduled_p = 1; + sched_state.alu_pipe_scheduled_p = 1; } /* When pipe B is scheduled, we can have no more memops this cycle. 
*/ - if (pipeB_scheduled_p && *n_readyp > 0 + if (sched_state.pipeB_scheduled_p && *n_readyp > 0 && NONDEBUG_INSN_P (ready[*n_readyp - 1]) && recog_memoized (ready[*n_readyp - 1]) >= 0 && !SCHED_GROUP_P (ready[*n_readyp - 1]) && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) { - if (alu_pipe_scheduled_p) + if (sched_state.alu_pipe_scheduled_p) return 0; for (int i = 2; i <= *n_readyp; i++) @@ -590,8 +594,8 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE))) { std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); - alu_pipe_scheduled_p = 1; - cached_can_issue_more = 1; + sched_state.alu_pipe_scheduled_p = 1; + sched_state.cached_can_issue_more = 1; return 1; } } @@ -605,24 +609,24 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) { - if (!pipeB_scheduled_p + if (!sched_state.pipeB_scheduled_p && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) { - alu_pipe_scheduled_p = pipeB_scheduled_p = 1; - cached_can_issue_more = 1; + sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1; + sched_state.cached_can_issue_more = 1; return 1; } else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) { - alu_pipe_scheduled_p = pipeB_scheduled_p = 1; - cached_can_issue_more = 1; + sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1; + sched_state.cached_can_issue_more = 1; return 1; } } - return cached_can_issue_more; + return sched_state.cached_can_issue_more; } int @@ -758,17 +762,17 @@ arcv_can_issue_more_p (rtx_insn *insn, int more) /* Beginning of cycle - reset variables. 
*/ if (more == riscv_get_tune_param_issue_rate ()) { - alu_pipe_scheduled_p = 0; - pipeB_scheduled_p = 0; + sched_state.alu_pipe_scheduled_p = 0; + sched_state.pipeB_scheduled_p = 0; } - if (alu_pipe_scheduled_p && pipeB_scheduled_p) + if (sched_state.alu_pipe_scheduled_p && sched_state.pipeB_scheduled_p) { - cached_can_issue_more = 0; + sched_state.cached_can_issue_more = 0; return false; } - cached_can_issue_more = more; + sched_state.cached_can_issue_more = more; return true; } @@ -783,20 +787,20 @@ arcv_sched_variable_issue (rtx_insn *insn, int more) || get_attr_type (insn) == TYPE_STORE || get_attr_type (next_insn (insn)) == TYPE_LOAD || get_attr_type (next_insn (insn)) == TYPE_STORE) - pipeB_scheduled_p = 1; + sched_state.pipeB_scheduled_p = 1; else - alu_pipe_scheduled_p = 1; + sched_state.alu_pipe_scheduled_p = 1; } if (get_attr_type (insn) == TYPE_ALU_FUSED || get_attr_type (insn) == TYPE_IMUL_FUSED) { - alu_pipe_scheduled_p = 1; + sched_state.alu_pipe_scheduled_p = 1; more -= 1; } - last_scheduled_insn = insn; - cached_can_issue_more = more - 1; + sched_state.last_scheduled_insn = insn; + sched_state.cached_can_issue_more = more - 1; - return cached_can_issue_more; + return sched_state.cached_can_issue_more; } From 66ff6b89c73930e060bd14c211e525a12bcd5188 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Fri, 14 Nov 2025 10:41:11 +0000 Subject: [PATCH 28/47] tmp: arcv: Fix 80 character limit lines Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 195affba852f..ee449651d766 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -508,7 +508,8 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) return 0; /* Fuse double load/store instances missed by sched_fusion. 
*/ - if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn && ready && *n_readyp > 0 + if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn + && ready && *n_readyp > 0 && !SCHED_GROUP_P (sched_state.last_scheduled_insn) && (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD || get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE)) @@ -520,7 +521,8 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && (!next_insn (ready[*n_readyp - i]) || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) - && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, ready[*n_readyp - i])) + && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, + ready[*n_readyp - i])) { std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; @@ -545,7 +547,8 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && (!next_insn (ready[*n_readyp - i]) || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) - && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, ready[*n_readyp - i])) + && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, + ready[*n_readyp - i])) { if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) From fdf6d841e8f6f8898a0cfec50e58fd73d77af048 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Fri, 14 Nov 2025 15:36:02 +0000 Subject: [PATCH 29/47] tmp: arcv: Refactor helper function names Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index ee449651d766..82e318839f21 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -170,7 +170,7 @@ arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load) /* Helper function to check if instruction type is arithmetic-like. */ static bool -is_arith_type_insn (rtx_insn *insn) +arcv_arith_type_insn_p (rtx_insn *insn) { enum attr_type type = get_attr_type (insn); @@ -190,7 +190,7 @@ is_arith_type_insn (rtx_insn *insn) /* Helper to check if curr's source operand is valid for fusion. */ static bool -is_valid_arith_src (rtx curr_set) +arcv_arith_src_p (rtx curr_set) { rtx src = SET_SRC (curr_set); @@ -201,7 +201,7 @@ is_valid_arith_src (rtx curr_set) /* Helper to check if curr operation is compatible with load's destination. */ static bool -is_valid_load_arith_pair (rtx prev_set, rtx curr_set) +arcv_load_arith_pair_p (rtx prev_set, rtx curr_set) { rtx load_addr = XEXP (SET_SRC (prev_set), 0); rtx load_dest = SET_DEST (prev_set); @@ -245,7 +245,7 @@ is_valid_load_arith_pair (rtx prev_set, rtx curr_set) /* Helper to check if curr operation is compatible with store's address. */ static bool -is_valid_store_arith_pair (rtx prev_set, rtx curr_set) +arcv_store_arith_pair_p (rtx prev_set, rtx curr_set) { rtx store_addr = XEXP (SET_DEST (prev_set), 0); rtx arith_src = XEXP (SET_SRC (curr_set), 0); @@ -288,20 +288,20 @@ arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) gcc_assert (curr_set); /* Check if curr is an arithmetic-type instruction. */ - if (!is_arith_type_insn (curr)) + if (!arcv_arith_type_insn_p (curr)) return false; /* Check if curr has valid source operands. */ - if (!is_valid_arith_src (curr_set)) + if (!arcv_arith_src_p (curr_set)) return false; /* Check for load + arithmetic fusion. 
*/ if (get_attr_type (prev) == TYPE_LOAD) - return is_valid_load_arith_pair (prev_set, curr_set); + return arcv_load_arith_pair_p (prev_set, curr_set); /* Check for store + arithmetic fusion. */ if (get_attr_type (prev) == TYPE_STORE) - return is_valid_store_arith_pair (prev_set, curr_set); + return arcv_store_arith_pair_p (prev_set, curr_set); return false; } From 1edd45f8f1e0bd275b702de56f6fe84dca9507b0 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Fri, 14 Nov 2025 15:39:09 +0000 Subject: [PATCH 30/47] tmp: arcv: Remove newlines. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 82e318839f21..850c3bc689af 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -493,11 +493,9 @@ arcv_sched_init (void) sched_state.last_scheduled_insn = 0; } - /* Try to reorder ready queue to promote ARCV fusion opportunities. Returns the number of instructions that can be issued this cycle. */ - int arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) { @@ -667,7 +665,6 @@ arcv_sched_adjust_cost (rtx_insn *insn, int dep_type, int cost) return cost; } - /* If INSN is a load or store of address in the form of [base+offset], extract the two parts and set to BASE and OFFSET. IS_LOAD is set to TRUE if it's a load. Return TRUE if INSN is such an instruction, @@ -709,7 +706,6 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode, return (*base != NULL_RTX && *offset != NULL_RTX); } - void arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, int *pri) From e99ae0d5ade5718002fb30001fca2ced98a6aebf Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Fri, 14 Nov 2025 15:55:42 +0000 Subject: [PATCH 31/47] tmp: arcv: Add hook comments Signed-off-by: Luis Silva --- gcc/config/riscv/riscv.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index c01aa5718303..fd271c6f3a31 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11511,6 +11511,8 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) } } +/* Implement TARGET_SCHED_ADJUST_PRIORITY hook. */ + static int riscv_sched_adjust_priority (rtx_insn *insn, int priority) { @@ -11520,6 +11522,7 @@ riscv_sched_adjust_priority (rtx_insn *insn, int priority) return priority; } +/* Implement TARGET_SCHED_INIT hook. */ static void riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, @@ -11530,6 +11533,8 @@ riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, arcv_sched_init (); } +/* Implement TARGET_SCHED_REORDER2 hook. */ + static int riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, From 4ea42df065f2fafc2cf9973b7e99521ad69cb2a2 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Fri, 14 Nov 2025 15:59:10 +0000 Subject: [PATCH 32/47] fixup! tmp: arcv: Refactor scheduler state variables into struct. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 850c3bc689af..78e4d35237d3 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -51,10 +51,26 @@ along with GCC; see the file COPYING3. If not see #include "arcv.h" /* Scheduler state tracking for dual-pipe ARCV architectures. */ + struct arcv_sched_state { + /* True if the ALU pipe has been scheduled for the current cycle. + The ALU pipe handles arithmetic, logical, and other computational + instructions. 
*/
   int alu_pipe_scheduled_p;
+
+  /* True if pipe B has been scheduled for the current cycle.
+     Pipe B is the second execution pipe, typically used for memory
+     operations (loads/stores) but can also handle other instructions. */
   int pipeB_scheduled_p;
+
+  /* The last instruction that was scheduled. Used to detect fusion
+     opportunities by looking ahead at the next instruction to be
+     scheduled. */
   rtx_insn *last_scheduled_insn;
+
+  /* Cached value of how many more instructions can be issued in the
+     current cycle. Updated as instructions are scheduled and pipes
+     become occupied. */
   short cached_can_issue_more;
 };

From 3bad3ca76a4d19bfc89d9b161399e6a2a6155627 Mon Sep 17 00:00:00 2001
From: Luis Silva
Date: Fri, 14 Nov 2025 16:06:31 +0000
Subject: [PATCH 33/47] tmp: arcv: Refactor helper function names 2

Signed-off-by: Luis Silva
---
 gcc/config/riscv/arcv.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc
index 78e4d35237d3..b41a80f855e1 100644
--- a/gcc/config/riscv/arcv.cc
+++ b/gcc/config/riscv/arcv.cc
@@ -105,7 +105,7 @@ arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
    of transfer is determined by the IS_LOAD parameter). */
 
 static bool
-pair_fusion_mode_allowed_p (machine_mode mode, bool is_load)
+arcv_pair_fusion_mode_allowed_p (machine_mode mode, bool is_load)
 {
   if (!TARGET_ARCV_RHX100)
     return true;
@@ -138,7 +138,7 @@ arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load)
     return false;
 
   /* Check if the mode is allowed. */
-  if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load))
+  if (!arcv_pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load))
     return false;
 
   rtx reg0 = XEXP (addr0, 0);
@@ -687,7 +687,7 @@ arcv_sched_adjust_cost (rtx_insn *insn, int dep_type, int cost)
    otherwise return FALSE. */
 
 static bool
-fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode,
+arcv_fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode,
 		bool *is_load)
 {
   rtx x, dest, src;
@@ -734,8 +734,8 @@ arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
   gcc_assert (INSN_P (insn));
 
   tmp = max_pri - 1;
-  if (!fusion_load_store (insn, &base, &offset, &mode, &is_load)
-      || !pair_fusion_mode_allowed_p (mode, is_load))
+  if (!arcv_fusion_load_store (insn, &base, &offset, &mode, &is_load)
+      || !arcv_pair_fusion_mode_allowed_p (mode, is_load))
     {
       *pri = tmp;
       *fusion_pri = tmp;

From 367327e38b4ad4b0f6a1845c0a8dfdc010522c45 Mon Sep 17 00:00:00 2001
From: Luis Silva
Date: Fri, 14 Nov 2025 16:14:10 +0000
Subject: [PATCH 34/47] tmp: arcv: Clean up arcv_sched_fusion_priority ()

Signed-off-by: Luis Silva
---
 gcc/config/riscv/arcv.cc | 62 ++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc
index b41a80f855e1..17de645a93ac 100644
--- a/gcc/config/riscv/arcv.cc
+++ b/gcc/config/riscv/arcv.cc
@@ -726,48 +726,60 @@ void
 arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
 			    int *pri)
 {
-  int tmp, off_val;
-  bool is_load;
   rtx base, offset;
   machine_mode mode = SImode;
+  bool is_load;
 
   gcc_assert (INSN_P (insn));
 
-  tmp = max_pri - 1;
+  /* Default priority for non-fusible instructions. */
+  int default_pri = max_pri - 1;
+
+  /* Check if this is a fusible load/store instruction.
*/ if (!arcv_fusion_load_store (insn, &base, &offset, &mode, &is_load) || !arcv_pair_fusion_mode_allowed_p (mode, is_load)) { - *pri = tmp; - *fusion_pri = tmp; + *pri = default_pri; + *fusion_pri = default_pri; return; } - tmp /= 2; + /* Start with half the default priority to distinguish fusible from + non-fusible instructions. */ + int priority = default_pri / 2; + /* Scale priority by access width - narrower accesses get lower priority. + HImode: divide by 2, QImode: divide by 4. This encourages wider + accesses to be scheduled together. */ if (mode == HImode) - tmp /= 2; + priority /= 2; else if (mode == QImode) - tmp /= 4; - - /* INSN with smaller base register goes first. */ - tmp -= ((REGNO (base) & 0xff) << 20); - - /* INSN with smaller offset goes first. */ - off_val = (int)(INTVAL (offset)); - - /* Put loads/stores operating on adjacent words into the same - * scheduling group. */ - *fusion_pri = tmp - - ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1) - + is_load; - + priority /= 4; + + /* Factor in base register: instructions with smaller register numbers + get higher priority. The shift by 20 bits ensures this is the most + significant component of the priority. */ + const int BASE_REG_SHIFT = 20; + const int BASE_REG_MASK = 0xff; + priority -= ((REGNO (base) & BASE_REG_MASK) << BASE_REG_SHIFT); + + /* Calculate fusion priority: group loads/stores with adjacent addresses + into the same scheduling group. We divide the offset by (mode_size * 2) + to group pairs of adjacent accesses, then shift left by 1 to make room + for the load/store bit. */ + int off_val = (int)(INTVAL (offset)); + int addr_group = off_val / (GET_MODE_SIZE (mode).to_constant () * 2); + *fusion_pri = priority - (addr_group << 1) + is_load; + + /* Factor in the actual offset value: instructions with smaller offsets + get higher priority. We use only the lower 20 bits to avoid overflow. */ + const int OFFSET_MASK = 0xfffff; if (off_val >= 0) - tmp -= (off_val & 0xfffff); + priority -= (off_val & OFFSET_MASK); else - tmp += ((- off_val) & 0xfffff); + priority += ((-off_val) & OFFSET_MASK); - *pri = tmp; - return; + *pri = priority; } From aee9fd070a89ca143982a3b1fd17eca47940f4cd Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Mon, 17 Nov 2025 10:30:23 +0000 Subject: [PATCH 35/47] Fix comment Signed-off-by: Luis Silva --- gcc/config/riscv/riscv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index fd271c6f3a31..178006a9cb80 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11411,7 +11411,7 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, return; } - /* Default priority for non-ARCV architectures. */ + /* Default priority. */ *pri = max_pri - 1; *fusion_pri = max_pri - 1; } From 4b1cee452ec816e900f18d346754c00ad781b0e0 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Mon, 17 Nov 2025 10:33:37 +0000 Subject: [PATCH 36/47] tmp: Remove newline. Signed-off-by: Luis Silva --- gcc/config/riscv/riscv.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 178006a9cb80..b7d4c1cebd3c 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11489,7 +11489,6 @@ riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, return new_cost; } - /* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can can be scheduled for speculative execution. 
Reject vsetvl instructions to prevent the scheduler from hoisting them out of basic blocks without From bca8fced20bbc06015aea371a2c1df9c71973c57 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 10:37:28 +0000 Subject: [PATCH 37/47] tmp: arcv: Revert riscv_macro_fusion_p to static. Signed-off-by: Luis Silva --- gcc/config/riscv/riscv-protos.h | 2 -- gcc/config/riscv/riscv.cc | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 5e585148861b..88bc33cd58fd 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -822,8 +822,6 @@ extern unsigned int th_int_get_save_adjustment (void); extern rtx th_int_adjust_cfi_prologue (unsigned int); extern const char *th_asm_output_opcode (FILE *asm_out_file, const char *p); -extern bool riscv_macro_fusion_p (); - #ifdef RTX_CODE extern const char* th_mempair_output_move (rtx[4], bool, machine_mode, RTX_CODE); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index b7d4c1cebd3c..03cdb35f4ee9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10674,7 +10674,7 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports instruction fusion of some sort. */ -bool +static bool riscv_macro_fusion_p (void) { return tune_param->fusible_ops != RISCV_FUSE_NOTHING; From 46a8a4e0746c3040f280fd7fec87f485ccc5950b Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 10:40:23 +0000 Subject: [PATCH 38/47] tmp: arcv: arcv-rhx100.md: Remove commented code. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv-rhx100.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md index 398f13131606..709ac87a2c0c 100644 --- a/gcc/config/riscv/arcv-rhx100.md +++ b/gcc/config/riscv/arcv-rhx100.md @@ -93,16 +93,9 @@ (eq_attr "type" "fdiv,fsqrt")) "arcv_rhx100_fdivsqrt*20") -;(final_presence_set "arcv_rhx100_issueA_fuse1" "arcv_rhx100_issueA_fuse0") -;(final_presence_set "arcv_rhx100_issueB_fuse1" "arcv_rhx100_issueB_fuse0") -;(final_presence_set "arcv_rhx100_ALU_A_fuse1_early" "arcv_rhx100_ALU_A_fuse0_early") -;(final_presence_set "arcv_rhx100_ALU_B_fuse1_early" "arcv_rhx100_ALU_B_fuse0_early") - ;; Bypasses -;(define_bypass 0 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") (define_bypass 1 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") -;(define_bypass 0 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") (define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") (define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_alu_early_arith") (define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_mpy*_insn") From 222c326f083b5f08d39d0d9a84d327d32e0b2158 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 10:41:34 +0000 Subject: [PATCH 39/47] tmp: arcv-rhx100.md: Update copyright year.. 
Signed-off-by: Luis Silva --- gcc/config/riscv/arcv-rhx100.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md index 709ac87a2c0c..7cbabac29a58 100644 --- a/gcc/config/riscv/arcv-rhx100.md +++ b/gcc/config/riscv/arcv-rhx100.md @@ -1,6 +1,6 @@ ;; DFA scheduling description of the Synopsys RHX-100 cpu ;; for GNU C compiler -;; Copyright (C) 2023 Free Software Foundation, Inc. +;; Copyright (C) 2025 Free Software Foundation, Inc. ;; This file is part of GCC. From 55cf42fc76114229ba35365bf8b4b003a1257545 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 10:53:52 +0000 Subject: [PATCH 40/47] tmp: arcv.{cc,h}: Update header. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 2 +- gcc/config/riscv/arcv.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 17de645a93ac..74a19a254fa0 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -1,4 +1,4 @@ -/* ARCV-specific macro-op fusion for RISC-V. +/* Subroutines used for code generation for Synopsys ARC-V processors. Copyright (C) 2025 Free Software Foundation, Inc. This file is part of GCC. diff --git a/gcc/config/riscv/arcv.h b/gcc/config/riscv/arcv.h index 01b82b9065f7..c42cd75b0275 100644 --- a/gcc/config/riscv/arcv.h +++ b/gcc/config/riscv/arcv.h @@ -1,4 +1,4 @@ -/* ARCV-specific macro-op fusion for RISC-V. +/* Definition of Synopsys ARC-V processors. Copyright (C) 2025 Free Software Foundation, Inc. This file is part of GCC. From f77ad5e34b7b1ddb23bb353f2a0a0a86c21374ab Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 11:04:50 +0000 Subject: [PATCH 41/47] tmp: riscv-c.cc: Remove __riscv_rhx Signed-off-by: Luis Silva --- gcc/config/riscv/riscv-c.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index c669e889824b..d497326e0611 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -149,9 +149,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) builtin_define_with_int_value ("__riscv_th_v_intrinsic", riscv_ext_version_value (0, 11)); - if (TARGET_ARCV_RHX100) - builtin_define ("__riscv_rhx"); - /* Define architecture extension test macros. */ builtin_define_with_int_value ("__riscv_arch_test", 1); From e242bd83cf7a986999a9b7e67c53bfb98253f068 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 11:07:02 +0000 Subject: [PATCH 42/47] fixup! tmp: fusion: add arcv.{cc,h} files. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 74a19a254fa0..90a09ef51b32 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -365,10 +365,6 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) bool arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) { - /* Never create sched groups with more than 2 members. */ - if (SCHED_GROUP_P (prev)) - return false; - rtx prev_set = single_set (prev); rtx curr_set = single_set (curr); From b4b8c831dde6868731addf98b3812483b579f4d2 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 11:14:15 +0000 Subject: [PATCH 43/47] fixup! tmp: fusion: add arcv.{cc,h} files. 
Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 90a09ef51b32..533a24a30208 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -382,11 +382,19 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) /* Check if multiply result is used in either operand of the addition. */ if (REG_P (XEXP (curr_plus, 0)) && REGNO (XEXP (curr_plus, 0)) == mult_dest_regno) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MULT_ADD (op0)\n"); + return true; + } if (REG_P (XEXP (curr_plus, 1)) && REGNO (XEXP (curr_plus, 1)) == mult_dest_regno) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MULT_ADD (op1)\n"); + return true; + } } /* Fuse logical shift left with logical shift right (bit-extract pattern): @@ -397,7 +405,11 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_SHIFT_BITEXTRACT\n"); + return true; + } /* Fuse load-immediate with a dependent conditional branch: prev: (set rd imm) @@ -412,8 +424,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) rtx comp = XEXP (SET_SRC (curr_set), 0); rtx prev_dest = SET_DEST (prev_set); - return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == prev_dest) - || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == prev_dest); + if ((REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == prev_dest) + || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == prev_dest)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_LI_BRANCH\n"); + return true; + } + return false; } /* Do not fuse loads/stores before sched2. */ @@ -432,7 +450,11 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && get_attr_type (curr) == TYPE_LOAD) { if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true)) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_ADJACENT_LOAD\n"); + return true; + } } /* Fuse adjacent stores. */ @@ -440,7 +462,11 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && get_attr_type (curr) == TYPE_STORE) { if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false)) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_ADJACENT_STORE\n"); + return true; + } } /* Look ahead 1 insn to prioritize adjacent load/store pairs. From f6a18d3e71aabeaf8c14b820b61ee16de3c2ce51 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 11:17:59 +0000 Subject: [PATCH 44/47] fixup! tmp: fusion: add arcv.{cc,h} files. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 42 ++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 533a24a30208..c1a7063341c0 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -493,15 +493,33 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) /* Fuse a pre- or post-update memory operation: Examples: load+add, add+load, store+add, add+store. 
*/ - if (arcv_memop_arith_pair_p (prev, curr) - || arcv_memop_arith_pair_p (curr, prev)) - return true; + if (arcv_memop_arith_pair_p (prev, curr)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_ARITH (prev, curr)\n"); + return true; + } + if (arcv_memop_arith_pair_p (curr, prev)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_ARITH (curr, prev)\n"); + return true; + } /* Fuse a memory operation preceded or followed by a LUI: Examples: load+lui, lui+load, store+lui, lui+store. */ - if (arcv_memop_lui_pair_p (prev, curr) - || arcv_memop_lui_pair_p (curr, prev)) - return true; + if (arcv_memop_lui_pair_p (prev, curr)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_LUI (prev, curr)\n"); + return true; + } + if (arcv_memop_lui_pair_p (curr, prev)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_LUI (curr, prev)\n"); + return true; + } /* Fuse load-immediate with a store of the destination register: prev: (set rd imm) @@ -514,10 +532,18 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) rtx load_dest = SET_DEST (prev_set); if (REG_P (store_src) && store_src == load_dest) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_LI_STORE\n"); + return true; + } if (SUBREG_P (store_src) && SUBREG_REG (store_src) == load_dest) - return true; + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_LI_STORE (subreg)\n"); + return true; + } } return false; From eb4055dbf2debdff898ffb023ab8f131d0431661 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 11:22:55 +0000 Subject: [PATCH 45/47] tmp: arcv.{cc,h}/riscv.cc: Remove unused variable. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 2 +- gcc/config/riscv/arcv.h | 2 +- gcc/config/riscv/riscv.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index c1a7063341c0..8354d838c72c 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -832,7 +832,7 @@ arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, bool -arcv_can_issue_more_p (rtx_insn *insn, int more) +arcv_can_issue_more_p (int more) { /* Beginning of cycle - reset variables. */ if (more == riscv_get_tune_param_issue_rate ()) diff --git a/gcc/config/riscv/arcv.h b/gcc/config/riscv/arcv.h index c42cd75b0275..f83d08d75732 100644 --- a/gcc/config/riscv/arcv.h +++ b/gcc/config/riscv/arcv.h @@ -21,7 +21,7 @@ along with GCC; see the file COPYING3. If not see #define GCC_RISCV_ARCV_H /* ARCV scheduler interface functions. */ -extern bool arcv_can_issue_more_p (rtx_insn *, int); +extern bool arcv_can_issue_more_p (int); extern int arcv_sched_variable_issue (rtx_insn *, int); extern bool arcv_macro_fusion_pair_p (rtx_insn *, rtx_insn *); extern void arcv_sched_init (void); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 03cdb35f4ee9..fb5551cf6f16 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10642,7 +10642,7 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) { if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) - if (!arcv_can_issue_more_p (insn, more)) + if (!arcv_can_issue_more_p (more)) return 0; if (DEBUG_INSN_P (insn)) From eb71e74e55dd1bdf5c2f06beeda70f5a14b80ee8 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 11:24:49 +0000 Subject: [PATCH 46/47] fixup! 
arcv: add scheduling information for the Synopsys RMX-100 CPU Signed-off-by: Luis Silva --- gcc/config/riscv/arcv-rmx100.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md index 5a25dfb67cfc..29dde32a40c2 100644 --- a/gcc/config/riscv/arcv-rmx100.md +++ b/gcc/config/riscv/arcv-rmx100.md @@ -1,6 +1,6 @@ ;; DFA scheduling description of the Synopsys RMX-100 cpu ;; for GNU C compiler -;; Copyright (C) 2023 Free Software Foundation, Inc. +;; Copyright (C) 2025 Free Software Foundation, Inc. ;; This file is part of GCC. From 4f60a816bd1f2dd28e471b473460321e48e2a99c Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 18 Nov 2025 15:12:07 +0000 Subject: [PATCH 47/47] tmp: arcv: Fix incorrect condition and logic in reorder scheduler. Signed-off-by: Luis Silva --- gcc/config/riscv/arcv.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 8354d838c72c..f0a39a23cb77 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -670,22 +670,23 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) /* If all else fails, schedule a single instruction. */ if (ready && *n_readyp > 0 && NONDEBUG_INSN_P (ready[*n_readyp - 1]) - && recog_memoized (ready[*n_readyp - 1]) >= 0 - && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD - && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + && recog_memoized (ready[*n_readyp - 1]) >= 0) { + rtx_insn *insn = ready[*n_readyp - 1]; + enum attr_type insn_type = get_attr_type (insn); + + /* Memory operations go to pipeB if available. */ if (!sched_state.pipeB_scheduled_p - && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + && (insn_type == TYPE_LOAD || insn_type == TYPE_STORE)) { - sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1; + sched_state.pipeB_scheduled_p = 1; sched_state.cached_can_issue_more = 1; return 1; } - else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + /* Non-memory operations go to ALU pipe. */ + else if (insn_type != TYPE_LOAD && insn_type != TYPE_STORE) { - sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1; + sched_state.alu_pipe_scheduled_p = 1; sched_state.cached_can_issue_more = 1; return 1; }
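/* A worked illustration of the condition fixed in the hunk above
   (editorial sketch only, not part of the patch): the removed else-if
   used ||, which is a tautology.  Take insn_type == TYPE_STORE:

       insn_type != TYPE_LOAD      || insn_type != TYPE_STORE
     = (TYPE_STORE != TYPE_LOAD)   || (TYPE_STORE != TYPE_STORE)
     = true                        || false
     = true

   No type can equal both TYPE_LOAD and TYPE_STORE at once, so the old
   test matched every instruction, and the old outer condition had
   already excluded loads/stores, leaving the pipeB branch unreachable
   while every instruction issued here marked both pipes busy.  With the
   corrected && test only genuine non-memory instructions claim the ALU
   pipe, memory operations can actually reach the pipeB branch, and each
   branch now sets only its own pipe flag.  */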