LoongArch: C and .h files for LA32

gcc/ChangeLog:

	* config/loongarch/loongarch-opts.cc (loongarch_target_option_override):
	Delete opts->x_flag_pcc_struct_return and enable mstrict-align by
	default on LA32.
	* config/loongarch/loongarch.cc (loongarch_for_each_saved_reg): Save
	FP registers in a mode that depends on the float ABI.
	(loongarch_explicit_relocs_p): Disable explicit relocs on LA32.
	(loongarch_valid_offset_p): Restrict const_imm16_operand with
	4-byte-aligned offsets to LA64.
	(loongarch_valid_lo_sum_p): Allow lo_sum to be used with DF in ilp32d.
	(loongarch_valid_index_p): Disable ADDRESS_REG_REG on LA32.
	(loongarch_legitimize_address): Disable mem_shadd_or_shadd_rtx_p on LA32.
	(loongarch_output_move_index): Assert TARGET_64BIT for ldx/stx.
	(loongarch_output_move): Disable ldptr/stptr if offset is 0.
	(loongarch_output_equal_conditional_branch): Disable beqz/bnez on LA32R.
	(loongarch_trampoline_init): Change pcaddi to pcaddu12i.
	(loongarch_get_separate_components): Disable ldptr/stptr on LA32.
	(loongarch_c_mode_for_floating_type): Use TFmode for long double
	regardless of target bitness.
	(loongarch_bitint_type_info): Disable BitInt on LA32.
	(loongarch_call_tls_get_addr): Use call30 on LA32.
	(loongarch_split_move): Add split for DI, DF, TF.
	* config/loongarch/loongarch.h (LA_LONG_DOUBLE_TYPE_SIZE): Set
	LONG_DOUBLE_TYPE_SIZE to 128 regardless of target bitness.
	(MAX_FIXED_MODE_SIZE): Set to 64 on LA32.
	(DEFAULT_PCC_STRUCT_RETURN): New.
	(STACK_BOUNDARY): Set to 128 on LA64 and LA32.
	(LARCH_STACK_ALIGN): Set to 16 on LA64 and LA32.
	(TRAMPOLINE_SIZE): Set to same value on LA64 and LA32.

include/ChangeLog:

	* longlong.h (count_leading_zeros): Delete because LA32R has no clz
	instruction.
	(count_trailing_zeros): Delete because LA32R has no ctz instruction.
	(COUNT_LEADING_ZEROS_0): Delete.

Co-authored-by: Jiajie Chen <c@jia.je>
Reviewed-by: Xi Ruoyao <xry111@xry111.site>
Reviewed-by: Lulu Cheng <chenglulu@loongson.cn>
This commit is contained in:
mengqinggang
2025-11-25 19:07:42 +08:00
committed by Lulu Cheng
parent f5ae30e614
commit 5c821cbae4
4 changed files with 120 additions and 50 deletions

View File

@@ -358,6 +358,12 @@ config_target_isa:
}
}
/* TARGET_32BIT and TARGET_64BIT init at the end of this function,
can't use here. */
if ((t.isa.base == ISA_BASE_LA32 || t.isa.base == ISA_BASE_LA32R)
&& (t.isa.simd == ISA_EXT_SIMD_LSX || t.isa.simd == ISA_EXT_SIMD_LASX))
fatal_error (UNKNOWN_LOCATION, "SIMD is not supported on LA32");
/* All SIMD extensions imply a 64-bit FPU:
- silently adjust t.isa.fpu to "fpu64" if it is unconstrained.
- warn if -msingle-float / -msoft-float is on,
@@ -557,7 +563,15 @@ fallback:
case CMODEL_NORMAL:
case CMODEL_MEDIUM:
break;
case CMODEL_EXTREME:
if (t.isa.base == ISA_BASE_LA32 || t.isa.base == ISA_BASE_LA32R)
{
warning (0, "%qs is not supported, now cmodel is set to %qs",
loongarch_cmodel_strings[t.cmodel], "normal");
t.cmodel = CMODEL_NORMAL;
}
break;
default:
@@ -570,6 +584,10 @@ fallback:
/* Cleanup and return. */
obstack_free (&msg_obstack, NULL);
*target = t;
/* TODO: mexplicit-relocs support for LA32. */
if (TARGET_32BIT)
la_opt_explicit_relocs = EXPLICIT_RELOCS_NONE;
}
/* Returns the default ABI for the given instruction set. */
@@ -1039,9 +1057,9 @@ loongarch_target_option_override (struct loongarch_target *target,
if (!opts_set->x_la_addr_reg_reg_cost)
opts->x_la_addr_reg_reg_cost = loongarch_cost->addr_reg_reg_cost;
/* other stuff */
if (ABI_LP64_P (target->abi.base))
opts->x_flag_pcc_struct_return = 0;
/* Enable -mstrict-align by default on LA32. */
if (TARGET_32BIT && !(opts_set->x_target_flags & MASK_STRICT_ALIGN))
opts->x_target_flags |= MASK_STRICT_ALIGN;
switch (target->cmodel)
{

View File

@@ -1051,7 +1051,7 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
{
if (!cfun->machine->reg_is_wrapped_separately[regno])
loongarch_save_restore_reg (word_mode, regno, offset, fn);
loongarch_save_restore_reg (mode, regno, offset, fn);
offset -= GET_MODE_SIZE (mode);
}
@@ -2289,6 +2289,9 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
bool
loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
{
if (TARGET_32BIT)
return false;
if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
@@ -2445,7 +2448,9 @@ loongarch_valid_offset_p (rtx x, machine_mode mode)
or check that X is a signed 16-bit number
and offset 4 byte aligned. */
if (!(const_arith_operand (x, Pmode)
|| ((mode == E_SImode || mode == E_DImode)
/* FIXME: la32 atomic insns support 16-bit imm. */
|| (TARGET_64BIT
&& (mode == E_SImode || mode == E_DImode)
&& const_imm16_operand (x, Pmode)
&& (loongarch_signed_immediate_p (INTVAL (x), 14, 2)))))
return false;
@@ -2491,7 +2496,7 @@ static bool
loongarch_valid_lo_sum_p (enum loongarch_symbol_type symbol_type,
machine_mode mode, rtx x)
{
int align, size;
int align, size, word_size;
/* Check that symbols of type SYMBOL_TYPE can be used to access values
of mode MODE. */
@@ -2532,7 +2537,10 @@ loongarch_valid_lo_sum_p (enum loongarch_symbol_type symbol_type,
/* We may need to split multiword moves, so make sure that each word
can be accessed without inducing a carry. */
if (size > BITS_PER_WORD
word_size = (GET_MODE_CLASS (mode) == MODE_FLOAT
? (UNITS_PER_HWFPVALUE * BITS_PER_UNIT)
: BITS_PER_WORD);
if (size > word_size
&& (!TARGET_STRICT_ALIGN || size > align))
return false;
@@ -2558,7 +2566,8 @@ loongarch_valid_index_p (struct loongarch_address_info *info, rtx x,
&& contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
index = SUBREG_REG (index);
if (loongarch_valid_base_register_p (index, mode, strict_p))
/* LA32 does not provide LDX/STX. */
if (TARGET_64BIT && loongarch_valid_base_register_p (index, mode, strict_p))
{
info->type = ADDRESS_REG_REG;
info->offset = index;
@@ -3142,19 +3151,22 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
{
rtx call;
if (HAVE_AS_SUPPORT_CALL36)
call = gen_call_value_internal (v0, loongarch_tls_symbol,
const0_rtx);
else
{
rtx reg = gen_reg_rtx (Pmode);
emit_insn (gen_pcalau12i (Pmode, reg,
loongarch_tls_symbol));
call = gen_call_value_internal_1 (Pmode, v0, reg,
loongarch_tls_symbol,
const0_rtx);
}
insn = emit_call_insn (call);
/* Use call36 or call30.
TARGET_32BIT always support call30. */
if ((TARGET_64BIT && HAVE_AS_SUPPORT_CALL36)
|| TARGET_32BIT)
call = gen_call_value_internal (v0, loongarch_tls_symbol,
const0_rtx);
else
{
rtx reg = gen_reg_rtx (Pmode);
emit_insn (gen_pcalau12i (Pmode, reg,
loongarch_tls_symbol));
call = gen_call_value_internal_1 (Pmode, v0, reg,
loongarch_tls_symbol,
const0_rtx);
}
insn = emit_call_insn (call);
}
else
{
@@ -3608,7 +3620,9 @@ loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
if (offset != 0)
{
/* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
if ((TARGET_64BIT || TARGET_32BIT_S)
&& GET_CODE (base) == PLUS
&& mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
&& IMM12_OPERAND (offset))
{
rtx index = XEXP (base, 0);
@@ -4899,12 +4913,41 @@ loongarch_split_move_p (rtx dest, rtx src)
void
loongarch_split_move (rtx dest, rtx src)
{
rtx low_dest;
gcc_checking_assert (loongarch_split_move_p (dest, src));
if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
|| LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
loongarch_split_vector_move (dest, src);
else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
{
if (TARGET_32BIT && GET_MODE (dest) == DImode)
emit_insn (gen_move_doubleword_2_di (dest, src));
else if (TARGET_32BIT && GET_MODE (dest) == DFmode)
emit_insn (gen_move_doubleword_2_df (dest, src));
else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
emit_insn (gen_move_doubleword_2_tf (dest, src));
else
gcc_unreachable ();
}
else
gcc_unreachable ();
{
/* The operation can be split into two normal moves. Decide in
which order to do them. */
low_dest = loongarch_subword (dest, false);
if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
{
loongarch_emit_move (loongarch_subword (dest, true),
loongarch_subword (src, true));
loongarch_emit_move (low_dest, loongarch_subword (src, false));
}
else
{
loongarch_emit_move (low_dest, loongarch_subword (src, false));
loongarch_emit_move (loongarch_subword (dest, true),
loongarch_subword (src, true));
}
}
}
/* Check if adding an integer constant value for a specific mode can be
@@ -5033,6 +5076,7 @@ loongarch_output_move_index (rtx x, machine_mode mode, bool ldr)
}
};
gcc_assert (TARGET_64BIT);
return insn[ldr][index];
}
@@ -5284,10 +5328,14 @@ loongarch_output_move (rtx *operands)
/* Matching address type with a 12bit offset and
ADDRESS_LO_SUM. */
if (const_arith_operand (offset, Pmode)
|| GET_CODE (offset) == LO_SUM)
|| GET_CODE (offset) == LO_SUM
|| GET_CODE (XEXP (dest, 0)) == REG)
return "st.w\t%z1,%0";
else
return "stptr.w\t%z1,%0";
{
gcc_assert (TARGET_64BIT);
return "stptr.w\t%z1,%0";
}
case 8:
if (const_arith_operand (offset, Pmode)
|| GET_CODE (offset) == LO_SUM)
@@ -5329,10 +5377,14 @@ loongarch_output_move (rtx *operands)
/* Matching address type with a 12bit offset and
ADDRESS_LO_SUM. */
if (const_arith_operand (offset, Pmode)
|| GET_CODE (offset) == LO_SUM)
|| GET_CODE (offset) == LO_SUM
|| GET_CODE (XEXP (src, 0)) == REG)
return "ld.w\t%0,%1";
else
return "ldptr.w\t%0,%1";
{
gcc_assert (TARGET_64BIT);
return "ldptr.w\t%0,%1";
}
case 8:
if (const_arith_operand (offset, Pmode)
|| GET_CODE (offset) == LO_SUM)
@@ -7840,7 +7892,8 @@ loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands,
bool inverted_p)
{
const char *branch[2];
if (operands[3] == const0_rtx)
if ((TARGET_64BIT || TARGET_32BIT_S)
&& operands[3] == const0_rtx)
{
branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0");
branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0");
@@ -8508,11 +8561,11 @@ loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
/* Build up the code in TRAMPOLINE. */
i = 0;
/*pcaddi $static_chain,0
/*pcaddu12i $static_chain,0
ld.[dw] $tmp,$static_chain,target_function_offset
ld.[dw] $static_chain,$static_chain,static_chain_offset
jirl $r0,$tmp,0 */
trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST));
trampoline[i++] = OP (0x1c000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST));
trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
| 19 /* $t7 */
| ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
@@ -8721,11 +8774,9 @@ loongarch_get_separate_components (void)
/* We can wrap general registers saved at [sp, sp + 32768) using the
ldptr/stptr instructions. For large offsets a pseudo register
might be needed which cannot be created during the shrink
wrapping pass.
TODO: This may need a revise when we add LA32 as ldptr.w is not
guaranteed available by the manual. */
if (offset < 32768)
wrapping pass. */
if ((TARGET_64BIT && IMM16_OPERAND (offset))
|| IMM12_OPERAND (offset))
bitmap_set_bit (components, regno);
offset -= UNITS_PER_WORD;
@@ -11323,7 +11374,7 @@ static machine_mode
loongarch_c_mode_for_floating_type (enum tree_index ti)
{
if (ti == TI_LONG_DOUBLE_TYPE)
return TARGET_64BIT ? TFmode : DFmode;
return TFmode;
return default_mode_for_floating_type (ti);
}
@@ -11393,6 +11444,10 @@ loongarch_c_mode_for_suffix (char suffix)
bool
loongarch_bitint_type_info (int n, struct bitint_info *info)
{
/* LA32 not support BitInt. */
if (TARGET_32BIT)
return false;
if (n <= 8)
info->limb_mode = QImode;
else if (n <= 16)

View File

@@ -158,7 +158,7 @@ along with GCC; see the file COPYING3. If not see
#define LONG_LONG_TYPE_SIZE 64
/* LONG_DOUBLE_TYPE_SIZE get poisoned, so add LA_ prefix. */
#define LA_LONG_DOUBLE_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
#define LA_LONG_DOUBLE_TYPE_SIZE 128
/* Define the sizes of fixed-point types. */
#define SHORT_FRACT_TYPE_SIZE 8
@@ -171,9 +171,9 @@ along with GCC; see the file COPYING3. If not see
#define LONG_ACCUM_TYPE_SIZE 64
#define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
/* long double is not a fixed mode, but the idea is that, if we
support long double, we also want a 128-bit integer type. */
#define MAX_FIXED_MODE_SIZE LA_LONG_DOUBLE_TYPE_SIZE
/* An integer expression for the size in bits of the largest integer machine
mode that should actually be used. */
#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_64BIT ? TImode : DImode)
/* Width in bits of a pointer. */
#ifndef POINTER_SIZE
@@ -656,6 +656,9 @@ enum reg_class
#define REG_PARM_STACK_SPACE(FNDECL) 0
/* If the size of struct <= 2 * GRLEN, pass by registers if available. */
#define DEFAULT_PCC_STRUCT_RETURN 0
/* Define this if it is the responsibility of the caller to
allocate the area reserved for arguments passed in registers.
If `ACCUMULATE_OUTGOING_ARGS' is also defined, the only effect
@@ -663,7 +666,7 @@ enum reg_class
`crtl->outgoing_args_size'. */
#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
#define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64)
#define STACK_BOUNDARY 128
/* This value controls how many pages we manually unroll the loop for when
generating stack clash probes. */
@@ -750,8 +753,7 @@ typedef struct {
/* Treat LOC as a byte offset from the stack pointer and round it up
to the next fully-aligned offset. */
#define LARCH_STACK_ALIGN(LOC) \
(TARGET_ABI_LP64 ? ROUND_UP ((LOC), 16) : ROUND_UP ((LOC), 8))
#define LARCH_STACK_ALIGN(LOC) ROUND_UP ((LOC), 16)
#define MCOUNT_NAME "_mcount"
@@ -781,8 +783,7 @@ typedef struct {
#define TRAMPOLINE_CODE_SIZE 16
#define TRAMPOLINE_SIZE \
((Pmode == SImode) ? TRAMPOLINE_CODE_SIZE \
: (TRAMPOLINE_CODE_SIZE + POINTER_SIZE * 2))
(TRAMPOLINE_CODE_SIZE + GET_MODE_SIZE (ptr_mode) * 2)
#define TRAMPOLINE_ALIGNMENT POINTER_SIZE
/* loongarch_trampoline_init calls this library function to flush

View File

@@ -594,11 +594,7 @@ extern UDItype __umulsidi3 (USItype, USItype);
#endif
#ifdef __loongarch__
# if W_TYPE_SIZE == 32
# define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
# define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
# define COUNT_LEADING_ZEROS_0 32
# elif W_TYPE_SIZE == 64
# if W_TYPE_SIZE == 64
# define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
# define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
# define COUNT_LEADING_ZEROS_0 64