Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
    34  // deadcode indicates whether rewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	// if the number of rewrite iterations reaches itersLimit we will
    44  	// at that point turn on cycle detection. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
    85  						// the use count of all of its arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
   105  				// of a value so that a x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
   248  
   249  // mergeSym merges two symbolic offsets. There is no real merging of
   250  // offsets, we just pick the non-nil one.
   251  func mergeSym(x, y Sym) Sym {
   252  	if x == nil {
   253  		return y
   254  	}
   255  	if y == nil {
   256  		return x
   257  	}
   258  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   259  }
   260  
   261  func canMergeSym(x, y Sym) bool {
   262  	return x == nil || y == nil
   263  }
   264  
   265  // canMergeLoadClobber reports whether the load can be merged into target without
   266  // invalidating the schedule.
   267  // It also checks that the other non-load argument x is something we
   268  // are ok with clobbering.
   269  func canMergeLoadClobber(target, load, x *Value) bool {
   270  	// The register containing x is going to get clobbered.
   271  	// Don't merge if we still need the value of x.
   272  	// We don't have liveness information here, but we can
   273  	// approximate x dying with:
   274  	//  1) target is x's only use.
   275  	//  2) target is not in a deeper loop than x.
   276  	if x.Uses != 1 {
   277  		return false
   278  	}
   279  	loopnest := x.Block.Func.loopnest()
   280  	loopnest.calculateDepths()
   281  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   282  		return false
   283  	}
   284  	return canMergeLoad(target, load)
   285  }
   286  
   287  // canMergeLoad reports whether the load can be merged into target without
   288  // invalidating the schedule.
   289  func canMergeLoad(target, load *Value) bool {
   290  	if target.Block.ID != load.Block.ID {
   291  		// If the load is in a different block do not merge it.
   292  		return false
   293  	}
   294  
   295  	// We can't merge the load into the target if the load
   296  	// has more than one use.
   297  	if load.Uses != 1 {
   298  		return false
   299  	}
   300  
   301  	mem := load.MemoryArg()
   302  
   303  	// We need the load's memory arg to still be alive at target. That
   304  	// can't be the case if one of target's args depends on a memory
   305  	// state that is a successor of load's memory arg.
   306  	//
   307  	// For example, it would be invalid to merge load into target in
   308  	// the following situation because newmem has killed oldmem
   309  	// before target is reached:
   310  	//     load = read ... oldmem
   311  	//   newmem = write ... oldmem
   312  	//     arg0 = read ... newmem
   313  	//   target = add arg0 load
   314  	//
   315  	// If the argument comes from a different block then we can exclude
   316  	// it immediately because it must dominate load (which is in the
   317  	// same block as target).
   318  	var args []*Value
   319  	for _, a := range target.Args {
   320  		if a != load && a.Block.ID == target.Block.ID {
   321  			args = append(args, a)
   322  		}
   323  	}
   324  
   325  	// memPreds contains memory states known to be predecessors of load's
   326  	// memory state. It is lazily initialized.
   327  	var memPreds map[*Value]bool
   328  	for i := 0; len(args) > 0; i++ {
   329  		const limit = 100
   330  		if i >= limit {
   331  			// Give up if we have done a lot of iterations.
   332  			return false
   333  		}
   334  		v := args[len(args)-1]
   335  		args = args[:len(args)-1]
   336  		if target.Block.ID != v.Block.ID {
   337  			// Since target and load are in the same block
   338  			// we can stop searching when we leave the block.
   339  			continue
   340  		}
   341  		if v.Op == OpPhi {
   342  			// A Phi implies we have reached the top of the block.
   343  			// The memory phi, if it exists, is always
   344  			// the first logical store in the block.
   345  			continue
   346  		}
   347  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   348  			// We could handle this situation however it is likely
   349  			// to be very rare.
   350  			return false
   351  		}
   352  		if v.Op.SymEffect()&SymAddr != 0 {
   353  			// This case prevents an operation that calculates the
   354  			// address of a local variable from being forced to schedule
   355  			// before its corresponding VarDef.
   356  			// See issue 28445.
   357  			//   v1 = LOAD ...
   358  			//   v2 = VARDEF
   359  			//   v3 = LEAQ
   360  			//   v4 = CMPQ v1 v3
   361  			// We don't want to combine the CMPQ with the load, because
   362  			// that would force the CMPQ to schedule before the VARDEF, which
   363  			// in turn requires the LEAQ to schedule before the VARDEF.
   364  			return false
   365  		}
   366  		if v.Type.IsMemory() {
   367  			if memPreds == nil {
   368  				// Initialise a map containing memory states
   369  				// known to be predecessors of load's memory
   370  				// state.
   371  				memPreds = make(map[*Value]bool)
   372  				m := mem
   373  				const limit = 50
   374  				for i := 0; i < limit; i++ {
   375  					if m.Op == OpPhi {
   376  						// The memory phi, if it exists, is always
   377  						// the first logical store in the block.
   378  						break
   379  					}
   380  					if m.Block.ID != target.Block.ID {
   381  						break
   382  					}
   383  					if !m.Type.IsMemory() {
   384  						break
   385  					}
   386  					memPreds[m] = true
   387  					if len(m.Args) == 0 {
   388  						break
   389  					}
   390  					m = m.MemoryArg()
   391  				}
   392  			}
   393  
   394  			// We can merge if v is a predecessor of mem.
   395  			//
   396  			// For example, we can merge load into target in the
   397  			// following scenario:
   398  			//      x = read ... v
   399  			//    mem = write ... v
   400  			//   load = read ... mem
   401  			// target = add x load
   402  			if memPreds[v] {
   403  				continue
   404  			}
   405  			return false
   406  		}
   407  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   408  			// If v takes mem as an input then we know mem
   409  			// is valid at this point.
   410  			continue
   411  		}
   412  		for _, a := range v.Args {
   413  			if target.Block.ID == a.Block.ID {
   414  				args = append(args, a)
   415  			}
   416  		}
   417  	}
   418  
   419  	return true
   420  }
   421  
   422  // isSameCall reports whether sym is the same as the given named symbol.
   423  func isSameCall(sym interface{}, name string) bool {
   424  	fn := sym.(*AuxCall).Fn
   425  	return fn != nil && fn.String() == name
   426  }
   427  
   428  // canLoadUnaligned reports if the architecture supports unaligned load operations.
   429  func canLoadUnaligned(c *Config) bool {
   430  	return c.ctxt.Arch.Alignment == 1
   431  }
   432  
   433  // nlzX returns the number of leading zeros.
   434  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   435  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   436  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   437  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   438  
   439  // ntzX returns the number of trailing zeros.
   440  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   441  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   442  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   443  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   444  
   445  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   446  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   447  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   448  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   449  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   450  
   451  // nto returns the number of trailing ones.
   452  func nto(x int64) int64 {
   453  	return int64(ntz64(^x))
   454  }
   455  
   456  // logX returns logarithm of n base 2.
   457  // n must be a positive power of 2 (isPowerOfTwoX returns true).
   458  func log8(n int8) int64 {
   459  	return int64(bits.Len8(uint8(n))) - 1
   460  }
   461  func log16(n int16) int64 {
   462  	return int64(bits.Len16(uint16(n))) - 1
   463  }
   464  func log32(n int32) int64 {
   465  	return int64(bits.Len32(uint32(n))) - 1
   466  }
   467  func log64(n int64) int64 {
   468  	return int64(bits.Len64(uint64(n))) - 1
   469  }
   470  
   471  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   472  // Rounds down.
   473  func log2uint32(n int64) int64 {
   474  	return int64(bits.Len32(uint32(n))) - 1
   475  }
   476  
   477  // isPowerOfTwoX functions report whether n is a power of 2.
   478  func isPowerOfTwo8(n int8) bool {
   479  	return n > 0 && n&(n-1) == 0
   480  }
   481  func isPowerOfTwo16(n int16) bool {
   482  	return n > 0 && n&(n-1) == 0
   483  }
   484  func isPowerOfTwo32(n int32) bool {
   485  	return n > 0 && n&(n-1) == 0
   486  }
   487  func isPowerOfTwo64(n int64) bool {
   488  	return n > 0 && n&(n-1) == 0
   489  }
   490  
   491  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   492  func isUint64PowerOfTwo(in int64) bool {
   493  	n := uint64(in)
   494  	return n > 0 && n&(n-1) == 0
   495  }
   496  
   497  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   498  func isUint32PowerOfTwo(in int64) bool {
   499  	n := uint64(uint32(in))
   500  	return n > 0 && n&(n-1) == 0
   501  }
   502  
   503  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   504  func is32Bit(n int64) bool {
   505  	return n == int64(int32(n))
   506  }
   507  
   508  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   509  func is16Bit(n int64) bool {
   510  	return n == int64(int16(n))
   511  }
   512  
   513  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   514  func is8Bit(n int64) bool {
   515  	return n == int64(int8(n))
   516  }
   517  
   518  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   519  func isU8Bit(n int64) bool {
   520  	return n == int64(uint8(n))
   521  }
   522  
   523  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   524  func isU12Bit(n int64) bool {
   525  	return 0 <= n && n < (1<<12)
   526  }
   527  
   528  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   529  func isU16Bit(n int64) bool {
   530  	return n == int64(uint16(n))
   531  }
   532  
   533  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   534  func isU32Bit(n int64) bool {
   535  	return n == int64(uint32(n))
   536  }
   537  
   538  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   539  func is20Bit(n int64) bool {
   540  	return -(1<<19) <= n && n < (1<<19)
   541  }
   542  
   543  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   544  func b2i(b bool) int64 {
   545  	if b {
   546  		return 1
   547  	}
   548  	return 0
   549  }
   550  
   551  // b2i32 translates a boolean value to 0 or 1.
   552  func b2i32(b bool) int32 {
   553  	if b {
   554  		return 1
   555  	}
   556  	return 0
   557  }
   558  
   559  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   560  // A shift is bounded if it is shifting by less than the width of the shifted value.
   561  func shiftIsBounded(v *Value) bool {
   562  	return v.AuxInt != 0
   563  }
   564  
   565  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   566  // generated code as much as possible.
   567  func canonLessThan(x, y *Value) bool {
   568  	if x.Op != y.Op {
   569  		return x.Op < y.Op
   570  	}
   571  	if !x.Pos.SameFileAndLine(y.Pos) {
   572  		return x.Pos.Before(y.Pos)
   573  	}
   574  	return x.ID < y.ID
   575  }
   576  
   577  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   578  // of the mantissa. It will panic if the truncation results in lost information.
   579  func truncate64Fto32F(f float64) float32 {
   580  	if !isExactFloat32(f) {
   581  		panic("truncate64Fto32F: truncation is not exact")
   582  	}
   583  	if !math.IsNaN(f) {
   584  		return float32(f)
   585  	}
   586  	// NaN bit patterns aren't necessarily preserved across conversion
   587  	// instructions so we need to do the conversion manually.
   588  	b := math.Float64bits(f)
   589  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   590  	//          | sign                  | exponent   | mantissa       |
   591  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   592  	return math.Float32frombits(r)
   593  }
   594  
   595  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   596  // pattern of the mantissa.
   597  func extend32Fto64F(f float32) float64 {
   598  	if !math.IsNaN(float64(f)) {
   599  		return float64(f)
   600  	}
   601  	// NaN bit patterns aren't necessarily preserved across conversion
   602  	// instructions so we need to do the conversion manually.
   603  	b := uint64(math.Float32bits(f))
   604  	//   | sign                  | exponent      | mantissa                    |
   605  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   606  	return math.Float64frombits(r)
   607  }
   608  
   609  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   610  func DivisionNeedsFixUp(v *Value) bool {
   611  	return v.AuxInt == 0
   612  }
   613  
   614  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   615  func auxFrom64F(f float64) int64 {
   616  	if f != f {
   617  		panic("can't encode a NaN in AuxInt field")
   618  	}
   619  	return int64(math.Float64bits(f))
   620  }
   621  
   622  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   623  func auxFrom32F(f float32) int64 {
   624  	if f != f {
   625  		panic("can't encode a NaN in AuxInt field")
   626  	}
   627  	return int64(math.Float64bits(extend32Fto64F(f)))
   628  }
   629  
   630  // auxTo32F decodes a float32 from the AuxInt value provided.
   631  func auxTo32F(i int64) float32 {
   632  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   633  }
   634  
   635  // auxTo64F decodes a float64 from the AuxInt value provided.
   636  func auxTo64F(i int64) float64 {
   637  	return math.Float64frombits(uint64(i))
   638  }
   639  
   640  func auxIntToBool(i int64) bool {
   641  	if i == 0 {
   642  		return false
   643  	}
   644  	return true
   645  }
   646  func auxIntToInt8(i int64) int8 {
   647  	return int8(i)
   648  }
   649  func auxIntToInt16(i int64) int16 {
   650  	return int16(i)
   651  }
   652  func auxIntToInt32(i int64) int32 {
   653  	return int32(i)
   654  }
   655  func auxIntToInt64(i int64) int64 {
   656  	return i
   657  }
   658  func auxIntToUint8(i int64) uint8 {
   659  	return uint8(i)
   660  }
   661  func auxIntToFloat32(i int64) float32 {
   662  	return float32(math.Float64frombits(uint64(i)))
   663  }
   664  func auxIntToFloat64(i int64) float64 {
   665  	return math.Float64frombits(uint64(i))
   666  }
   667  func auxIntToValAndOff(i int64) ValAndOff {
   668  	return ValAndOff(i)
   669  }
   670  func auxIntToArm64BitField(i int64) arm64BitField {
   671  	return arm64BitField(i)
   672  }
   673  func auxIntToInt128(x int64) int128 {
   674  	if x != 0 {
   675  		panic("nonzero int128 not allowed")
   676  	}
   677  	return 0
   678  }
   679  func auxIntToFlagConstant(x int64) flagConstant {
   680  	return flagConstant(x)
   681  }
   682  
   683  func auxIntToOp(cc int64) Op {
   684  	return Op(cc)
   685  }
   686  
   687  func boolToAuxInt(b bool) int64 {
   688  	if b {
   689  		return 1
   690  	}
   691  	return 0
   692  }
   693  func int8ToAuxInt(i int8) int64 {
   694  	return int64(i)
   695  }
   696  func int16ToAuxInt(i int16) int64 {
   697  	return int64(i)
   698  }
   699  func int32ToAuxInt(i int32) int64 {
   700  	return int64(i)
   701  }
   702  func int64ToAuxInt(i int64) int64 {
   703  	return int64(i)
   704  }
   705  func uint8ToAuxInt(i uint8) int64 {
   706  	return int64(int8(i))
   707  }
   708  func float32ToAuxInt(f float32) int64 {
   709  	return int64(math.Float64bits(float64(f)))
   710  }
   711  func float64ToAuxInt(f float64) int64 {
   712  	return int64(math.Float64bits(f))
   713  }
   714  func valAndOffToAuxInt(v ValAndOff) int64 {
   715  	return int64(v)
   716  }
   717  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   718  	return int64(v)
   719  }
   720  func int128ToAuxInt(x int128) int64 {
   721  	if x != 0 {
   722  		panic("nonzero int128 not allowed")
   723  	}
   724  	return 0
   725  }
   726  func flagConstantToAuxInt(x flagConstant) int64 {
   727  	return int64(x)
   728  }
   729  
   730  func opToAuxInt(o Op) int64 {
   731  	return int64(o)
   732  }
   733  
   734  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   735  type Aux interface {
   736  	CanBeAnSSAAux()
   737  }
   738  
   739  // for now only used to mark moves that need to avoid clobbering flags
   740  type auxMark bool
   741  
   742  func (auxMark) CanBeAnSSAAux() {}
   743  
   744  var AuxMark auxMark
   745  
   746  // stringAux wraps string values for use in Aux.
   747  type stringAux string
   748  
   749  func (stringAux) CanBeAnSSAAux() {}
   750  
   751  func auxToString(i Aux) string {
   752  	return string(i.(stringAux))
   753  }
   754  func auxToSym(i Aux) Sym {
   755  	// TODO: kind of a hack - allows nil interface through
   756  	s, _ := i.(Sym)
   757  	return s
   758  }
   759  func auxToType(i Aux) *types.Type {
   760  	return i.(*types.Type)
   761  }
   762  func auxToCall(i Aux) *AuxCall {
   763  	return i.(*AuxCall)
   764  }
   765  func auxToS390xCCMask(i Aux) s390x.CCMask {
   766  	return i.(s390x.CCMask)
   767  }
   768  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   769  	return i.(s390x.RotateParams)
   770  }
   771  
   772  func StringToAux(s string) Aux {
   773  	return stringAux(s)
   774  }
   775  func symToAux(s Sym) Aux {
   776  	return s
   777  }
   778  func callToAux(s *AuxCall) Aux {
   779  	return s
   780  }
   781  func typeToAux(t *types.Type) Aux {
   782  	return t
   783  }
   784  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   785  	return c
   786  }
   787  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   788  	return r
   789  }
   790  
   791  // uaddOvf reports whether unsigned a+b would overflow.
   792  func uaddOvf(a, b int64) bool {
   793  	return uint64(a)+uint64(b) < uint64(a)
   794  }
   795  
   796  // loadLSymOffset simulates reading a word at an offset into a
   797  // read-only symbol's runtime memory. If it would read a pointer to
   798  // another symbol, that symbol is returned. Otherwise, it returns nil.
   799  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   800  	if lsym.Type != objabi.SRODATA {
   801  		return nil
   802  	}
   803  
   804  	for _, r := range lsym.R {
   805  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   806  			return r.Sym
   807  		}
   808  	}
   809  
   810  	return nil
   811  }
   812  
   813  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   814  	v.Op = OpStaticLECall
   815  	auxcall := v.Aux.(*AuxCall)
   816  	auxcall.Fn = sym
   817  	// Remove first arg
   818  	v.Args[0].Uses--
   819  	copy(v.Args[0:], v.Args[1:])
   820  	v.Args[len(v.Args)-1] = nil // aid GC
   821  	v.Args = v.Args[:len(v.Args)-1]
   822  	if f := v.Block.Func; f.pass.debug > 0 {
   823  		f.Warnl(v.Pos, "de-virtualizing call")
   824  	}
   825  	return v
   826  }
   827  
   828  // isSamePtr reports whether p1 and p2 point to the same address.
   829  func isSamePtr(p1, p2 *Value) bool {
   830  	if p1 == p2 {
   831  		return true
   832  	}
   833  	if p1.Op != p2.Op {
   834  		return false
   835  	}
   836  	switch p1.Op {
   837  	case OpOffPtr:
   838  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   839  	case OpAddr, OpLocalAddr:
   840  		return p1.Aux == p2.Aux
   841  	case OpAddPtr:
   842  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   843  	}
   844  	return false
   845  }
   846  
   847  func isStackPtr(v *Value) bool {
   848  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   849  		v = v.Args[0]
   850  	}
   851  	return v.Op == OpSP || v.Op == OpLocalAddr
   852  }
   853  
   854  // disjoint reports whether the memory region specified by [p1:p1+n1)
   855  // does not overlap with [p2:p2+n2).
   856  // A return value of false does not imply the regions overlap.
   857  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   858  	if n1 == 0 || n2 == 0 {
   859  		return true
   860  	}
   861  	if p1 == p2 {
   862  		return false
   863  	}
   864  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   865  		base, offset = ptr, 0
   866  		for base.Op == OpOffPtr {
   867  			offset += base.AuxInt
   868  			base = base.Args[0]
   869  		}
   870  		if opcodeTable[base.Op].nilCheck {
   871  			base = base.Args[0]
   872  		}
   873  		return base, offset
   874  	}
   875  	p1, off1 := baseAndOffset(p1)
   876  	p2, off2 := baseAndOffset(p2)
   877  	if isSamePtr(p1, p2) {
   878  		return !overlap(off1, n1, off2, n2)
   879  	}
   880  	// p1 and p2 are not the same, so if they are both OpAddrs then
   881  	// they point to different variables.
   882  	// If one pointer is on the stack and the other is an argument
   883  	// then they can't overlap.
   884  	switch p1.Op {
   885  	case OpAddr, OpLocalAddr:
   886  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   887  			return true
   888  		}
   889  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   890  	case OpArg, OpArgIntReg:
   891  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   892  			return true
   893  		}
   894  	case OpSP:
   895  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   896  	}
   897  	return false
   898  }
   899  
   900  // moveSize returns the number of bytes an aligned MOV instruction moves.
   901  func moveSize(align int64, c *Config) int64 {
   902  	switch {
   903  	case align%8 == 0 && c.PtrSize == 8:
   904  		return 8
   905  	case align%4 == 0:
   906  		return 4
   907  	case align%2 == 0:
   908  		return 2
   909  	}
   910  	return 1
   911  }
   912  
   913  // mergePoint finds a block among a's blocks which dominates b and is itself
   914  // dominated by all of a's blocks. Returns nil if it can't find one.
   915  // Might return nil even if one does exist.
   916  func mergePoint(b *Block, a ...*Value) *Block {
   917  	// Walk backward from b looking for one of the a's blocks.
   918  
   919  	// Max distance
   920  	d := 100
   921  
   922  	for d > 0 {
   923  		for _, x := range a {
   924  			if b == x.Block {
   925  				goto found
   926  			}
   927  		}
   928  		if len(b.Preds) > 1 {
   929  			// Don't know which way to go back. Abort.
   930  			return nil
   931  		}
   932  		b = b.Preds[0].b
   933  		d--
   934  	}
   935  	return nil // too far away
   936  found:
   937  	// At this point, r is the first value in a that we find by walking backwards.
   938  	// if we return anything, r will be it.
   939  	r := b
   940  
   941  	// Keep going, counting the other a's that we find. They must all dominate r.
   942  	na := 0
   943  	for d > 0 {
   944  		for _, x := range a {
   945  			if b == x.Block {
   946  				na++
   947  			}
   948  		}
   949  		if na == len(a) {
   950  			// Found all of a in a backwards walk. We can return r.
   951  			return r
   952  		}
   953  		if len(b.Preds) > 1 {
   954  			return nil
   955  		}
   956  		b = b.Preds[0].b
   957  		d--
   958  
   959  	}
   960  	return nil // too far away
   961  }
   962  
   963  // clobber invalidates values. Returns true.
   964  // clobber is used by rewrite rules to:
   965  //
   966  //	A) make sure the values are really dead and never used again.
   967  //	B) decrement use counts of the values' args.
   968  func clobber(vv ...*Value) bool {
   969  	for _, v := range vv {
   970  		v.reset(OpInvalid)
   971  		// Note: leave v.Block intact.  The Block field is used after clobber.
   972  	}
   973  	return true
   974  }
   975  
   976  // clobberIfDead resets v when use count is 1. Returns true.
   977  // clobberIfDead is used by rewrite rules to decrement
   978  // use counts of v's args when v is dead and never used.
   979  func clobberIfDead(v *Value) bool {
   980  	if v.Uses == 1 {
   981  		v.reset(OpInvalid)
   982  	}
   983  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   984  	return true
   985  }
   986  
   987  // noteRule is an easy way to track if a rule is matched when writing
   988  // new ones.  Make the rule of interest also conditional on
   989  //
   990  //	noteRule("note to self: rule of interest matched")
   991  //
   992  // and that message will print when the rule matches.
   993  func noteRule(s string) bool {
   994  	fmt.Println(s)
   995  	return true
   996  }
   997  
   998  // countRule increments Func.ruleMatches[key].
   999  // If Func.ruleMatches is non-nil at the end
  1000  // of compilation, it will be printed to stdout.
  1001  // This is intended to make it easier to find which functions
  1002  // which contain lots of rules matches when developing new rules.
  1003  func countRule(v *Value, key string) bool {
  1004  	f := v.Block.Func
  1005  	if f.ruleMatches == nil {
  1006  		f.ruleMatches = make(map[string]int)
  1007  	}
  1008  	f.ruleMatches[key]++
  1009  	return true
  1010  }
  1011  
  1012  // warnRule generates compiler debug output with string s when
  1013  // v is not in autogenerated code, cond is true and the rule has fired.
  1014  func warnRule(cond bool, v *Value, s string) bool {
  1015  	if pos := v.Pos; pos.Line() > 1 && cond {
  1016  		v.Block.Func.Warnl(pos, s)
  1017  	}
  1018  	return true
  1019  }
  1020  
  1021  // for a pseudo-op like (LessThan x), extract x.
  1022  func flagArg(v *Value) *Value {
  1023  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1024  		return nil
  1025  	}
  1026  	return v.Args[0]
  1027  }
  1028  
  1029  // arm64Negate finds the complement to an ARM64 condition code,
  1030  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1031  //
  1032  // For floating point, it's more subtle because NaN is unordered. We do
  1033  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1034  func arm64Negate(op Op) Op {
  1035  	switch op {
  1036  	case OpARM64LessThan:
  1037  		return OpARM64GreaterEqual
  1038  	case OpARM64LessThanU:
  1039  		return OpARM64GreaterEqualU
  1040  	case OpARM64GreaterThan:
  1041  		return OpARM64LessEqual
  1042  	case OpARM64GreaterThanU:
  1043  		return OpARM64LessEqualU
  1044  	case OpARM64LessEqual:
  1045  		return OpARM64GreaterThan
  1046  	case OpARM64LessEqualU:
  1047  		return OpARM64GreaterThanU
  1048  	case OpARM64GreaterEqual:
  1049  		return OpARM64LessThan
  1050  	case OpARM64GreaterEqualU:
  1051  		return OpARM64LessThanU
  1052  	case OpARM64Equal:
  1053  		return OpARM64NotEqual
  1054  	case OpARM64NotEqual:
  1055  		return OpARM64Equal
  1056  	case OpARM64LessThanF:
  1057  		return OpARM64NotLessThanF
  1058  	case OpARM64NotLessThanF:
  1059  		return OpARM64LessThanF
  1060  	case OpARM64LessEqualF:
  1061  		return OpARM64NotLessEqualF
  1062  	case OpARM64NotLessEqualF:
  1063  		return OpARM64LessEqualF
  1064  	case OpARM64GreaterThanF:
  1065  		return OpARM64NotGreaterThanF
  1066  	case OpARM64NotGreaterThanF:
  1067  		return OpARM64GreaterThanF
  1068  	case OpARM64GreaterEqualF:
  1069  		return OpARM64NotGreaterEqualF
  1070  	case OpARM64NotGreaterEqualF:
  1071  		return OpARM64GreaterEqualF
  1072  	default:
  1073  		panic("unreachable")
  1074  	}
  1075  }
  1076  
  1077  // arm64Invert evaluates (InvertFlags op), which
  1078  // is the same as altering the condition codes such
  1079  // that the same result would be produced if the arguments
  1080  // to the flag-generating instruction were reversed, e.g.
  1081  // (InvertFlags (CMP x y)) -> (CMP y x)
  1082  func arm64Invert(op Op) Op {
  1083  	switch op {
  1084  	case OpARM64LessThan:
  1085  		return OpARM64GreaterThan
  1086  	case OpARM64LessThanU:
  1087  		return OpARM64GreaterThanU
  1088  	case OpARM64GreaterThan:
  1089  		return OpARM64LessThan
  1090  	case OpARM64GreaterThanU:
  1091  		return OpARM64LessThanU
  1092  	case OpARM64LessEqual:
  1093  		return OpARM64GreaterEqual
  1094  	case OpARM64LessEqualU:
  1095  		return OpARM64GreaterEqualU
  1096  	case OpARM64GreaterEqual:
  1097  		return OpARM64LessEqual
  1098  	case OpARM64GreaterEqualU:
  1099  		return OpARM64LessEqualU
  1100  	case OpARM64Equal, OpARM64NotEqual:
  1101  		return op
  1102  	case OpARM64LessThanF:
  1103  		return OpARM64GreaterThanF
  1104  	case OpARM64GreaterThanF:
  1105  		return OpARM64LessThanF
  1106  	case OpARM64LessEqualF:
  1107  		return OpARM64GreaterEqualF
  1108  	case OpARM64GreaterEqualF:
  1109  		return OpARM64LessEqualF
  1110  	case OpARM64NotLessThanF:
  1111  		return OpARM64NotGreaterThanF
  1112  	case OpARM64NotGreaterThanF:
  1113  		return OpARM64NotLessThanF
  1114  	case OpARM64NotLessEqualF:
  1115  		return OpARM64NotGreaterEqualF
  1116  	case OpARM64NotGreaterEqualF:
  1117  		return OpARM64NotLessEqualF
  1118  	default:
  1119  		panic("unreachable")
  1120  	}
  1121  }
  1122  
  1123  // evaluate an ARM64 op against a flags value
  1124  // that is potentially constant; return 1 for true,
  1125  // -1 for false, and 0 for not constant.
  1126  func ccARM64Eval(op Op, flags *Value) int {
  1127  	fop := flags.Op
  1128  	if fop == OpARM64InvertFlags {
  1129  		return -ccARM64Eval(op, flags.Args[0])
  1130  	}
  1131  	if fop != OpARM64FlagConstant {
  1132  		return 0
  1133  	}
  1134  	fc := flagConstant(flags.AuxInt)
  1135  	b2i := func(b bool) int {
  1136  		if b {
  1137  			return 1
  1138  		}
  1139  		return -1
  1140  	}
  1141  	switch op {
  1142  	case OpARM64Equal:
  1143  		return b2i(fc.eq())
  1144  	case OpARM64NotEqual:
  1145  		return b2i(fc.ne())
  1146  	case OpARM64LessThan:
  1147  		return b2i(fc.lt())
  1148  	case OpARM64LessThanU:
  1149  		return b2i(fc.ult())
  1150  	case OpARM64GreaterThan:
  1151  		return b2i(fc.gt())
  1152  	case OpARM64GreaterThanU:
  1153  		return b2i(fc.ugt())
  1154  	case OpARM64LessEqual:
  1155  		return b2i(fc.le())
  1156  	case OpARM64LessEqualU:
  1157  		return b2i(fc.ule())
  1158  	case OpARM64GreaterEqual:
  1159  		return b2i(fc.ge())
  1160  	case OpARM64GreaterEqualU:
  1161  		return b2i(fc.uge())
  1162  	}
  1163  	return 0
  1164  }
  1165  
  1166  // logRule logs the use of the rule s. This will only be enabled if
  1167  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1168  func logRule(s string) {
  1169  	if ruleFile == nil {
  1170  		// Open a log file to write log to. We open in append
  1171  		// mode because all.bash runs the compiler lots of times,
  1172  		// and we want the concatenation of all of those logs.
  1173  		// This means, of course, that users need to rm the old log
  1174  		// to get fresh data.
  1175  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1176  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1177  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1178  		if err != nil {
  1179  			panic(err)
  1180  		}
  1181  		ruleFile = w
  1182  	}
  1183  	_, err := fmt.Fprintln(ruleFile, s)
  1184  	if err != nil {
  1185  		panic(err)
  1186  	}
  1187  }
  1188  
  1189  var ruleFile io.Writer
  1190  
  1191  func min(x, y int64) int64 {
  1192  	if x < y {
  1193  		return x
  1194  	}
  1195  	return y
  1196  }
  1197  func max(x, y int64) int64 {
  1198  	if x > y {
  1199  		return x
  1200  	}
  1201  	return y
  1202  }
  1203  
  1204  func isConstZero(v *Value) bool {
  1205  	switch v.Op {
  1206  	case OpConstNil:
  1207  		return true
  1208  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1209  		return v.AuxInt == 0
  1210  	}
  1211  	return false
  1212  }
  1213  
  1214  // reciprocalExact64 reports whether 1/c is exactly representable.
  1215  func reciprocalExact64(c float64) bool {
  1216  	b := math.Float64bits(c)
  1217  	man := b & (1<<52 - 1)
  1218  	if man != 0 {
  1219  		return false // not a power of 2, denormal, or NaN
  1220  	}
  1221  	exp := b >> 52 & (1<<11 - 1)
  1222  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1223  	// changes the exponent to 0x7fe-exp.
  1224  	switch exp {
  1225  	case 0:
  1226  		return false // ±0
  1227  	case 0x7ff:
  1228  		return false // ±inf
  1229  	case 0x7fe:
  1230  		return false // exponent is not representable
  1231  	default:
  1232  		return true
  1233  	}
  1234  }
  1235  
  1236  // reciprocalExact32 reports whether 1/c is exactly representable.
  1237  func reciprocalExact32(c float32) bool {
  1238  	b := math.Float32bits(c)
  1239  	man := b & (1<<23 - 1)
  1240  	if man != 0 {
  1241  		return false // not a power of 2, denormal, or NaN
  1242  	}
  1243  	exp := b >> 23 & (1<<8 - 1)
  1244  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1245  	// changes the exponent to 0xfe-exp.
  1246  	switch exp {
  1247  	case 0:
  1248  		return false // ±0
  1249  	case 0xff:
  1250  		return false // ±inf
  1251  	case 0xfe:
  1252  		return false // exponent is not representable
  1253  	default:
  1254  		return true
  1255  	}
  1256  }
  1257  
  1258  // check if an immediate can be directly encoded into an ARM's instruction.
  1259  func isARMImmRot(v uint32) bool {
  1260  	for i := 0; i < 16; i++ {
  1261  		if v&^0xff == 0 {
  1262  			return true
  1263  		}
  1264  		v = v<<2 | v>>30
  1265  	}
  1266  
  1267  	return false
  1268  }
  1269  
  1270  // overlap reports whether the ranges given by the given offset and
  1271  // size pairs overlap.
  1272  func overlap(offset1, size1, offset2, size2 int64) bool {
  1273  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1274  		return true
  1275  	}
  1276  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1277  		return true
  1278  	}
  1279  	return false
  1280  }
  1281  
  1282  func areAdjacentOffsets(off1, off2, size int64) bool {
  1283  	return off1+size == off2 || off1 == off2+size
  1284  }
  1285  
  1286  // check if value zeroes out upper 32-bit of 64-bit register.
  1287  // depth limits recursion depth. In AMD64.rules 3 is used as limit,
  1288  // because it catches same amount of cases as 4.
  1289  func zeroUpper32Bits(x *Value, depth int) bool {
  1290  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1291  		// If the value is signed, it might get re-sign-extended
  1292  		// during spill and restore. See issue 68227.
  1293  		return false
  1294  	}
  1295  	switch x.Op {
  1296  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1297  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1298  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1299  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1300  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1301  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1302  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1303  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1304  		OpAMD64SHLL, OpAMD64SHLLconst:
  1305  		return true
  1306  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1307  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1308  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1309  		return true
  1310  	case OpArg: // note: but not ArgIntReg
  1311  		// amd64 always loads args from the stack unsigned.
  1312  		// most other architectures load them sign/zero extended based on the type.
  1313  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1314  	case OpPhi, OpSelect0, OpSelect1:
  1315  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1316  		// just limit recursion depth.
  1317  		if depth <= 0 {
  1318  			return false
  1319  		}
  1320  		for i := range x.Args {
  1321  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1322  				return false
  1323  			}
  1324  		}
  1325  		return true
  1326  
  1327  	}
  1328  	return false
  1329  }
  1330  
  1331  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1332  func zeroUpper48Bits(x *Value, depth int) bool {
  1333  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1334  		return false
  1335  	}
  1336  	switch x.Op {
  1337  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1338  		return true
  1339  	case OpArg: // note: but not ArgIntReg
  1340  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1341  	case OpPhi, OpSelect0, OpSelect1:
  1342  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1343  		// just limit recursion depth.
  1344  		if depth <= 0 {
  1345  			return false
  1346  		}
  1347  		for i := range x.Args {
  1348  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1349  				return false
  1350  			}
  1351  		}
  1352  		return true
  1353  
  1354  	}
  1355  	return false
  1356  }
  1357  
  1358  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1359  func zeroUpper56Bits(x *Value, depth int) bool {
  1360  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1361  		return false
  1362  	}
  1363  	switch x.Op {
  1364  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1365  		return true
  1366  	case OpArg: // note: but not ArgIntReg
  1367  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1368  	case OpPhi, OpSelect0, OpSelect1:
  1369  		// Phis can use each-other as an arguments, instead of tracking visited values,
  1370  		// just limit recursion depth.
  1371  		if depth <= 0 {
  1372  			return false
  1373  		}
  1374  		for i := range x.Args {
  1375  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1376  				return false
  1377  			}
  1378  		}
  1379  		return true
  1380  
  1381  	}
  1382  	return false
  1383  }
  1384  
  1385  func isInlinableMemclr(c *Config, sz int64) bool {
  1386  	if sz < 0 {
  1387  		return false
  1388  	}
  1389  	// TODO: expand this check to allow other architectures
  1390  	// see CL 454255 and issue 56997
  1391  	switch c.arch {
  1392  	case "amd64", "arm64":
  1393  		return true
  1394  	case "ppc64le", "ppc64":
  1395  		return sz < 512
  1396  	}
  1397  	return false
  1398  }
  1399  
  1400  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1401  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1402  // safe, either because Move will do all of its loads before any of its stores, or
  1403  // because the arguments are known to be disjoint.
  1404  // This is used as a check for replacing memmove with Move ops.
  1405  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1406  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1407  	// Move ops may or may not be faster for large sizes depending on how the platform
  1408  	// lowers them, so we only perform this optimization on platforms that we know to
  1409  	// have fast Move ops.
  1410  	switch c.arch {
  1411  	case "amd64":
  1412  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1413  	case "386", "arm64":
  1414  		return sz <= 8
  1415  	case "s390x", "ppc64", "ppc64le":
  1416  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1417  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1418  		return sz <= 4
  1419  	}
  1420  	return false
  1421  }
  1422  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1423  	return isInlinableMemmove(dst, src, sz, c)
  1424  }
  1425  
  1426  // logLargeCopy logs the occurrence of a large copy.
  1427  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1428  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1429  func logLargeCopy(v *Value, s int64) bool {
  1430  	if s < 128 {
  1431  		return true
  1432  	}
  1433  	if logopt.Enabled() {
  1434  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1435  	}
  1436  	return true
  1437  }
  1438  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1439  	if s < 128 {
  1440  		return
  1441  	}
  1442  	if logopt.Enabled() {
  1443  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1444  	}
  1445  }
  1446  
  1447  // hasSmallRotate reports whether the architecture has rotate instructions
  1448  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1449  func hasSmallRotate(c *Config) bool {
  1450  	switch c.arch {
  1451  	case "amd64", "386":
  1452  		return true
  1453  	default:
  1454  		return false
  1455  	}
  1456  }
  1457  
  1458  func supportsPPC64PCRel() bool {
  1459  	// PCRel is currently supported for >= power10, linux only
  1460  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1461  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1462  }
  1463  
  1464  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1465  	if sh < 0 || sh >= sz {
  1466  		panic("PPC64 shift arg sh out of range")
  1467  	}
  1468  	if mb < 0 || mb >= sz {
  1469  		panic("PPC64 shift arg mb out of range")
  1470  	}
  1471  	if me < 0 || me >= sz {
  1472  		panic("PPC64 shift arg me out of range")
  1473  	}
  1474  	return int32(sh<<16 | mb<<8 | me)
  1475  }
  1476  
  1477  func GetPPC64Shiftsh(auxint int64) int64 {
  1478  	return int64(int8(auxint >> 16))
  1479  }
  1480  
  1481  func GetPPC64Shiftmb(auxint int64) int64 {
  1482  	return int64(int8(auxint >> 8))
  1483  }
  1484  
  1485  func GetPPC64Shiftme(auxint int64) int64 {
  1486  	return int64(int8(auxint))
  1487  }
  1488  
  1489  // Test if this value can encoded as a mask for a rlwinm like
  1490  // operation.  Masks can also extend from the msb and wrap to
  1491  // the lsb too.  That is, the valid masks are 32 bit strings
  1492  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1493  func isPPC64WordRotateMask(v64 int64) bool {
  1494  	// Isolate rightmost 1 (if none 0) and add.
  1495  	v := uint32(v64)
  1496  	vp := (v & -v) + v
  1497  	// Likewise, for the wrapping case.
  1498  	vn := ^v
  1499  	vpn := (vn & -vn) + vn
  1500  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1501  }
  1502  
  1503  // Compress mask and shift into single value of the form
  1504  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1505  // be used to regenerate the input mask.
  1506  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1507  	var mb, me, mbn, men int
  1508  
  1509  	// Determine boundaries and then decode them
  1510  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1511  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1512  	} else if nbits == 32 {
  1513  		mb = bits.LeadingZeros32(uint32(mask))
  1514  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1515  		mbn = bits.LeadingZeros32(^uint32(mask))
  1516  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1517  	} else {
  1518  		mb = bits.LeadingZeros64(uint64(mask))
  1519  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1520  		mbn = bits.LeadingZeros64(^uint64(mask))
  1521  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1522  	}
  1523  	// Check for a wrapping mask (e.g bits at 0 and 63)
  1524  	if mb == 0 && me == int(nbits) {
  1525  		// swap the inverted values
  1526  		mb, me = men, mbn
  1527  	}
  1528  
  1529  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1530  }
  1531  
  1532  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1533  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1534  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1535  // operations can be combined. This functions assumes the two opcodes can
  1536  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1537  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1538  	mb := s
  1539  	r := 64 - s
  1540  	// A larger mb is a smaller mask.
  1541  	if (encoded>>8)&0xFF < mb {
  1542  		encoded = (encoded &^ 0xFF00) | mb<<8
  1543  	}
  1544  	// The rotate is expected to be 0.
  1545  	if (encoded & 0xFF0000) != 0 {
  1546  		panic("non-zero rotate")
  1547  	}
  1548  	return encoded | r<<16
  1549  }
  1550  
  1551  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1552  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1553  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1554  	auxint := uint64(sauxint)
  1555  	rotate = int64((auxint >> 16) & 0xFF)
  1556  	mb = int64((auxint >> 8) & 0xFF)
  1557  	me = int64((auxint >> 0) & 0xFF)
  1558  	nbits := int64((auxint >> 24) & 0xFF)
  1559  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1560  	if mb > me {
  1561  		mask = ^mask
  1562  	}
  1563  	if nbits == 32 {
  1564  		mask = uint64(uint32(mask))
  1565  	}
  1566  
  1567  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1568  	// is inclusive.
  1569  	me = (me - 1) & (nbits - 1)
  1570  	return
  1571  }
  1572  
  1573  // This verifies that the mask is a set of
  1574  // consecutive bits including the least
  1575  // significant bit.
  1576  func isPPC64ValidShiftMask(v int64) bool {
  1577  	if (v != 0) && ((v+1)&v) == 0 {
  1578  		return true
  1579  	}
  1580  	return false
  1581  }
  1582  
  1583  func getPPC64ShiftMaskLength(v int64) int64 {
  1584  	return int64(bits.Len64(uint64(v)))
  1585  }
  1586  
  1587  // Decompose a shift right into an equivalent rotate/mask,
  1588  // and return mask & m.
  1589  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1590  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1591  	return m & int64(smask)
  1592  }
  1593  
  1594  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1595  func mergePPC64AndSrwi(m, s int64) int64 {
  1596  	mask := mergePPC64RShiftMask(m, s, 32)
  1597  	if !isPPC64WordRotateMask(mask) {
  1598  		return 0
  1599  	}
  1600  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1601  }
  1602  
  1603  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1604  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1605  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1606  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1607  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1608  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1609  
  1610  	// Rewrite mask to apply after the final left shift.
  1611  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1612  
  1613  	r_1 := 32 - srw
  1614  	r_2 := GetPPC64Shiftsh(sld)
  1615  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1616  
  1617  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1618  		return 0
  1619  	}
  1620  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1621  }
  1622  
  1623  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1624  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1625  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1626  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1627  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1628  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1629  
  1630  	// Rewrite mask to apply after the final left shift.
  1631  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1632  
  1633  	r_1 := 64 - srd
  1634  	r_2 := GetPPC64Shiftsh(sld)
  1635  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1636  
  1637  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1638  		return 0
  1639  	}
  1640  	// This combine only works when selecting and shifting the lower 32 bits.
  1641  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1642  	if v1&mask_3 != 0 {
  1643  		return 0
  1644  	}
  1645  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1646  }
  1647  
  1648  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1649  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1650  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1651  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1652  	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
  1653  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1654  
  1655  	// combine the masks, and adjust for the final left shift.
  1656  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1657  	r_2 := GetPPC64Shiftsh(int64(sld))
  1658  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1659  
  1660  	// Verify the result is still a valid bitmask of <= 32 bits.
  1661  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1662  		return 0
  1663  	}
  1664  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1665  }
  1666  
  1667  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1668  // or 0 if they cannot be merged.
  1669  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1670  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1671  	mask_out := (mask_rlw & uint64(mask))
  1672  
  1673  	// Verify the result is still a valid bitmask of <= 32 bits.
  1674  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1675  		return 0
  1676  	}
  1677  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1678  }
  1679  
  1680  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1681  // result. Return rlw if it does, 0 otherwise.
  1682  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1683  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1684  	if mb > me {
  1685  		return 0
  1686  	}
  1687  	return rlw
  1688  }
  1689  
  1690  // Test if AND feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1691  // or 0 if they cannot be merged.
  1692  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1693  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1694  
  1695  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1696  	r_mask := bits.RotateLeft32(mask, int(r))
  1697  
  1698  	mask_out := (mask_rlw & uint64(r_mask))
  1699  
  1700  	// Verify the result is still a valid bitmask of <= 32 bits.
  1701  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1702  		return 0
  1703  	}
  1704  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1705  }
  1706  
  1707  // Test if RLWINM feeding into SRDconst can be merged. Return the encoded RLIWNM constant,
  1708  // or 0 if they cannot be merged.
  1709  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1710  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1711  	if mb > me || mb < sldi {
  1712  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1713  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1714  		return 0
  1715  	}
  1716  	// combine the masks, and adjust for the final left shift.
  1717  	mask_3 := mask_1 << sldi
  1718  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1719  
  1720  	// Verify the result is still a valid bitmask of <= 32 bits.
  1721  	if uint64(uint32(mask_3)) != mask_3 {
  1722  		return 0
  1723  	}
  1724  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1725  }
  1726  
  1727  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1728  // or return 0 if they cannot be combined.
  1729  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1730  	if sld > srw || srw >= 32 {
  1731  		return 0
  1732  	}
  1733  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1734  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1735  	mask := (mask_r & mask_l) << uint(sld)
  1736  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1737  }
  1738  
  1739  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1740  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1741  // of op.
  1742  //
  1743  // E.g consider the case:
  1744  // a = (ADD x y)
  1745  // b = (CMPconst [0] a)
  1746  // c = (OR a z)
  1747  //
  1748  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1749  // would produce:
  1750  // a  = (ADD x y)
  1751  // a' = (ADDCC x y)
  1752  // a” = (Select0 a')
  1753  // b  = (CMPconst [0] a”)
  1754  // c  = (OR a z)
  1755  //
  1756  // which makes it impossible to rewrite the second user. Instead the result
  1757  // of this conversion is:
  1758  // a' = (ADDCC x y)
  1759  // a  = (Select0 a')
  1760  // b  = (CMPconst [0] a)
  1761  // c  = (OR a z)
  1762  //
  1763  // Which makes it trivial to rewrite b using a lowering rule.
  1764  func convertPPC64OpToOpCC(op *Value) *Value {
  1765  	ccOpMap := map[Op]Op{
  1766  		OpPPC64ADD:      OpPPC64ADDCC,
  1767  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1768  		OpPPC64AND:      OpPPC64ANDCC,
  1769  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1770  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1771  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1772  		OpPPC64OR:       OpPPC64ORCC,
  1773  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1774  		OpPPC64SUB:      OpPPC64SUBCC,
  1775  		OpPPC64NEG:      OpPPC64NEGCC,
  1776  		OpPPC64NOR:      OpPPC64NORCC,
  1777  		OpPPC64XOR:      OpPPC64XORCC,
  1778  	}
  1779  	b := op.Block
  1780  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1781  	opCC.AddArgs(op.Args...)
  1782  	op.reset(OpSelect0)
  1783  	op.AddArgs(opCC)
  1784  	return op
  1785  }
  1786  
  1787  // Try converting a RLDICL to ANDCC. If successful, return the mask otherwise 0.
  1788  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1789  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1790  	if r != 0 || mask&0xFFFF != mask {
  1791  		return 0
  1792  	}
  1793  	return int64(mask)
  1794  }
  1795  
  1796  // Convenience function to rotate a 32 bit constant value by another constant.
  1797  func rotateLeft32(v, rotate int64) int64 {
  1798  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1799  }
  1800  
  1801  func rotateRight64(v, rotate int64) int64 {
  1802  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1803  }
  1804  
  1805  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1806  func armBFAuxInt(lsb, width int64) arm64BitField {
  1807  	if lsb < 0 || lsb > 63 {
  1808  		panic("ARM(64) bit field lsb constant out of range")
  1809  	}
  1810  	if width < 1 || lsb+width > 64 {
  1811  		panic("ARM(64) bit field width constant out of range")
  1812  	}
  1813  	return arm64BitField(width | lsb<<8)
  1814  }
  1815  
  1816  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1817  func (bfc arm64BitField) getARM64BFlsb() int64 {
  1818  	return int64(uint64(bfc) >> 8)
  1819  }
  1820  
  1821  // returns the width part of the auxInt field of arm64 bitfield ops.
  1822  func (bfc arm64BitField) getARM64BFwidth() int64 {
  1823  	return int64(bfc) & 0xff
  1824  }
  1825  
  1826  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1827  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1828  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1829  	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1830  }
  1831  
  1832  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1833  func arm64BFWidth(mask, rshift int64) int64 {
  1834  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1835  	if shiftedMask == 0 {
  1836  		panic("ARM64 BF mask is zero")
  1837  	}
  1838  	return nto(shiftedMask)
  1839  }
  1840  
  1841  // sizeof returns the size of t in bytes.
  1842  // It will panic if t is not a *types.Type.
  1843  func sizeof(t interface{}) int64 {
  1844  	return t.(*types.Type).Size()
  1845  }
  1846  
  1847  // registerizable reports whether t is a primitive type that fits in
  1848  // a register. It assumes float64 values will always fit into registers
  1849  // even if that isn't strictly true.
  1850  func registerizable(b *Block, typ *types.Type) bool {
  1851  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1852  		return true
  1853  	}
  1854  	if typ.IsInteger() {
  1855  		return typ.Size() <= b.Func.Config.RegSize
  1856  	}
  1857  	return false
  1858  }
  1859  
  1860  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1861  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1862  	f := v.Block.Func
  1863  	if !f.Config.Race {
  1864  		return false
  1865  	}
  1866  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1867  		return false
  1868  	}
  1869  	for _, b := range f.Blocks {
  1870  		for _, v := range b.Values {
  1871  			switch v.Op {
  1872  			case OpStaticCall, OpStaticLECall:
  1873  				// Check for racefuncenter will encounter racefuncexit and vice versa.
  1874  				// Allow calls to panic*
  1875  				s := v.Aux.(*AuxCall).Fn.String()
  1876  				switch s {
  1877  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1878  					"runtime.panicdivide", "runtime.panicwrap",
  1879  					"runtime.panicshift":
  1880  					continue
  1881  				}
  1882  				// If we encountered any call, we need to keep racefunc*,
  1883  				// for accurate stacktraces.
  1884  				return false
  1885  			case OpPanicBounds, OpPanicExtend:
  1886  				// Note: these are panic generators that are ok (like the static calls above).
  1887  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1888  				// We must keep the race functions if there are any other call types.
  1889  				return false
  1890  			}
  1891  		}
  1892  	}
  1893  	if isSameCall(sym, "runtime.racefuncenter") {
  1894  		// TODO REGISTER ABI this needs to be cleaned up.
  1895  		// If we're removing racefuncenter, remove its argument as well.
  1896  		if v.Args[0].Op != OpStore {
  1897  			if v.Op == OpStaticLECall {
  1898  				// there is no store, yet.
  1899  				return true
  1900  			}
  1901  			return false
  1902  		}
  1903  		mem := v.Args[0].Args[2]
  1904  		v.Args[0].reset(OpCopy)
  1905  		v.Args[0].AddArg(mem)
  1906  	}
  1907  	return true
  1908  }
  1909  
  1910  // symIsRO reports whether sym is a read-only global.
  1911  func symIsRO(sym interface{}) bool {
  1912  	lsym := sym.(*obj.LSym)
  1913  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1914  }
  1915  
  1916  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1917  func symIsROZero(sym Sym) bool {
  1918  	lsym := sym.(*obj.LSym)
  1919  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1920  		return false
  1921  	}
  1922  	for _, b := range lsym.P {
  1923  		if b != 0 {
  1924  			return false
  1925  		}
  1926  	}
  1927  	return true
  1928  }
  1929  
  1930  // isFixed32 returns true if the int32 at offset off in symbol sym
  1931  // is known and constant.
  1932  func isFixed32(c *Config, sym Sym, off int64) bool {
  1933  	return isFixed(c, sym, off, 4)
  1934  }
  1935  
  1936  // isFixed returns true if the range [off,off+size] of the symbol sym
  1937  // is known and constant.
  1938  func isFixed(c *Config, sym Sym, off, size int64) bool {
  1939  	lsym := sym.(*obj.LSym)
  1940  	if lsym.Extra == nil {
  1941  		return false
  1942  	}
  1943  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1944  		if off == 2*c.PtrSize && size == 4 {
  1945  			return true // type hash field
  1946  		}
  1947  	}
  1948  	return false
  1949  }
  1950  func fixed32(c *Config, sym Sym, off int64) int32 {
  1951  	lsym := sym.(*obj.LSym)
  1952  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1953  		if off == 2*c.PtrSize {
  1954  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  1955  		}
  1956  	}
  1957  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  1958  	return 0
  1959  }
  1960  
  1961  // isFixedSym returns true if the contents of sym at the given offset
  1962  // is known and is the constant address of another symbol.
  1963  func isFixedSym(sym Sym, off int64) bool {
  1964  	lsym := sym.(*obj.LSym)
  1965  	switch {
  1966  	case lsym.Type == objabi.SRODATA:
  1967  		// itabs, dictionaries
  1968  	default:
  1969  		return false
  1970  	}
  1971  	for _, r := range lsym.R {
  1972  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  1973  			return true
  1974  		}
  1975  	}
  1976  	return false
  1977  }
  1978  func fixedSym(f *Func, sym Sym, off int64) Sym {
  1979  	lsym := sym.(*obj.LSym)
  1980  	for _, r := range lsym.R {
  1981  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  1982  			if strings.HasPrefix(r.Sym.Name, "type:") {
  1983  				// In case we're loading a type out of a dictionary, we need to record
  1984  				// that the containing function might put that type in an interface.
  1985  				// That information is currently recorded in relocations in the dictionary,
  1986  				// but if we perform this load at compile time then the dictionary
  1987  				// might be dead.
  1988  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1989  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  1990  				// Same, but if we're using an itab we need to record that the
  1991  				// itab._type might be put in an interface.
  1992  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1993  			}
  1994  			return r.Sym
  1995  		}
  1996  	}
  1997  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  1998  	return nil
  1999  }
  2000  
  2001  // read8 reads one byte from the read-only global sym at offset off.
  2002  func read8(sym interface{}, off int64) uint8 {
  2003  	lsym := sym.(*obj.LSym)
  2004  	if off >= int64(len(lsym.P)) || off < 0 {
  2005  		// Invalid index into the global sym.
  2006  		// This can happen in dead code, so we don't want to panic.
  2007  		// Just return any value, it will eventually get ignored.
  2008  		// See issue 29215.
  2009  		return 0
  2010  	}
  2011  	return lsym.P[off]
  2012  }
  2013  
  2014  // read16 reads two bytes from the read-only global sym at offset off.
  2015  func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
  2016  	lsym := sym.(*obj.LSym)
  2017  	// lsym.P is written lazily.
  2018  	// Bytes requested after the end of lsym.P are 0.
  2019  	var src []byte
  2020  	if 0 <= off && off < int64(len(lsym.P)) {
  2021  		src = lsym.P[off:]
  2022  	}
  2023  	buf := make([]byte, 2)
  2024  	copy(buf, src)
  2025  	return byteorder.Uint16(buf)
  2026  }
  2027  
  2028  // read32 reads four bytes from the read-only global sym at offset off.
  2029  func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
  2030  	lsym := sym.(*obj.LSym)
  2031  	var src []byte
  2032  	if 0 <= off && off < int64(len(lsym.P)) {
  2033  		src = lsym.P[off:]
  2034  	}
  2035  	buf := make([]byte, 4)
  2036  	copy(buf, src)
  2037  	return byteorder.Uint32(buf)
  2038  }
  2039  
  2040  // read64 reads eight bytes from the read-only global sym at offset off.
  2041  func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
  2042  	lsym := sym.(*obj.LSym)
  2043  	var src []byte
  2044  	if 0 <= off && off < int64(len(lsym.P)) {
  2045  		src = lsym.P[off:]
  2046  	}
  2047  	buf := make([]byte, 8)
  2048  	copy(buf, src)
  2049  	return byteorder.Uint64(buf)
  2050  }
  2051  
  2052  // sequentialAddresses reports true if it can prove that x + n == y
  2053  func sequentialAddresses(x, y *Value, n int64) bool {
  2054  	if x == y && n == 0 {
  2055  		return true
  2056  	}
  2057  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2058  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2059  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2060  		return true
  2061  	}
  2062  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2063  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2064  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2065  		return true
  2066  	}
  2067  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2068  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2069  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2070  		return true
  2071  	}
  2072  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2073  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2074  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2075  		return true
  2076  	}
  2077  	return false
  2078  }
  2079  
  2080  // flagConstant represents the result of a compile-time comparison.
  2081  // The sense of these flags does not necessarily represent the hardware's notion
  2082  // of a flags register - these are just a compile-time construct.
  2083  // We happen to match the semantics to those of arm/arm64.
  2084  // Note that these semantics differ from x86: the carry flag has the opposite
  2085  // sense on a subtraction!
  2086  //
  2087  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2088  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2089  //	 (because it does x + ^y + C).
  2090  //
  2091  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2092  type flagConstant uint8
  2093  
  2094  // N reports whether the result of an operation is negative (high bit set).
  2095  func (fc flagConstant) N() bool {
  2096  	return fc&1 != 0
  2097  }
  2098  
  2099  // Z reports whether the result of an operation is 0.
  2100  func (fc flagConstant) Z() bool {
  2101  	return fc&2 != 0
  2102  }
  2103  
  2104  // C reports whether an unsigned add overflowed (carry), or an
  2105  // unsigned subtract did not underflow (borrow).
  2106  func (fc flagConstant) C() bool {
  2107  	return fc&4 != 0
  2108  }
  2109  
  2110  // V reports whether a signed operation overflowed or underflowed.
  2111  func (fc flagConstant) V() bool {
  2112  	return fc&8 != 0
  2113  }
  2114  
  2115  func (fc flagConstant) eq() bool {
  2116  	return fc.Z()
  2117  }
  2118  func (fc flagConstant) ne() bool {
  2119  	return !fc.Z()
  2120  }
  2121  func (fc flagConstant) lt() bool {
  2122  	return fc.N() != fc.V()
  2123  }
  2124  func (fc flagConstant) le() bool {
  2125  	return fc.Z() || fc.lt()
  2126  }
  2127  func (fc flagConstant) gt() bool {
  2128  	return !fc.Z() && fc.ge()
  2129  }
  2130  func (fc flagConstant) ge() bool {
  2131  	return fc.N() == fc.V()
  2132  }
  2133  func (fc flagConstant) ult() bool {
  2134  	return !fc.C()
  2135  }
  2136  func (fc flagConstant) ule() bool {
  2137  	return fc.Z() || fc.ult()
  2138  }
  2139  func (fc flagConstant) ugt() bool {
  2140  	return !fc.Z() && fc.uge()
  2141  }
  2142  func (fc flagConstant) uge() bool {
  2143  	return fc.C()
  2144  }
  2145  
  2146  func (fc flagConstant) ltNoov() bool {
  2147  	return fc.lt() && !fc.V()
  2148  }
  2149  func (fc flagConstant) leNoov() bool {
  2150  	return fc.le() && !fc.V()
  2151  }
  2152  func (fc flagConstant) gtNoov() bool {
  2153  	return fc.gt() && !fc.V()
  2154  }
  2155  func (fc flagConstant) geNoov() bool {
  2156  	return fc.ge() && !fc.V()
  2157  }
  2158  
  2159  func (fc flagConstant) String() string {
  2160  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2161  }
  2162  
  2163  type flagConstantBuilder struct {
  2164  	N bool
  2165  	Z bool
  2166  	C bool
  2167  	V bool
  2168  }
  2169  
  2170  func (fcs flagConstantBuilder) encode() flagConstant {
  2171  	var fc flagConstant
  2172  	if fcs.N {
  2173  		fc |= 1
  2174  	}
  2175  	if fcs.Z {
  2176  		fc |= 2
  2177  	}
  2178  	if fcs.C {
  2179  		fc |= 4
  2180  	}
  2181  	if fcs.V {
  2182  		fc |= 8
  2183  	}
  2184  	return fc
  2185  }
  2186  
  2187  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2188  //  - the results of the C flag are different
  2189  //  - the results of the V flag when y==minint are different
  2190  
  2191  // addFlags64 returns the flags that would be set from computing x+y.
  2192  func addFlags64(x, y int64) flagConstant {
  2193  	var fcb flagConstantBuilder
  2194  	fcb.Z = x+y == 0
  2195  	fcb.N = x+y < 0
  2196  	fcb.C = uint64(x+y) < uint64(x)
  2197  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2198  	return fcb.encode()
  2199  }
  2200  
  2201  // subFlags64 returns the flags that would be set from computing x-y.
  2202  func subFlags64(x, y int64) flagConstant {
  2203  	var fcb flagConstantBuilder
  2204  	fcb.Z = x-y == 0
  2205  	fcb.N = x-y < 0
  2206  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2207  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2208  	return fcb.encode()
  2209  }
  2210  
  2211  // addFlags32 returns the flags that would be set from computing x+y.
  2212  func addFlags32(x, y int32) flagConstant {
  2213  	var fcb flagConstantBuilder
  2214  	fcb.Z = x+y == 0
  2215  	fcb.N = x+y < 0
  2216  	fcb.C = uint32(x+y) < uint32(x)
  2217  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2218  	return fcb.encode()
  2219  }
  2220  
  2221  // subFlags32 returns the flags that would be set from computing x-y.
  2222  func subFlags32(x, y int32) flagConstant {
  2223  	var fcb flagConstantBuilder
  2224  	fcb.Z = x-y == 0
  2225  	fcb.N = x-y < 0
  2226  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2227  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2228  	return fcb.encode()
  2229  }
  2230  
  2231  // logicFlags64 returns flags set to the sign/zeroness of x.
  2232  // C and V are set to false.
  2233  func logicFlags64(x int64) flagConstant {
  2234  	var fcb flagConstantBuilder
  2235  	fcb.Z = x == 0
  2236  	fcb.N = x < 0
  2237  	return fcb.encode()
  2238  }
  2239  
  2240  // logicFlags32 returns flags set to the sign/zeroness of x.
  2241  // C and V are set to false.
  2242  func logicFlags32(x int32) flagConstant {
  2243  	var fcb flagConstantBuilder
  2244  	fcb.Z = x == 0
  2245  	fcb.N = x < 0
  2246  	return fcb.encode()
  2247  }
  2248  
  2249  func makeJumpTableSym(b *Block) *obj.LSym {
  2250  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2251  	// The jump table symbol is accessed only from the function symbol.
  2252  	s.Set(obj.AttrStatic, true)
  2253  	return s
  2254  }
  2255  
  2256  // canRotate reports whether the architecture supports
  2257  // rotates of integer registers with the given number of bits.
  2258  func canRotate(c *Config, bits int64) bool {
  2259  	if bits > c.PtrSize*8 {
  2260  		// Don't rewrite to rotates bigger than the machine word.
  2261  		return false
  2262  	}
  2263  	switch c.arch {
  2264  	case "386", "amd64", "arm64", "riscv64":
  2265  		return true
  2266  	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
  2267  		return bits >= 32
  2268  	default:
  2269  		return false
  2270  	}
  2271  }
  2272  
  2273  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2274  func isARM64bitcon(x uint64) bool {
  2275  	if x == 1<<64-1 || x == 0 {
  2276  		return false
  2277  	}
  2278  	// determine the period and sign-extend a unit to 64 bits
  2279  	switch {
  2280  	case x != x>>32|x<<32:
  2281  		// period is 64
  2282  		// nothing to do
  2283  	case x != x>>16|x<<48:
  2284  		// period is 32
  2285  		x = uint64(int64(int32(x)))
  2286  	case x != x>>8|x<<56:
  2287  		// period is 16
  2288  		x = uint64(int64(int16(x)))
  2289  	case x != x>>4|x<<60:
  2290  		// period is 8
  2291  		x = uint64(int64(int8(x)))
  2292  	default:
  2293  		// period is 4 or 2, always true
  2294  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2295  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2296  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2297  		// 0101, 1010             -- 01   rotate, repeat
  2298  		return true
  2299  	}
  2300  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2301  }
  2302  
  2303  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2304  func sequenceOfOnes(x uint64) bool {
  2305  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2306  	y += x
  2307  	return (y-1)&y == 0
  2308  }
  2309  
  2310  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2311  func isARM64addcon(v int64) bool {
  2312  	/* uimm12 or uimm24? */
  2313  	if v < 0 {
  2314  		return false
  2315  	}
  2316  	if (v & 0xFFF) == 0 {
  2317  		v >>= 12
  2318  	}
  2319  	return v <= 0xFFF
  2320  }
  2321  
  2322  // setPos sets the position of v to pos, then returns true.
  2323  // Useful for setting the result of a rewrite's position to
  2324  // something other than the default.
  2325  func setPos(v *Value, pos src.XPos) bool {
  2326  	v.Pos = pos
  2327  	return true
  2328  }
  2329  

View as plain text