// Copyright 2022 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/compiler/string-builder-optimizer.h" #include #include "src/base/bits.h" #include "src/base/logging.h" #include "src/base/optional.h" #include "src/base/small-vector.h" #include "src/compiler/access-builder.h" #include "src/compiler/graph-assembler.h" #include "src/compiler/js-graph.h" #include "src/compiler/js-heap-broker.h" #include "src/compiler/js-operator.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" #include "src/compiler/node.h" #include "src/compiler/opcodes.h" #include "src/compiler/operator.h" #include "src/compiler/schedule.h" #include "src/compiler/types.h" #include "src/objects/code.h" #include "src/objects/map-inl.h" #include "src/utils/utils.h" #include "src/zone/zone-containers.h" namespace v8 { namespace internal { namespace compiler { namespace { // Returns true if {node} is a kStringConcat or a kNewConsString. bool IsConcat(Node* node) { return node->opcode() == IrOpcode::kStringConcat || node->opcode() == IrOpcode::kNewConsString; } // Returns true if {node} is considered as a literal string by the string // builder optimizer: // - it's a literal string // - or it's a kStringFromSingleCharCode bool IsLiteralString(Node* node, JSHeapBroker* broker) { switch (node->opcode()) { case IrOpcode::kHeapConstant: { HeapObjectMatcher m(node); return m.HasResolvedValue() && m.Ref(broker).IsString() && m.Ref(broker).AsString().IsContentAccessible(); } case IrOpcode::kStringFromSingleCharCode: return true; default: return false; } } // Returns true if {node} has at least one concatenation or phi in its uses. bool HasConcatOrPhiUse(Node* node) { for (Node* use : node->uses()) { if (IsConcat(use) || use->opcode() == IrOpcode::kPhi) { return true; } } return false; } } // namespace OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte( State a, State b) { DCHECK(a != State::kUnknown && b != State::kUnknown); if (a == State::kOneByte && b == State::kOneByte) { return State::kOneByte; } if (a == State::kTwoByte || b == State::kTwoByte) { return State::kTwoByte; } return State::kCantKnow; } base::Optional> OneOrTwoByteAnalysis::TryGetRange( Node* node) { switch (node->opcode()) { case IrOpcode::kChangeTaggedToFloat64: case IrOpcode::kTruncateFloat64ToWord32: return TryGetRange(node->InputAt(0)); case IrOpcode::kInt32Add: case IrOpcode::kInt32AddWithOverflow: case IrOpcode::kInt64Add: case IrOpcode::kInt64AddWithOverflow: case IrOpcode::kFloat32Add: case IrOpcode::kFloat64Add: { base::Optional> left = TryGetRange(node->InputAt(0)); base::Optional> right = TryGetRange(node->InputAt(1)); if (left.has_value() && right.has_value()) { int32_t high_bound; if (base::bits::SignedAddOverflow32(static_cast(left->second), static_cast(right->second), &high_bound)) { // The range would overflow a 32-bit integer. return base::nullopt; } return std::pair{left->first + right->first, high_bound}; } else { return base::nullopt; } } case IrOpcode::kInt32Sub: case IrOpcode::kInt32SubWithOverflow: case IrOpcode::kInt64Sub: case IrOpcode::kInt64SubWithOverflow: case IrOpcode::kFloat32Sub: case IrOpcode::kFloat64Sub: { base::Optional> left = TryGetRange(node->InputAt(0)); base::Optional> right = TryGetRange(node->InputAt(1)); if (left.has_value() && right.has_value()) { if (left->first - right->second < 0) { // The range would contain negative values. return base::nullopt; } return std::pair{left->first - right->second, left->second - right->first}; } else { return base::nullopt; } } case IrOpcode::kWord32And: case IrOpcode::kWord64And: { // Note that the minimal value for "a & b" is always 0, regardless of the // max for "a" or "b". And the maximal value is the min of "max of a" and // "max of b". base::Optional> left = TryGetRange(node->InputAt(0)); base::Optional> right = TryGetRange(node->InputAt(1)); if (left.has_value() && right.has_value()) { return std::pair{0, std::min(left->second, right->second)}; } else if (left.has_value()) { return std::pair{0, left->second}; } else if (right.has_value()) { return std::pair{0, right->second}; } else { return base::nullopt; } } case IrOpcode::kInt32Mul: case IrOpcode::kInt32MulWithOverflow: case IrOpcode::kInt64Mul: case IrOpcode::kFloat32Mul: case IrOpcode::kFloat64Mul: { base::Optional> left = TryGetRange(node->InputAt(0)); base::Optional> right = TryGetRange(node->InputAt(1)); if (left.has_value() && right.has_value()) { int32_t high_bound; if (base::bits::SignedMulOverflow32(static_cast(left->second), static_cast(right->second), &high_bound)) { // The range would overflow a 32-bit integer. return base::nullopt; } return std::pair{left->first * right->first, left->second * right->second}; } else { return base::nullopt; } } case IrOpcode::kCall: { HeapObjectMatcher m(node->InputAt(0)); if (m.HasResolvedValue() && m.Ref(broker()).IsCode()) { CodeRef code = m.Ref(broker()).AsCode(); if (code.object()->is_builtin()) { Builtin builtin = code.object()->builtin_id(); switch (builtin) { // TODO(dmercadier): handle more builtins. case Builtin::kMathRandom: return std::pair{0, 1}; default: return base::nullopt; } } } return base::nullopt; } #define CONST_CASE(op, matcher) \ case IrOpcode::k##op: { \ matcher m(node); \ if (m.HasResolvedValue()) { \ if (m.ResolvedValue() < 0 || \ m.ResolvedValue() >= std::numeric_limits::min()) { \ return base::nullopt; \ } \ return std::pair{m.ResolvedValue(), m.ResolvedValue()}; \ } else { \ return base::nullopt; \ } \ } CONST_CASE(Float32Constant, Float32Matcher) CONST_CASE(Float64Constant, Float64Matcher) CONST_CASE(Int32Constant, Int32Matcher) CONST_CASE(Int64Constant, Int64Matcher) CONST_CASE(NumberConstant, NumberMatcher) #undef CONST_CASE default: return base::nullopt; } } // Tries to determine whether {node} is a 1-byte or a 2-byte string. This // function assumes that {node} is part of a string builder: if it's a // concatenation and its left hand-side is something else than a literal string, // it returns only whether the right hand-side is 1/2-byte: the String builder // analysis will take care of propagating the state of the left hand-side. OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::OneOrTwoByte(Node* node) { // TODO(v8:13785,dmercadier): once externalization can no longer convert a // 1-byte into a 2-byte string, compute the proper OneOrTwoByte state. return State::kCantKnow; #if 0 if (states_[node->id()] != State::kUnknown) { return states_[node->id()]; } switch (node->opcode()) { case IrOpcode::kHeapConstant: { HeapObjectMatcher m(node); if (m.HasResolvedValue() && m.Ref(broker()).IsString()) { StringRef string = m.Ref(broker()).AsString(); if (string.object()->IsOneByteRepresentation()) { states_[node->id()] = State::kOneByte; return State::kOneByte; } else { DCHECK(string.object()->IsTwoByteRepresentation()); states_[node->id()] = State::kTwoByte; return State::kTwoByte; } } else { states_[node->id()] = State::kCantKnow; return State::kCantKnow; } } case IrOpcode::kStringFromSingleCharCode: { Node* input = node->InputAt(0); switch (input->opcode()) { case IrOpcode::kStringCharCodeAt: { State state = OneOrTwoByte(input->InputAt(0)); states_[node->id()] = state; return state; } default: { base::Optional> range = TryGetRange(input); if (!range.has_value()) { states_[node->id()] = State::kCantKnow; return State::kCantKnow; } else if (range->first >= 0 && range->second < 255) { states_[node->id()] = State::kOneByte; return State::kOneByte; } else { // For values greater than 0xFF, with the current analysis, we have // no way of knowing if the result will be on 1 or 2 bytes. For // instance, `String.fromCharCode(0x120064 & 0xffff)` will // be a 1-byte string, although the analysis will consider that its // range is [0, 0xffff]. states_[node->id()] = State::kCantKnow; return State::kCantKnow; } } } } case IrOpcode::kStringConcat: case IrOpcode::kNewConsString: { Node* lhs = node->InputAt(1); Node* rhs = node->InputAt(2); DCHECK(IsLiteralString(rhs, broker())); State rhs_state = OneOrTwoByte(rhs); // OneOrTwoByte is only called for Nodes that are part of a String // Builder. As a result, a StringConcat/NewConsString is either: // - between 2 string literal if it is the 1st concatenation of the // string builder. // - between the beginning of the string builder and a literal string. // Thus, if {lhs} is not a literal string, we ignore its State: the // analysis should already have been done on its predecessors anyways. State lhs_state = IsLiteralString(lhs, broker()) ? OneOrTwoByte(lhs) : rhs_state; State node_state = ConcatResultIsOneOrTwoByte(rhs_state, lhs_state); states_[node->id()] = node_state; return node_state; } default: states_[node->id()] = State::kCantKnow; return State::kCantKnow; } #endif } bool StringBuilderOptimizer::BlockShouldFinalizeStringBuilders( BasicBlock* block) { DCHECK_LT(block->id().ToInt(), blocks_to_trimmings_map_.size()); return blocks_to_trimmings_map_[block->id().ToInt()].has_value(); } ZoneVector StringBuilderOptimizer::GetStringBuildersToFinalize( BasicBlock* block) { DCHECK(BlockShouldFinalizeStringBuilders(block)); return blocks_to_trimmings_map_[block->id().ToInt()].value(); } OneOrTwoByteAnalysis::State StringBuilderOptimizer::GetOneOrTwoByte( Node* node) { DCHECK(ConcatIsInStringBuilder(node)); // TODO(v8:13785,dmercadier): once externalization can no longer convert a // 1-byte into a 2-byte string, return the proper OneOrTwoByte status for the // node (= remove the next line and uncomment the 2 after). return OneOrTwoByteAnalysis::State::kCantKnow; // int string_builder_number = GetStringBuilderIdForConcat(node); // return string_builders_[string_builder_number].one_or_two_bytes; } bool StringBuilderOptimizer::IsStringBuilderEnd(Node* node) { Status status = GetStatus(node); DCHECK_IMPLIES(status.state == State::kEndStringBuilder || status.state == State::kEndStringBuilderLoopPhi, status.id != kInvalidId && StringBuilderIsValid(string_builders_[status.id])); return status.state == State::kEndStringBuilder || status.state == State::kEndStringBuilderLoopPhi; } bool StringBuilderOptimizer::IsNonLoopPhiStringBuilderEnd(Node* node) { return IsStringBuilderEnd(node) && !IsLoopPhi(node); } bool StringBuilderOptimizer::IsStringBuilderConcatInput(Node* node) { Status status = GetStatus(node); DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder, status.id != kInvalidId && StringBuilderIsValid(string_builders_[status.id])); return status.state == State::kConfirmedInStringBuilder; } bool StringBuilderOptimizer::ConcatIsInStringBuilder(Node* node) { DCHECK(IsConcat(node)); Status status = GetStatus(node); DCHECK_IMPLIES(status.state == State::kConfirmedInStringBuilder || status.state == State::kBeginStringBuilder || status.state == State::kEndStringBuilder, status.id != kInvalidId && StringBuilderIsValid(string_builders_[status.id])); return status.state == State::kConfirmedInStringBuilder || status.state == State::kBeginStringBuilder || status.state == State::kEndStringBuilder; } int StringBuilderOptimizer::GetStringBuilderIdForConcat(Node* node) { DCHECK(IsConcat(node)); Status status = GetStatus(node); DCHECK(status.state == State::kConfirmedInStringBuilder || status.state == State::kBeginStringBuilder || status.state == State::kEndStringBuilder); DCHECK_NE(status.id, kInvalidId); return status.id; } bool StringBuilderOptimizer::IsFirstConcatInStringBuilder(Node* node) { if (!ConcatIsInStringBuilder(node)) return false; Status status = GetStatus(node); return status.state == State::kBeginStringBuilder; } // Duplicates the {input_idx}th input of {node} if it has multiple uses, so that // the replacement only has one use and can safely be marked as // State::kConfirmedInStringBuilder and properly optimized in // EffectControlLinearizer (in particular, this will allow to safely remove // StringFromSingleCharCode that are only used for a StringConcat that we // optimize). void StringBuilderOptimizer::ReplaceConcatInputIfNeeded(Node* node, int input_idx) { if (!IsLiteralString(node->InputAt(input_idx), broker())) return; Node* input = node->InputAt(input_idx); DCHECK_EQ(input->op()->EffectOutputCount(), 0); DCHECK_EQ(input->op()->ControlOutputCount(), 0); if (input->UseCount() > 1) { input = graph()->CloneNode(input); node->ReplaceInput(input_idx, input); } Status node_status = GetStatus(node); DCHECK_NE(node_status.id, kInvalidId); SetStatus(input, State::kConfirmedInStringBuilder, node_status.id); } // If all of the predecessors of {node} are part of a string builder and have // the same id, returns this id. Otherwise, returns kInvalidId. int StringBuilderOptimizer::GetPhiPredecessorsCommonId(Node* node) { DCHECK_EQ(node->opcode(), IrOpcode::kPhi); int id = kInvalidId; for (int i = 0; i < node->op()->ValueInputCount(); i++) { Node* input = NodeProperties::GetValueInput(node, i); Status status = GetStatus(input); switch (status.state) { case State::kBeginStringBuilder: case State::kInStringBuilder: case State::kPendingPhi: if (id == kInvalidId) { // Initializind {id}. id = status.id; } else if (id != status.id) { // 2 inputs belong to different StringBuilder chains. return kInvalidId; } break; case State::kInvalid: case State::kUnvisited: return kInvalidId; default: UNREACHABLE(); } } DCHECK_NE(id, kInvalidId); return id; } namespace { // Returns true if {first} comes before {second} in {block}. bool ComesBeforeInBlock(Node* first, Node* second, BasicBlock* block) { for (Node* node : *block->nodes()) { if (node == first) { return true; } if (node == second) { return false; } } UNREACHABLE(); } static constexpr int kMaxPredecessors = 15; // Compute up to {kMaxPredecessors} predecessors of {start} that are not past // {end}, and store them in {dst}. Returns true if there are less than // {kMaxPredecessors} such predecessors and false otherwise. bool ComputePredecessors( BasicBlock* start, BasicBlock* end, base::SmallVector* dst) { dst->push_back(start); size_t stack_pointer = 0; while (stack_pointer < dst->size()) { BasicBlock* current = (*dst)[stack_pointer++]; if (current == end) continue; for (BasicBlock* pred : current->predecessors()) { if (std::find(dst->begin(), dst->end(), pred) == dst->end()) { if (dst->size() == kMaxPredecessors) return false; dst->push_back(pred); } } } return true; } // Returns false if {node} makes its string input escape this use. For instance, // a Phi or a Store make their input escape, but a kStringLength consumes its // inputs. bool OpcodeIsAllowed(IrOpcode::Value op) { switch (op) { case IrOpcode::kStringLength: case IrOpcode::kStringConcat: case IrOpcode::kNewConsString: case IrOpcode::kStringCharCodeAt: case IrOpcode::kStringCodePointAt: case IrOpcode::kStringIndexOf: case IrOpcode::kObjectIsString: case IrOpcode::kStringToLowerCaseIntl: case IrOpcode::kStringToNumber: case IrOpcode::kStringToUpperCaseIntl: case IrOpcode::kStringEqual: case IrOpcode::kStringLessThan: case IrOpcode::kStringLessThanOrEqual: case IrOpcode::kCheckString: case IrOpcode::kTypedStateValues: return true; default: return false; } } // Returns true if {sb_child_block} can be a valid successor for // {previous_block} in the string builder, considering that {other_child_block} // is another successor of {previous_block} (which uses the string builder that // is in {previous_block}).We are mainly checking for the following scenario: // // | // v // +---> LoopPhi // | | // | v // | node ----------> other_child // | | // | v // | child // | ... // | | // +-------+ // // Where {node} and {child} are inside a loop (and could be part of a string // builder), but {other_child} is not, and the control flow doesn't exit the // loop in between {node} and {child}. The string builder should not be used in // such situations, because by the time {other_child} is reached, its input will // be invalid, because {child} will have mutated it. (here, node's block would // be {previous_block}, child's would be {sb_child_block} and other_child's // would be {other_child_block}). bool ValidControlFlowForStringBuilder(BasicBlock* sb_child_block, BasicBlock* other_child_block, BasicBlock* previous_block, ZoneVector loop_headers) { if (loop_headers.empty()) return true; // Due to how we visit the graph, {sb_child_block} is the block that // VisitGraph is currently visiting, which means that it has to be in all the // loops of {loop_headers} (and in particular in the latest one). // {other_child_block} on the other hand could be in the loop or not, which is // what this function tries to determine. DCHECK(loop_headers.back()->LoopContains(sb_child_block)); if (sb_child_block->IsLoopHeader()) { // {sb_child_block} starts a loop. This is OK for {other_child_block} only // if {other_child_block} is before the loop (because if it's after, then // the value it will receive will be invalid), or if both // {other_child_block} and {previous_block} are inside the loop. The latter // case corresponds to: // // +--------> sb_child_block // | / \ // | | \ // | v v // | previous_block other_child_block // | | // +--------+ // // Where {other_child_block} eventually reaches {previous_block} (or exits // the loop through some other path). return other_child_block->rpo_number() < sb_child_block->rpo_number() || (sb_child_block->LoopContains(previous_block) && (sb_child_block->LoopContains(other_child_block))); } else { // Both {sb_child_block} and {other_child_block} should be in the same loop. return loop_headers.back()->LoopContains(other_child_block); } } // Return true if {maybe_dominator} dominates {maybe_dominee} and is less than // {kMaxDominatorSteps} steps away (to avoid going back too far if // {maybe_dominee} is much deeper in the graph that {maybe_dominator}). bool IsClosebyDominator(BasicBlock* maybe_dominator, BasicBlock* maybe_dominee) { static constexpr int kMaxDominatorSteps = 10; if (maybe_dominee->dominator_depth() + kMaxDominatorSteps < maybe_dominator->dominator_depth()) { // {maybe_dominee} is too far from {maybe_dominator} to compute quickly if // it's dominated by {maybe_dominator} or not. return false; } while (maybe_dominee != maybe_dominator && maybe_dominator->dominator_depth() < maybe_dominee->dominator_depth()) { maybe_dominee = maybe_dominee->dominator(); } return maybe_dominee == maybe_dominator; } // Returns true if {node} is a Phi that has both {input1} and {input2} as // inputs. bool IsPhiContainingGivenInputs(Node* node, Node* input1, Node* input2, Schedule* schedule) { if (node->opcode() != IrOpcode::kPhi || schedule->block(node)->IsLoopHeader()) { return false; } bool has_input1 = false, has_input2 = false; for (Node* input : node->inputs()) { if (input == input1) { has_input1 = true; } else if (input == input2) { has_input2 = true; } } return has_input1 && has_input2; } // Returns true if {phi} has 3 inputs (including the Loop or Merge), and its // first two inputs are either Phi themselves, or StringConcat/NewConsString. // This is used to quickly eliminate Phi nodes that cannot be part of a String // Builder. bool PhiInputsAreConcatsOrPhi(Node* phi) { DCHECK_EQ(phi->opcode(), IrOpcode::kPhi); return phi->InputCount() == 3 && (phi->InputAt(0)->opcode() == IrOpcode::kPhi || IsConcat(phi->InputAt(0))) && (phi->InputAt(1)->opcode() == IrOpcode::kPhi || IsConcat(phi->InputAt(1))); } } // namespace // Check that the uses of {node} are valid, assuming that {string_builder_child} // is the following node in the string builder. In a nutshell, for uses of a // node (that is part of the string builder) to be valid, they need to all // appear before the next node of the string builder (because after, the node is // not valid anymore because we mutate SlicedString and the backing store in // place). For instance: // // s1 = "123" + "abc"; // s2 = s1 + "def"; // l = s1.length(); // // In this snippet, if `s1` and `s2` are part of the string builder, then the // uses of `s1` are not actually valid, because `s1.length()` appears after the // next node of the string builder (`s2`) has been computed. bool StringBuilderOptimizer::CheckNodeUses(Node* node, Node* string_builder_child, Status status) { DCHECK(GetStatus(string_builder_child).state == State::kInStringBuilder || GetStatus(string_builder_child).state == State::kPendingPhi); BasicBlock* child_block = schedule()->block(string_builder_child); if (node->UseCount() == 1) return true; BasicBlock* node_block = schedule()->block(node); bool is_loop_phi = IsLoopPhi(node); bool child_is_in_loop = is_loop_phi && LoopContains(node, string_builder_child); base::SmallVector current_predecessors; bool predecessors_computed = false; for (Node* other_child : node->uses()) { if (other_child == string_builder_child) continue; BasicBlock* other_child_block = schedule()->block(other_child); if (!OpcodeIsAllowed(other_child->opcode())) { // {other_child} could write {node} (the beginning of the string builder) // in memory (or keep it alive through other means, such as a Phi). This // means that if {string_builder_child} modifies the string builder, then // the value stored by {other_child} will become out-dated (since // {other_child} will probably just write a pointer to the string in // memory, and the string pointed by this pointer will be updated by the // string builder). if (is_loop_phi && child_is_in_loop && !node_block->LoopContains(other_child_block)) { // {other_child} keeps the string alive, but this is only after the // loop, when {string_builder_child} isn't alive anymore, so this isn't // an issue. continue; } return false; } if (other_child_block == child_block) { // Both {child} and {other_child} are in the same block, we need to make // sure that {other_child} comes first. Status other_status = GetStatus(other_child); if (other_status.id != kInvalidId) { DCHECK_EQ(other_status.id, status.id); // {node} flows into 2 different nodes of the string builder, both of // which are in the same BasicBlock, which is not supported. We need to // invalidate {other_child} as well, or the input of {child} could be // wrong. In theory, we could keep one of {other_child} and {child} (the // one that comes the later in the BasicBlock), but it's simpler to keep // neither, and end the string builder on {node}. SetStatus(other_child, State::kInvalid); return false; } if (!ComesBeforeInBlock(other_child, string_builder_child, child_block)) { return false; } continue; } if (is_loop_phi) { if ((child_is_in_loop && !node_block->LoopContains(other_child_block)) || (!child_is_in_loop && node_block->LoopContains(other_child_block))) { // {child} is in the loop and {other_child} isn't (or the other way // around). In that case, we skip {other_child}: it will be tested // later when we leave the loop (if {child} is in the loop) or has // been tested earlier while we were inside the loop (if {child} isn't // in the loop). continue; } } else if (!ValidControlFlowForStringBuilder(child_block, other_child_block, node_block, loop_headers_)) { return false; } if (IsPhiContainingGivenInputs(other_child, node, string_builder_child, schedule())) { // {other_child} is a Phi that merges {child} and {node} (and maybe some // other nodes that we don't care about for now: if {other_child} merges // more than 2 nodes, it won't be added to the string builder anyways). continue; } base::SmallVector other_predecessors; bool all_other_predecessors_computed = ComputePredecessors(other_child_block, node_block, &other_predecessors); // Making sure that {child_block} isn't in the predecessors of // {other_child_block}. Otherwise, the use of {node} in {other_child} // would be invalid. if (std::find(other_predecessors.begin(), other_predecessors.end(), child_block) != other_predecessors.end()) { // {child} is in the predecessor of {other_child}, which is definitely // invalid (because it means that {other_child} uses an out-dated version // of {node}, since {child} modified it). return false; } else { if (all_other_predecessors_computed) { // {child} is definitely not in the predecessors of {other_child}, which // means that it's either a successor of {other_child} (which is safe), // or it's in another path of the graph alltogether (which is also // safe). continue; } else { // We didn't compute all the predecessors of {other_child}, so it's // possible that {child_block} is one of the predecessor that we didn't // compute. // // Trying to see if we can find {other_child_block} in the // predecessors of {child_block}: that would mean that {other_child} // is guaranteed to be scheduled before {child}, making it safe. if (!predecessors_computed) { ComputePredecessors(child_block, node_block, ¤t_predecessors); predecessors_computed = true; } if (std::find(current_predecessors.begin(), current_predecessors.end(), other_child_block) == current_predecessors.end()) { // We didn't find {other_child} in the predecessors of {child}. It // means that either {other_child} comes after in the graph (which // is unsafe), or that {other_child} and {child} are on two // independent subgraphs (which is safe). We have no efficient way // to know which one of the two this is, so, we fall back to a // stricter approach: the use of {node} in {other_child} is // guaranteed to be safe if {other_child_block} dominates // {child_block}. if (!IsClosebyDominator(other_child_block, child_block)) { return false; } } } } } return true; } // Check that the uses of the predecessor(s) of {child} in the string builder // are valid, with respect to {child}. This sounds a bit backwards, but we can't // check if uses are valid before having computed what the next node in the // string builder is. Hence, once we've established that {child} is in the // string builder, we check that the uses of the previous node(s) of the // string builder are valid. For non-loop phis (ie, merge phis), we simply check // that the uses of their 2 predecessors are valid. For loop phis, this function // is called twice: one for the outside-the-loop input (with {input_if_loop_phi} // = 0), and once for the inside-the-loop input (with {input_if_loop_phi} = 1). bool StringBuilderOptimizer::CheckPreviousNodeUses(Node* child, Status status, int input_if_loop_phi) { if (IsConcat(child)) { return CheckNodeUses(child->InputAt(1), child, status); } if (child->opcode() == IrOpcode::kPhi) { BasicBlock* child_block = schedule()->block(child); if (child_block->IsLoopHeader()) { return CheckNodeUses(child->InputAt(input_if_loop_phi), child, status); } else { DCHECK_EQ(child->InputCount(), 3); return CheckNodeUses(child->InputAt(0), child, status) && CheckNodeUses(child->InputAt(1), child, status); } } UNREACHABLE(); } void StringBuilderOptimizer::VisitNode(Node* node, BasicBlock* block) { if (IsConcat(node)) { Node* lhs = node->InputAt(1); Node* rhs = node->InputAt(2); if (!IsLiteralString(rhs, broker())) { SetStatus(node, State::kInvalid); return; } if (IsLiteralString(lhs, broker())) { // This node could start a string builder. However, we won't know until // we've properly inspected its uses, found a Phi somewhere down its use // chain, made sure that the Phi was valid, etc. Pre-emptively, we do a // quick check (with HasConcatOrPhiUse) that this node has a // StringConcat/NewConsString in its uses, and if so, we set its state as // kBeginConcat, and increment the {string_builder_count_}. The goal of // the HasConcatOrPhiUse is mainly to avoid incrementing // {string_builder_count_} too often for things that are obviously just // regular concatenations of 2 constant strings and that can't be // beginning of string builders. if (HasConcatOrPhiUse(lhs)) { SetStatus(node, State::kBeginStringBuilder, string_builder_count_); string_builders_.push_back( StringBuilder{node, static_cast(string_builder_count_), false, OneOrTwoByteAnalysis::State::kUnknown}); string_builder_count_++; } // A concatenation between 2 literal strings has no predecessor in the // string builder, and there is thus no more checks/bookkeeping required // ==> early return. return; } else { Status lhs_status = GetStatus(lhs); switch (lhs_status.state) { case State::kBeginStringBuilder: case State::kInStringBuilder: SetStatus(node, State::kInStringBuilder, lhs_status.id); break; case State::kPendingPhi: { BasicBlock* phi_block = schedule()->block(lhs); if (phi_block->LoopContains(block)) { // This node uses a PendingPhi and is inside the loop. We // speculatively set it to kInStringBuilder. SetStatus(node, State::kInStringBuilder, lhs_status.id); } else { // This node uses a PendingPhi but is not inside the loop, which // means that the PendingPhi was never resolved to a kInConcat or a // kInvalid, which means that it's actually not valid (because we // visit the graph in RPO order, which means that we've already // visited the whole loop). Thus, we set the Phi to kInvalid, and // thus, we also set the current node to kInvalid. SetStatus(lhs, State::kInvalid); SetStatus(node, State::kInvalid); } break; } case State::kInvalid: case State::kUnvisited: SetStatus(node, State::kInvalid); break; default: UNREACHABLE(); } } } else if (node->opcode() == IrOpcode::kPhi && PhiInputsAreConcatsOrPhi(node)) { if (!block->IsLoopHeader()) { // This Phi merges nodes after a if/else. int id = GetPhiPredecessorsCommonId(node); if (id == kInvalidId) { SetStatus(node, State::kInvalid); } else { SetStatus(node, State::kInStringBuilder, id); } } else { // This Phi merges a value from inside the loop with one from before. DCHECK_EQ(node->op()->ValueInputCount(), 2); Status first_input_status = GetStatus(node->InputAt(0)); switch (first_input_status.state) { case State::kBeginStringBuilder: case State::kInStringBuilder: SetStatus(node, State::kPendingPhi, first_input_status.id); break; case State::kPendingPhi: case State::kInvalid: case State::kUnvisited: SetStatus(node, State::kInvalid); break; default: UNREACHABLE(); } } } else { SetStatus(node, State::kInvalid); } Status status = GetStatus(node); if (status.state == State::kInStringBuilder || status.state == State::kPendingPhi) { // We make sure that this node being in the string builder doesn't conflict // with other uses of the previous node of the string builder. Note that // loop phis can never have the kInStringBuilder state at this point. We // thus check their uses when we finish the loop and set the phi's status to // InStringBuilder. if (!CheckPreviousNodeUses(node, status, 0)) { SetStatus(node, State::kInvalid); return; } // Updating following PendingPhi if needed. for (Node* use : node->uses()) { if (use->opcode() == IrOpcode::kPhi) { Status use_status = GetStatus(use); if (use_status.state == State::kPendingPhi) { // Finished the loop. SetStatus(use, State::kInStringBuilder, status.id); if (use_status.id == status.id && CheckPreviousNodeUses(use, status, 1)) { string_builders_[status.id].has_loop_phi = true; } else { // One of the uses of {node} is a pending Phi that hasn't the // correct id (is that even possible?), or the uses of {node} are // invalid. Either way, both {node} and {use} are invalid. SetStatus(node, State::kInvalid); SetStatus(use, State::kInvalid); } } } } } } // For each potential string builder, checks that their beginning has status // kBeginStringBuilder, and that they contain at least one phi. Then, all of // their "valid" nodes are switched from status State::InStringBuilder to status // State::kConfirmedInStringBuilder (and "valid" kBeginStringBuilder are left // as kBeginStringBuilder while invalid ones are switched to kInvalid). Nodes // are considered "valid" if they are before any kPendingPhi in the string // builder. Put otherwise, switching status from kInStringBuilder to // kConfirmedInStringBuilder is a cheap way of getting rid of kInStringBuilder // nodes that are invalid before one of their predecessor is a kPendingPhi that // was never switched to kInStringBuilder. An example: // // StringConcat [1] // kBeginStringBuilder // | // | // v // -----> Loop Phi [2] --------------- // | kInStringBuilder | // | | | // | | | // | v v // | StringConcat [3] StringConcat [4] // | kInStringBuilder kInStringBuilder // | | | // ----------| | // v // -----> Loop Phi [5] ------------> // | kPendingPhi // | | // | | // | v // | StringConcat [6] // | kInStringBuilder // | | // -----------| // // In this graph, nodes [1], [2], [3] and [4] are part of the string builder. In // particular, node 2 has at some point been assigned the status kPendingPhi // (because all loop phis start as kPendingPhi), but was later switched to // status kInStringBuilder (because its uses inside the loop were compatible // with the string builder), which implicitly made node [3] a valid part of the // string builder. On the other hand, node [5] was never switched to status // kInStringBuilder, which means that it is not valid, and any successor of [5] // isn't valid either (remember that we speculatively set nodes following a // kPendingPhi to kInStringBuilder). Thus, rather than having to iterate through // the successors of kPendingPhi nodes to invalidate them, we simply update the // status of valid nodes to kConfirmedInStringBuilder, after which any // kInStringBuilder node is actually invalid. // // In this function, we also collect all the possible ends for each string // builder (their can be multiple possible ends if there is a branch before the // end of a string builder), as well as where trimming for a given string // builder should be done (either right after the last node, or at the beginning // of the blocks following this node). For an example of string builder with // multiple ends, consider this code: // // let s = "a" + "b" // for (...) { // s += "..."; // } // if (...) return s + "abc"; // else return s + "def"; // // Which would produce a graph that looks like: // // kStringConcat // | // | // v // -------> Loop Phi--------------- // | | | // | | | // | v | // | kStringConcat | // | | | // -------------| | // | // | // ------------------------------------------ // | | // | | // | | // v v // kStringConcat [1] kStringConcat [2] // | | // | | // v v // Return Return // // In this case, both kStringConcat [1] and [2] are valid ends for the string // builder. void StringBuilderOptimizer::FinalizeStringBuilders() { OneOrTwoByteAnalysis one_or_two_byte_analysis(graph(), temp_zone(), broker()); // We use {to_visit} to iterate through a string builder, and {ends} to // collect its ending. To save some memory, these 2 variables are declared a // bit early, and we .clear() them at the beginning of each iteration (which // shouldn't free their memory), rather than allocating new memory for each // string builder. ZoneVector to_visit(temp_zone()); ZoneVector ends(temp_zone()); bool one_string_builder_or_more_valid = false; for (unsigned int string_builder_id = 0; string_builder_id < string_builder_count_; string_builder_id++) { StringBuilder* string_builder = &string_builders_[string_builder_id]; Node* start = string_builder->start; Status start_status = GetStatus(start); if (start_status.state != State::kBeginStringBuilder || !string_builder->has_loop_phi) { // {start} has already been invalidated, or the string builder doesn't // contain a loop Phi. *string_builder = kInvalidStringBuilder; UpdateStatus(start, State::kInvalid); continue; } DCHECK_EQ(start_status.state, State::kBeginStringBuilder); DCHECK_EQ(start_status.id, string_builder_id); one_string_builder_or_more_valid = true; OneOrTwoByteAnalysis::State one_or_two_byte = one_or_two_byte_analysis.OneOrTwoByte(start); to_visit.clear(); ends.clear(); to_visit.push_back(start); while (!to_visit.empty()) { Node* curr = to_visit.back(); to_visit.pop_back(); Status curr_status = GetStatus(curr); if (curr_status.state == State::kConfirmedInStringBuilder) continue; DCHECK(curr_status.state == State::kInStringBuilder || curr_status.state == State::kBeginStringBuilder); DCHECK_IMPLIES(curr_status.state == State::kBeginStringBuilder, curr == start); DCHECK_EQ(curr_status.id, start_status.id); if (curr_status.state != State::kBeginStringBuilder) { UpdateStatus(curr, State::kConfirmedInStringBuilder); } if (IsConcat(curr)) { one_or_two_byte = OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte( one_or_two_byte, one_or_two_byte_analysis.OneOrTwoByte(curr)); // Duplicating string inputs if needed, and marking them as // InStringBuilder (so that EffectControlLinearizer doesn't lower them). ReplaceConcatInputIfNeeded(curr, 1); ReplaceConcatInputIfNeeded(curr, 2); } // Check if {curr} is one of the string builder's ends: if {curr} has no // uses that are part of the string builder, then {curr} ends the string // builder. bool has_use_in_string_builder = false; for (Node* next : curr->uses()) { Status next_status = GetStatus(next); if ((next_status.state == State::kInStringBuilder || next_status.state == State::kConfirmedInStringBuilder) && next_status.id == curr_status.id) { if (next_status.state == State::kInStringBuilder) { // We only add to {to_visit} when the state is kInStringBuilder to // make sure that we don't revisit already-visited nodes. to_visit.push_back(next); } if (!IsLoopPhi(curr) || !LoopContains(curr, next)) { // The condition above is true when: // - {curr} is not a loop phi: in that case, {next} is (one of) the // nodes in the string builder after {curr}. // - {curr} is a loop phi, and {next} is not inside the loop: in // that case, {node} is (one of) the nodes in the string builder // that are after {curr}. Note that we ignore uses of {curr} // inside the loop, since if {curr} has no uses **after** the // loop, then it's (one of) the end of the string builder. has_use_in_string_builder = true; } } } if (!has_use_in_string_builder) { ends.push_back(curr); } } // Note that there is no need to check that the ends have no conflicting // uses, because none of the ends can be alive at the same time, and thus, // uses of the different ends can't be alive at the same time either. The // reason that ens can't be alive at the same time is that if 2 ends were // alive at the same time, then there exist a node n that is a predecessors // of both ends, and that has 2 successors in the string builder (and alive // at the same time), which is not possible because CheckNodeUses prevents // it. // Collecting next blocks where trimming is required (blocks following a // loop Phi where the Phi is the last in a string builder), and setting // kEndStringBuilder state to nodes where trimming should be done right // after computing the node (when the last node in a string builder is not a // loop phi). for (Node* end : ends) { if (IsLoopPhi(end)) { BasicBlock* phi_block = schedule()->block(end); for (BasicBlock* block : phi_block->successors()) { if (phi_block->LoopContains(block)) continue; if (!blocks_to_trimmings_map_[block->id().ToInt()].has_value()) { blocks_to_trimmings_map_[block->id().ToInt()] = ZoneVector(temp_zone()); } blocks_to_trimmings_map_[block->id().ToInt()]->push_back(end); } UpdateStatus(end, State::kEndStringBuilderLoopPhi); } else { UpdateStatus(end, State::kEndStringBuilder); } } string_builder->one_or_two_bytes = one_or_two_byte; } #ifdef DEBUG if (one_string_builder_or_more_valid) { broker()->isolate()->set_has_turbofan_string_builders(); } #else USE(one_string_builder_or_more_valid); #endif } void StringBuilderOptimizer::VisitGraph() { // Initial discovery of the potential string builders. for (BasicBlock* block : *schedule()->rpo_order()) { // Removing finished loops. while (!loop_headers_.empty() && loop_headers_.back()->loop_end() == block) { loop_headers_.pop_back(); } // Adding new loop if necessary. if (block->IsLoopHeader()) { loop_headers_.push_back(block); } // Visiting block content. for (Node* node : *block->nodes()) { VisitNode(node, block); } } // Finalize valid string builders (moving valid nodes to status // kConfirmedInStringBuilder or kEndStringBuilder), and collecting the // trimming points. FinalizeStringBuilders(); } void StringBuilderOptimizer::Run() { VisitGraph(); } StringBuilderOptimizer::StringBuilderOptimizer(JSGraph* jsgraph, Schedule* schedule, Zone* temp_zone, JSHeapBroker* broker) : jsgraph_(jsgraph), schedule_(schedule), temp_zone_(temp_zone), broker_(broker), blocks_to_trimmings_map_(schedule->BasicBlockCount(), temp_zone), status_(jsgraph->graph()->NodeCount(), Status{kInvalidId, State::kUnvisited}, temp_zone), string_builders_(temp_zone), loop_headers_(temp_zone) {} } // namespace compiler } // namespace internal } // namespace v8