diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index d748b162d7809..0982fd35401cb 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -89,6 +89,7 @@ STATISTIC(FoundProfiledCalleeMaxDepth,
           "Maximum depth of profiled callees found via tail calls");
 STATISTIC(FoundProfiledCalleeNonUniquelyCount,
           "Number of profiled callees found via multiple tail call chains");
+STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning");
 
 static cl::opt<std::string> DotFilePathPrefix(
     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -127,6 +128,10 @@ static cl::opt<bool> AllowRecursiveCallsites(
     "memprof-allow-recursive-callsites", cl::init(true), cl::Hidden,
     cl::desc("Allow cloning of callsites involved in recursive cycles"));
 
+static cl::opt<bool> CloneRecursiveContexts(
+    "memprof-clone-recursive-contexts", cl::init(true), cl::Hidden,
+    cl::desc("Allow cloning of contexts through recursive cycles"));
+
 // When disabled, try to detect and prevent cloning of recursive contexts.
 // This is only necessary until we support cloning through recursive cycles.
 // Leave on by default for now, as disabling requires a little bit of compile
@@ -134,7 +139,7 @@ static cl::opt<bool> AllowRecursiveCallsites(
 // hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
 static cl::opt<bool> AllowRecursiveContexts(
     "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
-    cl::desc("Allow cloning of contexts through recursive cycles"));
+    cl::desc("Allow cloning of contexts having recursive cycles"));
 
 namespace llvm {
 cl::opt<bool> EnableMemProfContextDisambiguation(
@@ -293,37 +298,40 @@ class CallsiteContextGraph {
   // TODO: Should this be a map (from Caller node) for more efficient lookup?
   std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
 
-  // Get the list of edges from which we can compute allocation information
-  // such as the context ids and allocation type of this node.
-  const std::vector<std::shared_ptr<ContextEdge>> *
-  getEdgesWithAllocInfo() const {
-    // If node has any callees, compute from those, otherwise compute from
-    // callers (i.e. if this is the leaf allocation node).
-    if (!CalleeEdges.empty())
-      return &CalleeEdges;
+  // Returns true if we need to look at the caller edges for determining the
+  // node context ids and allocation type.
+  bool useCallerEdgesForContextInfo() const {
     // Typically if the callee edges are empty either the caller edges are
     // also empty, or this is an allocation (leaf node). However, if we are
     // allowing recursive callsites and contexts this will be violated for
     // incompletely cloned recursive cycles.
-    assert(CallerEdges.empty() || IsAllocation ||
+    assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
            (AllowRecursiveCallsites && AllowRecursiveContexts));
-    if (!CallerEdges.empty() && IsAllocation)
-      return &CallerEdges;
-    return nullptr;
+    // When cloning for a recursive context, we might be in the midst of
+    // cloning for a recurrence and have moved context ids off of a caller
+    // edge onto the clone but not yet off of the incoming caller (back)
+    // edge. If we don't look at those we miss the fact that this node
+    // still has context ids of interest.
+    return IsAllocation || CloneRecursiveContexts;
   }
 
   // Compute the context ids for this node from the union of its edge context
   // ids.
   DenseSet<uint32_t> getContextIds() const {
-    DenseSet<uint32_t> ContextIds;
-    auto *Edges = getEdgesWithAllocInfo();
-    if (!Edges)
-      return {};
     unsigned Count = 0;
-    for (auto &Edge : *Edges)
+    // Compute the number of ids for reserve below. In general we only need to
+    // look at one set of edges, typically the callee edges, since other than
+    // allocations and in some cases during recursion cloning, all the context
+    // ids on the callers should also flow out via callee edges.
+    for (auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
       Count += Edge->getContextIds().size();
+    DenseSet<uint32_t> ContextIds;
     ContextIds.reserve(Count);
-    for (auto &Edge : *Edges)
+    auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
+        CalleeEdges, useCallerEdgesForContextInfo()
+                         ? CallerEdges
+                         : std::vector<std::shared_ptr<ContextEdge>>());
+    for (const auto &Edge : Edges)
       ContextIds.insert(Edge->getContextIds().begin(),
                         Edge->getContextIds().end());
     return ContextIds;
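The conditional concatenation above is reused verbatim in the next two hunks, so the idiom is worth isolating: llvm::concat<T> lazily chains its argument ranges, and the ternary's false arm contributes an empty temporary vector, so the caller edges are only walked when useCallerEdgesForContextInfo() returns true. A minimal standalone sketch of the same idiom, not part of the patch (sumShared and the shared_ptr<int> vectors are hypothetical):

#include "llvm/ADT/STLExtras.h"
#include <memory>
#include <vector>

// Iterate Primary always, and Extra only when UseExtra is true. Note the
// ternary materializes a copy of Extra when it is taken; the false arm is an
// empty temporary that llvm::concat moves into the returned range object.
static int sumShared(const std::vector<std::shared_ptr<int>> &Primary,
                     const std::vector<std::shared_ptr<int>> &Extra,
                     bool UseExtra) {
  auto Range = llvm::concat<const std::shared_ptr<int>>(
      Primary, UseExtra ? Extra : std::vector<std::shared_ptr<int>>());
  int Sum = 0;
  for (const auto &P : Range)
    Sum += *P;
  return Sum;
}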
@@ -332,13 +340,14 @@ class CallsiteContextGraph {
   // Compute the allocation type for this node from the OR of its edge
   // allocation types.
   uint8_t computeAllocType() const {
-    auto *Edges = getEdgesWithAllocInfo();
-    if (!Edges)
-      return (uint8_t)AllocationType::None;
     uint8_t BothTypes =
         (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
     uint8_t AllocType = (uint8_t)AllocationType::None;
-    for (auto &Edge : *Edges) {
+    auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
+        CalleeEdges, useCallerEdgesForContextInfo()
+                         ? CallerEdges
+                         : std::vector<std::shared_ptr<ContextEdge>>());
+    for (const auto &Edge : Edges) {
       AllocType |= Edge->AllocTypes;
       // Bail early if alloc type reached both, no further refinement.
       if (AllocType == BothTypes)
@@ -350,10 +359,11 @@ class CallsiteContextGraph {
   // The context ids set for this node is empty if its edge context ids are
   // also all empty.
   bool emptyContextIds() const {
-    auto *Edges = getEdgesWithAllocInfo();
-    if (!Edges)
-      return true;
-    for (auto &Edge : *Edges) {
+    auto Edges = llvm::concat<const std::shared_ptr<ContextEdge>>(
+        CalleeEdges, useCallerEdgesForContextInfo()
+                         ? CallerEdges
+                         : std::vector<std::shared_ptr<ContextEdge>>());
+    for (const auto &Edge : Edges) {
       if (!Edge->getContextIds().empty())
         return false;
     }
@@ -434,6 +444,14 @@ class CallsiteContextGraph {
   // for contexts including this edge.
   uint8_t AllocTypes = 0;
 
+  // Set just before initiating cloning when cloning of recursive contexts is
+  // enabled. Used to defer cloning of backedges until we have done cloning of
+  // the callee node for non-backedge caller edges. This exposes cloning
+  // opportunities through the backedge of the cycle.
+  // TODO: Note that this is not updated during cloning, and it is unclear
+  // whether that would be needed.
+  bool IsBackedge = false;
+
   // The set of IDs for contexts including this edge.
   DenseSet<uint32_t> ContextIds;
 
@@ -722,6 +740,9 @@ class CallsiteContextGraph {
   void moveCalleeEdgeToNewCaller(const std::shared_ptr<ContextEdge> &Edge,
                                  ContextNode *NewCaller);
 
+  void markBackedges(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+                     DenseSet<const ContextNode *> &CurrentStack);
+
   /// Recursively perform cloning on the graph for the given Node and its
   /// callers, in order to uniquely identify the allocation behavior of an
   /// allocation given its context. The context ids of the allocation being
" (BE)" : "") << " AllocTypes: " << getAllocTypeString(AllocTypes); OS << " ContextIds:"; std::vector SortedIds(ContextIds.begin(), ContextIds.end()); @@ -3115,6 +3137,8 @@ void CallsiteContextGraph:: // node (Edge's current callee may be the original node too). assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); + bool EdgeIsRecursive = Edge->Callee == Edge->Caller; + ContextNode *OldCallee = Edge->Callee; // We might already have an edge to the new callee from earlier cloning for a @@ -3181,8 +3205,16 @@ void CallsiteContextGraph:: // If this is a direct recursion edge, use NewCallee (the clone) as the // callee as well, so that any edge updated/created here is also direct // recursive. - if (CalleeToUse == OldCallee) + if (CalleeToUse == OldCallee) { + // If this is a recursive edge, see if we already moved a recursive edge + // (which would have to have been this one) - if we were only moving a + // subset of context ids it would still be on OldCallee. + if (EdgeIsRecursive) { + assert(OldCalleeEdge == Edge); + continue; + } CalleeToUse = NewCallee; + } // The context ids moving to the new callee are the subset of this edge's // context ids and the context ids on the caller edge being moved. DenseSet EdgeContextIdsToMove = @@ -3369,9 +3401,47 @@ void CallsiteContextGraph:: } } +// This is the standard DFS based backedge discovery algorithm. +template +void CallsiteContextGraph::markBackedges( + ContextNode *Node, DenseSet &Visited, + DenseSet &CurrentStack) { + auto I = Visited.insert(Node); + // We should only call this for unvisited nodes. + assert(I.second); + for (auto &CalleeEdge : Node->CalleeEdges) { + auto *Callee = CalleeEdge->Callee; + if (Visited.count(Callee)) { + // Since this was already visited we need to check if it is currently on + // the recursive stack in which case it is a backedge. + if (CurrentStack.count(Callee)) + CalleeEdge->IsBackedge = true; + continue; + } + CurrentStack.insert(Callee); + markBackedges(Callee, Visited, CurrentStack); + CurrentStack.erase(Callee); + } +} + template void CallsiteContextGraph::identifyClones() { + // If we are cloning recursive contexts, find and mark backedges from all root + // callers, using the typical DFS based backedge analysis. DenseSet Visited; + if (CloneRecursiveContexts) { + DenseSet CurrentStack; + for (auto &Entry : NonAllocationCallToContextNodeMap) { + auto *Node = Entry.second; + if (Node->isRemoved()) + continue; + // It is a root if it doesn't have callers. + if (!Node->CallerEdges.empty()) + continue; + markBackedges(Node, Visited, CurrentStack); + assert(CurrentStack.empty()); + } + } for (auto &Entry : AllocationCallToContextNodeMap) { Visited.clear(); identifyClones(Entry.second, Visited, Entry.second->getContextIds()); @@ -3430,6 +3500,14 @@ void CallsiteContextGraph::identifyClones( assert(!is_contained(Node->CallerEdges, Edge)); continue; } + // Defer backedges. See comments further below where these edges are + // handled during the cloning of this Node. + if (Edge->IsBackedge) { + // We should only mark these if cloning recursive contexts, where we + // need to do this deferral. + assert(CloneRecursiveContexts); + continue; + } // Ignore any caller we previously visited via another edge. 
@@ -3430,6 +3500,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
       assert(!is_contained(Node->CallerEdges, Edge));
       continue;
     }
+    // Defer backedges. See comments further below where these edges are
+    // handled during the cloning of this Node.
+    if (Edge->IsBackedge) {
+      // We should only mark these if cloning recursive contexts, where we
+      // need to do this deferral.
+      assert(CloneRecursiveContexts);
+      continue;
+    }
     // Ignore any caller we previously visited via another edge.
     if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
       identifyClones(Edge->Caller, Visited, AllocContextIds);
@@ -3483,6 +3561,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
   assert(Node->AllocTypes != (uint8_t)AllocationType::None);
 
   DenseSet<uint32_t> RecursiveContextIds;
+  assert(AllowRecursiveContexts || !CloneRecursiveContexts);
   // If we are allowing recursive callsites, but have also disabled recursive
   // contexts, look for context ids that show up in multiple caller edges.
   if (AllowRecursiveCallsites && !AllowRecursiveContexts) {
@@ -3505,6 +3584,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
   // makes it less error-prone.
   auto CallerEdges = Node->CallerEdges;
   for (auto &CallerEdge : CallerEdges) {
+    // Skip any that have been removed by an earlier recursive call.
+    if (CallerEdge->isRemoved()) {
+      assert(!is_contained(Node->CallerEdges, CallerEdge));
+      continue;
+    }
+    assert(CallerEdge->Callee == Node);
+
     // See if cloning the prior caller edge left this node with a single alloc
     // type or a single caller. In that case no more cloning of Node is needed.
     if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
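The hunk below repeatedly narrows the tracked context ids with llvm::set_intersection from llvm/ADT/SetOperations.h, which returns a new set holding the elements common to both arguments. A tiny illustration with made-up ids (intersectionDemo is hypothetical):

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetOperations.h"
#include <cassert>
#include <cstdint>

static void intersectionDemo() {
  llvm::DenseSet<uint32_t> A = {1, 2, 3};
  llvm::DenseSet<uint32_t> B = {2, 3, 4};
  // set_intersection returns a new set with the common elements; the cloning
  // code below uses this to keep only the context ids that remain on the
  // (possibly updated) caller edge.
  llvm::DenseSet<uint32_t> C = llvm::set_intersection(A, B);
  assert(C.size() == 2 && C.contains(2) && C.contains(3));
}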
@@ -3546,13 +3632,100 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
     //
     // Then check if by cloning node at least one of the callee edges will be
     // disambiguated by splitting out different context ids.
+    //
+    // However, always do the cloning if this is a backedge, in which case we
+    // have not yet cloned along this caller edge.
     assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
     assert(Node->AllocTypes != (uint8_t)AllocationType::None);
-    if (allocTypeToUse(CallerAllocTypeForAlloc) ==
+    if (!CallerEdge->IsBackedge &&
+        allocTypeToUse(CallerAllocTypeForAlloc) ==
             allocTypeToUse(Node->AllocTypes) &&
         allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
-            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges))
+            CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
       continue;
+    }
+
+    if (CallerEdge->IsBackedge) {
+      // We should only mark these if cloning recursive contexts, where we
+      // need to do this deferral.
+      assert(CloneRecursiveContexts);
+      DeferredBackedges++;
+    }
+
+    // If this is a backedge, we now do recursive cloning starting from its
+    // caller since we may have moved unambiguous caller contexts to a clone
+    // of this Node in a previous iteration of the current loop, giving more
+    // opportunity for cloning through the backedge. Because we sorted the
+    // caller edges earlier so that cold caller edges are first, we would have
+    // visited and cloned this node for any unambiguously cold non-recursive
+    // callers before any ambiguous backedge callers. Note that we don't do this
+    // if the caller is already cloned or visited during cloning (e.g. via a
+    // different context path from the allocation).
+    // TODO: Can we do better in the case where the caller was already visited?
+    if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
+        !Visited.count(CallerEdge->Caller)) {
+      const auto OrigIdCount = CallerEdge->getContextIds().size();
+      // Now do the recursive cloning of this backedge's caller, which was
+      // deferred earlier.
+      identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
+      removeNoneTypeCalleeEdges(CallerEdge->Caller);
+      // See if the recursive call to identifyClones moved the context ids to a
+      // new edge from this node to a clone of caller, and switch to looking at
+      // that new edge so that we clone Node for the new caller clone.
+      bool UpdatedEdge = false;
+      if (OrigIdCount > CallerEdge->getContextIds().size()) {
+        for (auto E : Node->CallerEdges) {
+          // Only interested in clones of the current edge's caller.
+          if (E->Caller->CloneOf != CallerEdge->Caller)
+            continue;
+          // See if this edge contains any of the context ids originally on the
+          // current caller edge.
+          auto CallerEdgeContextsForAllocNew =
+              set_intersection(CallerEdgeContextsForAlloc, E->getContextIds());
+          if (CallerEdgeContextsForAllocNew.empty())
+            continue;
+          // Make sure we don't pick a previously existing caller edge of this
+          // Node, which would be processed on a different iteration of the
+          // outer loop over the saved CallerEdges.
+          if (std::find(CallerEdges.begin(), CallerEdges.end(), E) !=
+              CallerEdges.end())
+            continue;
+          // The CallerAllocTypeForAlloc and CalleeEdgeAllocTypesForCallerEdge
+          // are updated further below for all cases where we just invoked
+          // identifyClones recursively.
+          CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
+          CallerEdge = E;
+          UpdatedEdge = true;
+          break;
+        }
+      }
+      // If cloning removed this edge (and we didn't update it to a new edge
+      // above), we're done with this edge. It's possible we moved all of the
+      // context ids to an existing clone, in which case there's no need to do
+      // further processing for them.
+      if (CallerEdge->isRemoved())
+        continue;
+
+      // Now we need to update the information used for the cloning decisions
+      // further below, as we may have modified edges and their context ids.
+
+      // Note if we changed the CallerEdge above we would have already updated
+      // the context ids.
+      if (!UpdatedEdge) {
+        CallerEdgeContextsForAlloc = set_intersection(
+            CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
+        if (CallerEdgeContextsForAlloc.empty())
+          continue;
+      }
+      // Update the other information that depends on the edges and on the now
+      // updated CallerEdgeContextsForAlloc.
+      CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
+      CalleeEdgeAllocTypesForCallerEdge.clear();
+      for (auto &CalleeEdge : Node->CalleeEdges) {
+        CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
+            CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
+      }
+    }
 
     // First see if we can use an existing clone. Check each clone and its
     // callee edges for matching alloc types.
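The two tests below key their FileCheck lines to memprof_recursive.cc, whose full listing lives in the recursive.ll test comments. A condensed sketch of its call structure, reconstructed here from the line numbers in the checked remarks (the bodies are illustrative; only the B -> C -> B cycle and the two allocation contexts matter):

// Illustrative reconstruction; the authoritative source listing is in the
// comments of llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll.
char *B(int n); // forward declaration: C recurses back into B

char *D() {
  return new char[10]; // line 5: the profiled allocation
}
char *C(int n) {
  if (!n)
    return D();    // line 10: non-recursive path to the allocation
  return B(n - 1); // line 12: the recursive call from C back to B
}
char *B(int n) {   // line 14: cloned as _Z1Bi.memprof.1
  return C(n);     // line 15: B calls C, closing the B -> C -> B cycle
}
int main() {
  char *X = B(1);  // line 19: context that recurses through the cycle
  char *Y = B(0);  // line 20: context that does not recurse
  delete[] X;
  delete[] Y;
  return 0;
}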
diff --git a/llvm/test/ThinLTO/X86/memprof-recursive.ll b/llvm/test/ThinLTO/X86/memprof-recursive.ll
index 4b2b5490bc2cb..e1a9084b583b9 100644
--- a/llvm/test/ThinLTO/X86/memprof-recursive.ll
+++ b/llvm/test/ThinLTO/X86/memprof-recursive.ll
@@ -3,12 +3,15 @@
 ;; See llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll for
 ;; information on how the test was created.
 
+;; -stats requires asserts
+; REQUIRES: asserts
+
 ; RUN: opt -thinlto-bc %s >%t.o
 
 ;; Check behavior when we enable cloning of contexts involved with recursive
-;; cycles, but not through the cycle itself. I.e. until full support for
-;; recursion is added, the cloned recursive call from C back to B (line 12) will
-;; not be updated to call a clone.
+;; cycles, but not through the cycle itself. I.e. with full support for cloning
+;; through recursive cycles disabled, the cloned recursive call from C back to
+;; B (line 12) will not be updated to call a clone.
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
 ; RUN: -supports-hot-cold-new \
 ; RUN: -r=%t.o,_Z1Dv,plx \
@@ -19,6 +22,7 @@
 ; RUN: -memprof-verify-ccg -memprof-verify-nodes \
 ; RUN: -pass-remarks=memprof-context-disambiguation \
 ; RUN: -memprof-allow-recursive-callsites=true \
+; RUN: -memprof-clone-recursive-contexts=false \
 ; RUN: -o %t.out 2>&1 | FileCheck %s \
 ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
 ; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS
@@ -39,7 +43,7 @@
 ; RUN: --implicit-check-not="created clone" \
 ; RUN: --implicit-check-not="marked with memprof allocation attribute cold"
 
-;; Check the default behavior (enabled recursive callsites).
+;; Check the default behavior (clone recursive callsites).
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
 ; RUN: -supports-hot-cold-new \
 ; RUN: -r=%t.o,_Z1Dv,plx \
@@ -47,11 +51,11 @@
 ; RUN: -r=%t.o,_Z1Bi,plx \
 ; RUN: -r=%t.o,main,plx \
 ; RUN: -r=%t.o,_Znam, \
-; RUN: -memprof-verify-ccg -memprof-verify-nodes \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
 ; RUN: -pass-remarks=memprof-context-disambiguation \
 ; RUN: -o %t.out 2>&1 | FileCheck %s \
-; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
-; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS
+; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS \
+; RUN: --check-prefix=CLONE-RECUR-CALLSITES
 
 ;; Skipping recursive contexts should prevent spurious call to cloned version of
 ;; B from the context starting at memprof_recursive.cc:19:13, which is actually
@@ -67,6 +71,7 @@
 ; RUN: -pass-remarks=memprof-context-disambiguation \
 ; RUN: -memprof-allow-recursive-callsites=true \
 ; RUN: -memprof-allow-recursive-contexts=false \
+; RUN: -memprof-clone-recursive-contexts=false \
 ; RUN: -o %t.out 2>&1 | FileCheck %s \
 ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \
 ; RUN: --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS
@@ -76,6 +81,7 @@
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:8:0: created clone _Z1Ci.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
+; CLONE-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Bi.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:14:0: created clone _Z1Bi.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
 ;; We should only call the cold clone for the recursive context if we enabled
@@ -83,6 +89,7 @@
 ; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
 ; SKIP-RECUR-CONTEXTS-NOT: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:20:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; CLONE-RECUR-CALLSITES: 1 memprof-context-disambiguation - Number of backedges with deferred cloning
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
"x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll index d234dedc5a57a..1d09b1c1a0cb3 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/recursive.ll @@ -1,5 +1,8 @@ ;; Test recursion handling during cloning. -;; + +;; -stats requires asserts +; REQUIRES: asserts + ;; Original code looks like: ;; ;; #include @@ -35,13 +38,14 @@ ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. ;; Check behavior when we enable cloning of contexts involved with recursive -;; cycles, but not through the cycle itself. I.e. until full support for -;; recursion is added, the cloned recursive call from C back to B (line 12) will -;; not be updated to call a clone. +;; cycles, but not through the cycle itself. I.e. with full support for cloning +;; recursive cycles off, the cloned recursive call from C back to B (line 12) +;; will not be updated to call a clone. ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -memprof-allow-recursive-callsites=true \ +; RUN: -memprof-clone-recursive-contexts=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS @@ -57,13 +61,13 @@ ; RUN: --implicit-check-not="marked with memprof allocation attribute cold" \ ; RUN: --check-prefix=ALL -;; Check the default behavior (enabled recursive callsites). +;; Check the default behavior (clone recursive callsites). ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: %s -S 2>&1 | FileCheck %s \ -; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ -; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS +; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=ALLOW-RECUR-CONTEXTS \ +; RUN: --check-prefix=CLONE-RECUR-CALLSITES ;; Skipping recursive contexts should prevent spurious call to cloned version of ;; B from the context starting at memprof_recursive.cc:19:13, which is actually @@ -73,6 +77,7 @@ ; RUN: -pass-remarks=memprof-context-disambiguation \ ; RUN: -memprof-allow-recursive-callsites=true \ ; RUN: -memprof-allow-recursive-contexts=false \ +; RUN: -memprof-clone-recursive-contexts=false \ ; RUN: %s -S 2>&1 | FileCheck %s \ ; RUN: --implicit-check-not "memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned" \ ; RUN: --check-prefix=ALL --check-prefix=ALLOW-RECUR-CALLSITES --check-prefix=SKIP-RECUR-CONTEXTS @@ -84,6 +89,7 @@ ;; We should only call the cold clone for the recursive context if we enabled ;; recursive contexts via -memprof-allow-recursive-contexts=true (default). 
 ; ALLOW-RECUR-CONTEXTS: memprof_recursive.cc:19:13: call in clone main assigned to call function clone _Z1Bi.memprof.1
+; CLONE-RECUR-CALLSITES: memprof_recursive.cc:12:10: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Bi.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi.memprof.1 assigned to call function clone _Z1Ci.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci.memprof.1 assigned to call function clone _Z1Dv.memprof.1
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:5:10: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
@@ -95,6 +101,7 @@
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:15:10: call in clone _Z1Bi assigned to call function clone _Z1Ci
 ; ALLOW-RECUR-CALLSITES: memprof_recursive.cc:10:12: call in clone _Z1Ci assigned to call function clone _Z1Dv
 ; ALL: memprof_recursive.cc:5:10: call in clone _Z1Dv marked with memprof allocation attribute notcold
+; CLONE-RECUR-CALLSITES: 1 memprof-context-disambiguation - Number of backedges with deferred cloning
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
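The CLONE-RECUR-CALLSITES stats lines checked above follow the standard llvm::Statistic output shape, "<value> <DEBUG_TYPE> - <description>". A minimal sketch of how the DeferredBackedges counter added at the top of the patch produces that line under -stats (deferOneBackedge is a hypothetical caller; the real increment sits in identifyClones):

#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "memprof-context-disambiguation"

STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning");

// Each increment is aggregated and, when -stats is passed, printed at
// shutdown as:
//   1 memprof-context-disambiguation - Number of backedges with deferred cloning
static void deferOneBackedge() { ++DeferredBackedges; }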