Skip to content

Commit 9f44d5d

Browse files
authored
[flang] Simplify copy-in copy-out runtime API (#95822)
The runtime API for copy-in copy-out currently has an entry only for the copy-out. This entry has a "skipToInit" boolean that is never set to false by lowering, and it does not deal with the deallocation of the temporary. The generated code was a mix of inline code and runtime calls. This is not a big deal, but it is unneeded compiler and generated code complexity. With assumed-rank, it is also more cumbersome to establish a temporary descriptor. Instead, this patch: - Adds a CopyInAssign API that deals with establishing the temporary descriptor and does the copy. - Removes the unused arg to CopyOutAssign, and pushes destruction/deallocation responsibility inside it. Note that these runtime APIs are still not responsible for deciding whether copying in and out is needed. This is kept as a separate runtime call to IsContiguous, which is easier to inline/replace by inline code with the hope of removing the copy-in/out calls after user function inlining. @vzakhari has already shown that always inlining all the copy part increases Fortran compilation time due to loop optimization attempts for loops that are known to have little optimization profitability (the variable being copied from and to is not contiguous).
1 parent 1d97f8f commit 9f44d5d

26 files changed

+376
-387
lines changed

flang/include/flang/Optimizer/Builder/Runtime/Assign.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,12 @@ void genAssignExplicitLengthCharacter(fir::FirOpBuilder &builder,
5656
void genAssignTemporary(fir::FirOpBuilder &builder, mlir::Location loc,
5757
mlir::Value destBox, mlir::Value sourceBox);
5858

59-
/// Generate runtime call to CopyOutAssign to assign \p sourceBox to
60-
/// \p destBox. This call implements the copy-out of a temporary
61-
/// (\p sourceBox) to the actual argument (\p destBox) passed to a procedure,
62-
/// after the procedure returns to the caller.
63-
/// If \p skipToInit is false, then \p destBox will be initialized before
64-
/// the assignment, otherwise, it is assumed to be already initialized.
65-
/// The runtime makes sure that there is no reallocation of the top-level
66-
/// entity represented by \p destBox. If reallocation is required
67-
/// for the components of \p destBox, then it is done without finalization.
59+
/// Generate runtime call to "CopyInAssign" runtime API.
60+
void genCopyInAssign(fir::FirOpBuilder &builder, mlir::Location loc,
61+
mlir::Value tempBoxAddr, mlir::Value varBoxAddr);
62+
/// Generate runtime call to "CopyOutAssign" runtime API.
6863
void genCopyOutAssign(fir::FirOpBuilder &builder, mlir::Location loc,
69-
mlir::Value destBox, mlir::Value sourceBox,
70-
bool skipToInit);
64+
mlir::Value varBoxAddr, mlir::Value tempBoxAddr);
7165

7266
} // namespace fir::runtime
7367
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_ASSIGN_H

flang/include/flang/Optimizer/Dialect/FIRType.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ class BaseBoxType : public mlir::Type {
4141
public:
4242
using mlir::Type::Type;
4343

44+
/// Box attributes.
45+
enum class Attribute { None, Allocatable, Pointer };
46+
4447
/// Returns the element type of this box type.
4548
mlir::Type getEleTy() const;
4649

@@ -55,6 +58,9 @@ class BaseBoxType : public mlir::Type {
5558
BaseBoxType getBoxTypeWithNewShape(mlir::Type shapeMold) const;
5659
BaseBoxType getBoxTypeWithNewShape(int rank) const;
5760

61+
/// Return the same type, except for the attribute (fir.heap/fir.ptr).
62+
BaseBoxType getBoxTypeWithNewAttr(Attribute attr) const;
63+
5864
/// Methods for support type inquiry through isa, cast, and dyn_cast.
5965
static bool classof(mlir::Type type);
6066
};

flang/include/flang/Optimizer/HLFIR/HLFIROps.td

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,9 +1091,12 @@ def hlfir_CopyInOp : hlfir_Op<"copy_in", [MemoryEffects<[MemAlloc]>]> {
10911091
potentially absent variable storage. The second result indicates if a copy
10921092
was made.
10931093

1094+
A descriptor address must be provided for the temporary. This descriptor will
1095+
be set if a temporary copy was made.
1096+
10941097
This operation is meant to be used in combination with the hlfir.copy_out
1095-
operation that deletes the temporary if it was created and copies the data
1096-
back if needed.
1098+
operation that takes the address of the descriptor for the temporary, deletes
1099+
the temporary if it was created, and copies the data back if needed.
10971100
This operation allows passing non contiguous arrays to contiguous dummy
10981101
arguments, which is possible in Fortran procedure references.
10991102

@@ -1103,17 +1106,19 @@ def hlfir_CopyInOp : hlfir_Op<"copy_in", [MemoryEffects<[MemAlloc]>]> {
11031106
}];
11041107

11051108
let arguments = (ins Arg<fir_BaseBoxType, "", [MemRead]>:$var,
1106-
Optional<I1>:$var_is_present);
1109+
Arg<AnyReferenceLike, "", [MemWrite]>:$tempBox,
1110+
Optional<I1>:$var_is_present);
11071111

11081112
let results = (outs fir_BaseBoxType, I1);
11091113

11101114
let assemblyFormat = [{
1111-
$var (`handle_optional` $var_is_present^)?
1115+
$var `to` $tempBox (`handle_optional` $var_is_present^)?
11121116
attr-dict `:` functional-type(operands, results)
11131117
}];
11141118

11151119
let builders = [
1116-
OpBuilder<(ins "mlir::Value":$var, "mlir::Value":$var_is_present)>
1120+
OpBuilder<(ins "mlir::Value":$var, "mlir::Value":$temp_box,
1121+
"mlir::Value":$var_is_present)>
11171122
];
11181123

11191124
let extraClassDeclaration = [{
@@ -1138,9 +1143,10 @@ def hlfir_CopyOutOp : hlfir_Op<"copy_out", [MemoryEffects<[MemFree]>]> {
11381143
the temporary.
11391144
The copy back is done if $var is provided and $was_copied is true.
11401145
The deallocation of $temp is done if $was_copied is true.
1146+
$temp must be the descriptor address that was provided to hlfir.copy_in.
11411147
}];
11421148

1143-
let arguments = (ins Arg<fir_BaseBoxType, "", [MemRead]>:$temp,
1149+
let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$temp,
11441150
I1:$was_copied,
11451151
Arg<Optional<fir_BaseBoxType>, "", [MemWrite]>:$var);
11461152

flang/include/flang/Runtime/assign.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,16 @@ void RTDECL(Assign)(Descriptor &to, const Descriptor &from,
3636
// reallocation.
3737
void RTDECL(AssignTemporary)(Descriptor &to, const Descriptor &from,
3838
const char *sourceFile = nullptr, int sourceLine = 0);
39-
void RTDECL(CopyOutAssign)(Descriptor &to, const Descriptor &from,
40-
bool skipToInit, const char *sourceFile = nullptr, int sourceLine = 0);
39+
40+
// Establish "temp" descriptor as an allocatable descriptor with the same type,
41+
// rank, and length parameters as "var" and copy "var" to it using
42+
// AssignTemporary.
43+
void RTDECL(CopyInAssign)(Descriptor &temp, const Descriptor &var,
44+
const char *sourceFile = nullptr, int sourceLine = 0);
45+
// When "var" is provided, copy "temp" to it assuming "var" is already
46+
// initialized. Destroy and deallocate "temp" in all cases.
47+
void RTDECL(CopyOutAssign)(Descriptor *var, Descriptor &temp,
48+
const char *sourceFile = nullptr, int sourceLine = 0);
4149
// This variant is for assignments to explicit-length CHARACTER left-hand
4250
// sides that might need to handle truncation or blank-fill, and
4351
// must maintain the character length even if an allocatable array

flang/lib/Lower/ConvertCall.cpp

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -920,9 +920,11 @@ namespace {
920920
struct CallCleanUp {
921921
struct CopyIn {
922922
void genCleanUp(mlir::Location loc, fir::FirOpBuilder &builder) {
923-
builder.create<hlfir::CopyOutOp>(loc, copiedIn, wasCopied, copyBackVar);
923+
builder.create<hlfir::CopyOutOp>(loc, tempBox, wasCopied, copyBackVar);
924924
}
925-
mlir::Value copiedIn;
925+
// address of the descriptor holding the temp if a temp was created.
926+
mlir::Value tempBox;
927+
// Boolean indicating if a copy was made or not.
926928
mlir::Value wasCopied;
927929
// copyBackVar may be null if copy back is not needed.
928930
mlir::Value copyBackVar;
@@ -945,10 +947,10 @@ struct CallCleanUp {
945947
/// It holds the value to be passed in the call and any related
946948
/// clean-ups to be done after the call.
947949
struct PreparedDummyArgument {
948-
void pushCopyInCleanUp(mlir::Value copiedIn, mlir::Value wasCopied,
950+
void pushCopyInCleanUp(mlir::Value tempBox, mlir::Value wasCopied,
949951
mlir::Value copyBackVar) {
950952
cleanups.emplace_back(
951-
CallCleanUp{CallCleanUp::CopyIn{copiedIn, wasCopied, copyBackVar}});
953+
CallCleanUp{CallCleanUp::CopyIn{tempBox, wasCopied, copyBackVar}});
952954
}
953955
void pushExprAssociateCleanUp(mlir::Value tempVar, mlir::Value wasCopied) {
954956
cleanups.emplace_back(
@@ -987,7 +989,6 @@ struct ConditionallyPreparedDummy {
987989
for (const CallCleanUp &c : preparedDummy.cleanups) {
988990
if (const auto *copyInCleanUp =
989991
std::get_if<CallCleanUp::CopyIn>(&c.cleanUp)) {
990-
thenResultValues.push_back(copyInCleanUp->copiedIn);
991992
thenResultValues.push_back(copyInCleanUp->wasCopied);
992993
if (copyInCleanUp->copyBackVar)
993994
thenResultValues.push_back(copyInCleanUp->copyBackVar);
@@ -1042,8 +1043,10 @@ struct ConditionallyPreparedDummy {
10421043
mlir::Value copyBackVar;
10431044
if (copyInCleanUp->copyBackVar)
10441045
copyBackVar = ifOp.getResults().back();
1045-
preparedDummy.pushCopyInCleanUp(ifOp.getResults()[1],
1046-
ifOp.getResults()[2], copyBackVar);
1046+
// tempBox is an hlfir.copy_in argument created outside of the
1047+
// fir.if region. It does not need to be threaded as a fir.if result.
1048+
preparedDummy.pushCopyInCleanUp(copyInCleanUp->tempBox,
1049+
ifOp.getResults()[1], copyBackVar);
10471050
} else {
10481051
preparedDummy.pushExprAssociateCleanUp(ifOp.getResults()[1],
10491052
ifOp.getResults()[2]);
@@ -1204,10 +1207,29 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
12041207
dummyTypeWithActualRank, actual.getFortranElementType(),
12051208
actual.isPolymorphic());
12061209

1210+
PreparedDummyArgument preparedDummy;
1211+
1212+
// Helpers to generate hlfir.copy_in operation and register the related
1213+
// hlfir.copy_out creation.
1214+
auto genCopyIn = [&](hlfir::Entity var, bool doCopyOut) -> hlfir::Entity {
1215+
auto baseBoxTy = mlir::dyn_cast<fir::BaseBoxType>(var.getType());
1216+
assert(baseBoxTy && "expect non simply contiguous variables to be boxes");
1217+
// Create allocatable descriptor for the potential temporary.
1218+
mlir::Type tempBoxType = baseBoxTy.getBoxTypeWithNewAttr(
1219+
fir::BaseBoxType::Attribute::Allocatable);
1220+
mlir::Value tempBox = builder.createTemporary(loc, tempBoxType);
1221+
auto copyIn = builder.create<hlfir::CopyInOp>(
1222+
loc, var, tempBox, /*var_is_present=*/mlir::Value{});
1223+
// Register the copy-out after the call.
1224+
preparedDummy.pushCopyInCleanUp(copyIn.getTempBox(), copyIn.getWasCopied(),
1225+
doCopyOut ? copyIn.getVar()
1226+
: mlir::Value{});
1227+
return hlfir::Entity{copyIn.getCopiedIn()};
1228+
};
1229+
12071230
// Step 2: prepare the storage for the dummy arguments, ensuring that it
12081231
// matches the dummy requirements (e.g., must be contiguous or must be
12091232
// a temporary).
1210-
PreparedDummyArgument preparedDummy;
12111233
hlfir::Entity entity =
12121234
hlfir::derefPointersAndAllocatables(loc, builder, actual);
12131235
if (entity.isVariable()) {
@@ -1243,8 +1265,6 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
12431265
preparedDummy.pushExprAssociateCleanUp(associate);
12441266
} else if (mustDoCopyInOut) {
12451267
// Copy-in non contiguous variables.
1246-
assert(mlir::isa<fir::BaseBoxType>(entity.getType()) &&
1247-
"expect non simply contiguous variables to be boxes");
12481268
if (actualIsAssumedRank)
12491269
TODO(loc, "copy-in and copy-out of assumed-rank arguments");
12501270
// TODO: for non-finalizable monomorphic derived type actual
@@ -1254,13 +1274,7 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
12541274
// allocation for the temp in this case. We can communicate
12551275
// this to the codegen via some CopyInOp flag.
12561276
// This is a performance concern.
1257-
auto copyIn = builder.create<hlfir::CopyInOp>(
1258-
loc, entity, /*var_is_present=*/mlir::Value{});
1259-
entity = hlfir::Entity{copyIn.getCopiedIn()};
1260-
// Register the copy-out after the call.
1261-
preparedDummy.pushCopyInCleanUp(
1262-
copyIn.getCopiedIn(), copyIn.getWasCopied(),
1263-
arg.mayBeModifiedByCall() ? copyIn.getVar() : mlir::Value{});
1277+
entity = genCopyIn(entity, arg.mayBeModifiedByCall());
12641278
}
12651279
} else {
12661280
const Fortran::lower::SomeExpr *expr = arg.entity->UnwrapExpr();
@@ -1287,14 +1301,7 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
12871301
entity = hlfir::Entity{builder.create<fir::ReboxOp>(
12881302
loc, boxType, entity, /*shape=*/mlir::Value{},
12891303
/*slice=*/mlir::Value{})};
1290-
auto copyIn = builder.create<hlfir::CopyInOp>(
1291-
loc, entity, /*var_is_present=*/mlir::Value{});
1292-
entity = hlfir::Entity{copyIn.getCopiedIn()};
1293-
// Note that the copy-out is not required, but the copy-in
1294-
// temporary must be deallocated if created.
1295-
preparedDummy.pushCopyInCleanUp(copyIn.getCopiedIn(),
1296-
copyIn.getWasCopied(),
1297-
/*copyBackVar=*/mlir::Value{});
1304+
entity = genCopyIn(entity, /*doCopyOut=*/false);
12981305
}
12991306
}
13001307

flang/lib/Lower/ConvertExpr.cpp

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2290,11 +2290,21 @@ class ScalarExprLowering {
22902290
bool isActualArgBox =
22912291
fir::isa_box_type(fir::getBase(copyOutPair.var).getType());
22922292
auto doCopyOut = [&]() {
2293-
if (!copyOutPair.argMayBeModifiedByCall) {
2294-
return;
2295-
}
22962293
if (!isActualArgBox || inlineCopyInOutForBoxes) {
2297-
genArrayCopy(copyOutPair.var, copyOutPair.temp);
2294+
if (copyOutPair.argMayBeModifiedByCall)
2295+
genArrayCopy(copyOutPair.var, copyOutPair.temp);
2296+
if (mlir::isa<fir::RecordType>(
2297+
fir::getElementTypeOf(copyOutPair.temp))) {
2298+
// Destroy components of the temporary (if any).
2299+
// If there are no components requiring destruction, then the call
2300+
// is a no-op.
2301+
mlir::Value tempBox =
2302+
fir::getBase(builder.createBox(loc, copyOutPair.temp));
2303+
fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
2304+
tempBox);
2305+
}
2306+
// Deallocate the top-level entity of the temporary.
2307+
builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
22982308
return;
22992309
}
23002310
// Generate CopyOutAssign() call to copy data from the temporary
@@ -2305,51 +2315,39 @@ class ScalarExprLowering {
23052315
// Moreover, CopyOutAssign() guarantees that there will be no
23062316
// finalization for the LHS even if it is of a derived type
23072317
// with finalization.
2318+
2319+
// Create allocatable descriptor for the temp so that the runtime may
2320+
// deallocate it.
23082321
mlir::Value srcBox =
23092322
fir::getBase(builder.createBox(loc, copyOutPair.temp));
2310-
mlir::Value destBox =
2311-
fir::getBase(builder.createBox(loc, copyOutPair.var));
2312-
mlir::Value destBoxRef = builder.createTemporary(loc, destBox.getType());
2313-
builder.create<fir::StoreOp>(loc, destBox, destBoxRef);
2314-
fir::runtime::genCopyOutAssign(builder, loc, destBoxRef, srcBox,
2315-
/*skipToInit=*/true);
2316-
};
2317-
if (!copyOutPair.restrictCopyAndFreeAtRuntime) {
2318-
doCopyOut();
2319-
2320-
if (mlir::isa<fir::RecordType>(fir::getElementTypeOf(copyOutPair.temp))) {
2321-
// Destroy components of the temporary (if any).
2322-
// If there are no components requiring destruction, then the call
2323-
// is a no-op.
2324-
mlir::Value tempBox =
2325-
fir::getBase(builder.createBox(loc, copyOutPair.temp));
2326-
fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
2327-
tempBox);
2323+
mlir::Type allocBoxTy =
2324+
mlir::cast<fir::BaseBoxType>(srcBox.getType())
2325+
.getBoxTypeWithNewAttr(fir::BaseBoxType::Attribute::Allocatable);
2326+
srcBox = builder.create<fir::ReboxOp>(loc, allocBoxTy, srcBox,
2327+
/*shift=*/mlir::Value{},
2328+
/*slice=*/mlir::Value{});
2329+
mlir::Value srcBoxRef = builder.createTemporary(loc, srcBox.getType());
2330+
builder.create<fir::StoreOp>(loc, srcBox, srcBoxRef);
2331+
// Create descriptor pointer to variable descriptor if copy out is needed,
2332+
// and nullptr otherwise.
2333+
mlir::Value destBoxRef;
2334+
if (copyOutPair.argMayBeModifiedByCall) {
2335+
mlir::Value destBox =
2336+
fir::getBase(builder.createBox(loc, copyOutPair.var));
2337+
destBoxRef = builder.createTemporary(loc, destBox.getType());
2338+
builder.create<fir::StoreOp>(loc, destBox, destBoxRef);
2339+
} else {
2340+
destBoxRef = builder.create<fir::ZeroOp>(loc, srcBoxRef.getType());
23282341
}
2342+
fir::runtime::genCopyOutAssign(builder, loc, destBoxRef, srcBoxRef);
2343+
};
23292344

2330-
// Deallocate the top-level entity of the temporary.
2331-
builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
2332-
return;
2333-
}
2334-
2335-
builder.genIfThen(loc, *copyOutPair.restrictCopyAndFreeAtRuntime)
2336-
.genThen([&]() {
2337-
doCopyOut();
2338-
if (mlir::isa<fir::RecordType>(
2339-
fir::getElementTypeOf(copyOutPair.temp))) {
2340-
// Destroy components of the temporary (if any).
2341-
// If there are no components requiring destruction, then the call
2342-
// is a no-op.
2343-
mlir::Value tempBox =
2344-
fir::getBase(builder.createBox(loc, copyOutPair.temp));
2345-
fir::runtime::genDerivedTypeDestroyWithoutFinalization(builder, loc,
2346-
tempBox);
2347-
}
2348-
2349-
// Deallocate the top-level entity of the temporary.
2350-
builder.create<fir::FreeMemOp>(loc, fir::getBase(copyOutPair.temp));
2351-
})
2352-
.end();
2345+
if (!copyOutPair.restrictCopyAndFreeAtRuntime)
2346+
doCopyOut();
2347+
else
2348+
builder.genIfThen(loc, *copyOutPair.restrictCopyAndFreeAtRuntime)
2349+
.genThen([&]() { doCopyOut(); })
2350+
.end();
23532351
}
23542352

23552353
/// Lower a designator to a variable that may be absent at runtime into an

flang/lib/Optimizer/Builder/Runtime/Assign.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,29 @@ void fir::runtime::genAssignTemporary(fir::FirOpBuilder &builder,
6969
builder.create<fir::CallOp>(loc, func, args);
7070
}
7171

72+
void fir::runtime::genCopyInAssign(fir::FirOpBuilder &builder,
73+
mlir::Location loc, mlir::Value destBox,
74+
mlir::Value sourceBox) {
75+
auto func = fir::runtime::getRuntimeFunc<mkRTKey(CopyInAssign)>(loc, builder);
76+
auto fTy = func.getFunctionType();
77+
auto sourceFile = fir::factory::locationToFilename(builder, loc);
78+
auto sourceLine =
79+
fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
80+
auto args = fir::runtime::createArguments(builder, loc, fTy, destBox,
81+
sourceBox, sourceFile, sourceLine);
82+
builder.create<fir::CallOp>(loc, func, args);
83+
}
84+
7285
void fir::runtime::genCopyOutAssign(fir::FirOpBuilder &builder,
7386
mlir::Location loc, mlir::Value destBox,
74-
mlir::Value sourceBox, bool skipToInit) {
87+
mlir::Value sourceBox) {
7588
auto func =
7689
fir::runtime::getRuntimeFunc<mkRTKey(CopyOutAssign)>(loc, builder);
7790
auto fTy = func.getFunctionType();
7891
auto sourceFile = fir::factory::locationToFilename(builder, loc);
7992
auto sourceLine =
80-
fir::factory::locationToLineNo(builder, loc, fTy.getInput(4));
81-
auto i1Ty = builder.getIntegerType(1);
82-
auto skipToInitVal = builder.createIntegerConstant(loc, i1Ty, skipToInit);
83-
auto args =
84-
fir::runtime::createArguments(builder, loc, fTy, destBox, sourceBox,
85-
skipToInitVal, sourceFile, sourceLine);
93+
fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
94+
auto args = fir::runtime::createArguments(builder, loc, fTy, destBox,
95+
sourceBox, sourceFile, sourceLine);
8696
builder.create<fir::CallOp>(loc, func, args);
8797
}

0 commit comments

Comments
 (0)