
Commit 755e776

[mlir][linalg] Vectorize 1D convolution
Differential Revision: https://reviews.llvm.org/D140188
1 parent 1436a92 commit 755e776

File tree

2 files changed: +491, -45 lines changed


mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp

Lines changed: 212 additions & 45 deletions
@@ -385,8 +385,10 @@ mlir::linalg::getCombinerOpKind(Operation *combinerOp) {
           [&](auto op) { return CombiningKind::ADD; })
       .Case<arith::AndIOp>([&](auto op) { return CombiningKind::AND; })
       .Case<arith::MaxSIOp>([&](auto op) { return CombiningKind::MAXSI; })
+      .Case<arith::MaxUIOp>([&](auto op) { return CombiningKind::MAXUI; })
       .Case<arith::MaxFOp>([&](auto op) { return CombiningKind::MAXF; })
       .Case<arith::MinSIOp>([&](auto op) { return CombiningKind::MINSI; })
+      .Case<arith::MinUIOp>([&](auto op) { return CombiningKind::MINUI; })
       .Case<arith::MinFOp>([&](auto op) { return CombiningKind::MINF; })
       .Case<arith::MulIOp, arith::MulFOp>(
           [&](auto op) { return CombiningKind::MUL; })
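
Note on the two added cases: signed and unsigned integer min/max disagree on the same bit pattern, which is why CombiningKind keeps distinct MAXSI/MAXUI (and MINSI/MINUI) entries. A standalone C++ sketch of the distinction (mine, not part of this patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // 0xFFFFFFFF is -1 when read as int32_t, but 4294967295 as uint32_t.
  uint32_t bits = 0xFFFFFFFFu;
  int32_t same = static_cast<int32_t>(bits);
  assert(std::max<int32_t>(same, 0) == 0);      // signed max: -1 < 0
  assert(std::max<uint32_t>(bits, 0u) == bits); // unsigned max: 2^32 - 1 wins
  return 0;
}
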
@@ -1796,6 +1798,26 @@ static void bindShapeDims(ShapedType shapedType, IntTy &...vals) {
 }
 
 namespace {
+bool isCastOfBlockArgument(Operation *op) {
+  return isa<CastOpInterface>(op) && op->getNumOperands() == 1 &&
+         op->getOperand(0).isa<BlockArgument>();
+}
+
+bool isSupportedPoolKind(vector::CombiningKind kind) {
+  switch (kind) {
+  case vector::CombiningKind::ADD:
+  case vector::CombiningKind::MAXF:
+  case vector::CombiningKind::MAXSI:
+  case vector::CombiningKind::MAXUI:
+  case vector::CombiningKind::MINF:
+  case vector::CombiningKind::MINSI:
+  case vector::CombiningKind::MINUI:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Generate a vector implementation for either:
 /// ```
 /// Op def: ( n, w, c, kw, f )
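
Every kind whitelisted by isSupportedPoolKind is an associative, commutative reduction, which is what lets the generator below fold a pooling window one kernel tap at a time. A scalar model of that property (illustrative only, not code from the patch):

#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>

// One pooling window reduced tap by tap; `combine` plays the role of the
// vector::CombiningKind accepted by isSupportedPoolKind.
int poolWindowRef(const std::vector<int> &taps,
                  const std::function<int(int, int)> &combine) {
  int acc = taps.front();
  for (size_t k = 1; k < taps.size(); ++k)
    acc = combine(acc, taps[k]);
  return acc;
}

int main() {
  const std::vector<int> taps = {3, 9, 4};
  assert(poolWindowRef(taps, [](int a, int b) { return std::max(a, b); }) == 9);
  assert(poolWindowRef(taps, [](int a, int b) { return a + b; }) == 16);
  return 0;
}
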
@@ -1838,41 +1860,33 @@ struct Conv1DGenerator
     resShapedType = resShaped.getType().dyn_cast<ShapedType>();
     if (!lhsShapedType || !rhsShapedType || !resShapedType)
       return;
-    if (lhsShapedType.getRank() != 3 ||
-        (rhsShapedType.getRank() != 2 && rhsShapedType.getRank() != 3) ||
-        resShapedType.getRank() != 3)
+    // LHS has dimension NCW/NWC and RES has dimension NFW/NCW/NWF/NWC.
+    if (lhsShapedType.getRank() != 3 || resShapedType.getRank() != 3)
       return;
 
-    // Check for reduction `add` preceded by `mul`.
     Operation *reduceOp = matchLinalgReduction(linalgOp.getDpsInitOperand(0));
     if (!reduceOp)
       return;
-    std::optional<vector::CombiningKind> maybeKind;
-    maybeKind = getCombinerOpKind(reduceOp);
-    if (!maybeKind || *maybeKind != vector::CombiningKind::ADD)
+    redOp = reduceOp->getName().getIdentifier();
+
+    if (!setOperKind(reduceOp))
       return;
-    // Check for single `mul` predecessor. The `mul` operands must be block
-    // arguments or extension of block arguments.
-    Operation *mulOp = nullptr;
-    for (Value operand : reduceOp->getOperands()) {
-      if (operand.isa<BlockArgument>())
-        continue;
-      if (mulOp)
-        return;
-      mulOp = operand.getDefiningOp();
-      if (!mulOp || !isa<arith::MulIOp, arith::MulFOp>(mulOp))
-        return;
-    }
-    if (!mulOp)
+    auto maybeKind = getCombinerOpKind(reduceOp);
+    if (!(maybeKind && (*maybeKind == vector::CombiningKind::ADD ||
+                        (oper == Pool && isSupportedPoolKind(*maybeKind))))) {
       return;
-    for (Value operand : mulOp->getOperands()) {
-      if (Operation *def = operand.getDefiningOp()) {
-        if (!isa<CastOpInterface>(def))
-          return;
-        operand = def->getOperand(0);
-      }
-      if (!operand.isa<BlockArgument>())
+    }
+
+    auto rhsRank = rhsShapedType.getRank();
+    switch (oper) {
+    case Conv:
+      if (rhsRank != 2 && rhsRank != 3)
+        return;
+      break;
+    case Pool:
+      if (rhsRank != 1)
         return;
+      break;
     }
     // The op is now known to be valid.
     valid = true;
@@ -1889,38 +1903,70 @@ struct Conv1DGenerator
   /// > 1.
   FailureOr<Operation *> conv(Conv1DOpOrder conv1DOpOrder) {
     if (!valid)
-      return rewriter.notifyMatchFailure(op, "unvectorizable 1-D conv");
+      return rewriter.notifyMatchFailure(op, "unvectorizable 1-D conv/pool");
 
     int64_t nSize, wSize, cSize, kwSize, fSize;
     SmallVector<int64_t, 3> lhsShape, rhsShape, resShape;
     switch (conv1DOpOrder) {
     case Conv1DOpOrder::Nwc:
-      // kernel{kw, c, f}
-      bindShapeDims(rhsShapedType, kwSize, cSize, fSize);
       // out{n, w, f}
-      bindShapeDims(resShapedType, nSize, wSize);
+      bindShapeDims(resShapedType, nSize, wSize, fSize);
+      switch (oper) {
+      case Conv:
+        // kernel{kw, c, f}
+        bindShapeDims(rhsShapedType, kwSize, cSize);
+        break;
+      case Pool:
+        // kernel{kw}
+        bindShapeDims(rhsShapedType, kwSize);
+        cSize = fSize;
+        break;
+      }
       lhsShape = {nSize,
                   // iw = ow * sw + kw * dw - 1
                   // (i.e. 16 convolved with 3 (@stride 1 dilation 1) -> 14)
                   // Perform the proper inclusive -> exclusive -> inclusive.
                   ((wSize - 1) * strideW + 1) + ((kwSize - 1) * dilationW + 1) -
                       1,
                   cSize};
-      rhsShape = {kwSize, cSize, fSize};
+      switch (oper) {
+      case Conv:
+        rhsShape = {kwSize, cSize, fSize};
+        break;
+      case Pool:
+        rhsShape = {kwSize};
+        break;
+      }
       resShape = {nSize, wSize, fSize};
       break;
     case Conv1DOpOrder::Ncw:
-      // kernel{f, c, kw}
-      bindShapeDims(rhsShapedType, fSize, cSize, kwSize);
       // out{n, f, w}
       bindShapeDims(resShapedType, nSize, fSize, wSize);
+      switch (oper) {
+      case Conv:
+        // kernel{f, c, kw}
+        bindShapeDims(rhsShapedType, fSize, cSize, kwSize);
+        break;
+      case Pool:
+        // kernel{kw}
+        bindShapeDims(rhsShapedType, kwSize);
+        cSize = fSize;
+        break;
+      }
       lhsShape = {nSize, cSize,
                   // iw = ow * sw + kw * dw - 1
                   // (i.e. 16 convolved with 3 (@stride 1 dilation 1) -> 14)
                   // Perform the proper inclusive -> exclusive -> inclusive.
                   ((wSize - 1) * strideW + 1) + ((kwSize - 1) * dilationW + 1) -
                       1};
-      rhsShape = {fSize, cSize, kwSize};
+      switch (oper) {
+      case Conv:
+        rhsShape = {fSize, cSize, kwSize};
+        break;
+      case Pool:
+        rhsShape = {kwSize};
+        break;
+      }
       resShape = {nSize, fSize, wSize};
       break;
     }
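
The lhsShape arithmetic above is the standard "required input width" computation. A small standalone check of the inclusive -> exclusive -> inclusive comment (my sketch, not part of the patch):

#include <cassert>

// Input width needed so that `ow` output columns can be produced by a kernel
// of width `kw` at the given stride and dilation, written exactly as in the
// patch: the sum of two inclusive spans minus their shared endpoint.
int requiredInputWidth(int ow, int kw, int stride, int dilation) {
  return ((ow - 1) * stride + 1) + ((kw - 1) * dilation + 1) - 1;
}

int main() {
  // The comment's example: width 16 convolved with 3 (stride 1, dilation 1)
  // yields 14 outputs, so 14 outputs require an input of width 16.
  assert(requiredInputWidth(14, 3, 1, 1) == 16);
  // Dilation spreads the taps: kw = 3 at dilation 2 covers a span of 5.
  assert(requiredInputWidth(14, 3, 1, 2) == 18);
  return 0;
}
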
@@ -1944,8 +1990,11 @@ struct Conv1DGenerator
     Value lhs = rewriter.create<vector::TransferReadOp>(
         loc, lhsType, lhsShaped, ValueRange{zero, zero, zero});
     // Read rhs slice of size {kw, c, f} @ [0, 0, 0].
-    Value rhs = rewriter.create<vector::TransferReadOp>(
-        loc, rhsType, rhsShaped, ValueRange{zero, zero, zero});
+    // This is needed only for Conv.
+    Value rhs = nullptr;
+    if (oper == Conv)
+      rhs = rewriter.create<vector::TransferReadOp>(
+          loc, rhsType, rhsShaped, ValueRange{zero, zero, zero});
     // Read res slice of size {n, w, f} @ [0, 0, 0].
     Value res = rewriter.create<vector::TransferReadOp>(
         loc, resType, resShaped, ValueRange{zero, zero, zero});
@@ -1964,7 +2013,10 @@ struct Conv1DGenerator
     lhs = rewriter.create<vector::TransposeOp>(loc, lhs, permLhs);
     // fcw -> wcf
     static constexpr std::array<int64_t, 3> permRhs = {2, 1, 0};
-    rhs = rewriter.create<vector::TransposeOp>(loc, rhs, permRhs);
+
+    // This is needed only for Conv.
+    if (oper == Conv)
+      rhs = rewriter.create<vector::TransposeOp>(loc, rhs, permRhs);
     // nfw -> nwf
     static constexpr std::array<int64_t, 3> permRes = {0, 2, 1};
     res = rewriter.create<vector::TransposeOp>(loc, res, permRes);
@@ -1988,10 +2040,12 @@ struct Conv1DGenerator
       }
     }
     // Extract rhs slice of size {c, f} @ [kw].
-    for (int64_t kw = 0; kw < kwSize; ++kw) {
-      rhsVals.push_back(rewriter.create<vector::ExtractOp>(
-          loc, rhs, /*offsets=*/ArrayRef<int64_t>{kw}));
-    }
+    // Do not do this for pooling.
+    if (oper == Conv)
+      for (int64_t kw = 0; kw < kwSize; ++kw) {
+        rhsVals.push_back(rewriter.create<vector::ExtractOp>(
+            loc, rhs, /*offsets=*/ArrayRef<int64_t>{kw}));
+      }
     // Extract res slice: {n, wSizeStep, f} @ [0, w, 0].
     for (int64_t w = 0; w < wSize; w += wSizeStep) {
       resVals.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
@@ -2005,11 +2059,21 @@ struct Conv1DGenerator
       return kw * (wSize / wSizeStep) + w;
     };
 
-    // Compute contraction: O{n, w, f} += I{n, sw * w + dw * kw, c} * F{c, f}
+    // Compute contraction: O{n, w, f} += I{n, sw * w + dw * kw, c} * F{c, f},
+    // or perform a simple arith operation for pooling.
     for (int64_t kw = 0; kw < kwSize; ++kw) {
       for (int64_t w = 0; w < wSize; w += wSizeStep) {
-        resVals[w] = conv1dSliceAsContraction(
-            rewriter, loc, lhsVals[linearIndex(kw, w)], rhsVals[kw], resVals[w]);
+        switch (oper) {
+        case Conv:
+          resVals[w] = conv1dSliceAsContraction(rewriter, loc,
+                                                lhsVals[linearIndex(kw, w)],
+                                                rhsVals[kw], resVals[w]);
+          break;
+        case Pool:
+          resVals[w] = pool1dSlice(rewriter, loc, lhsVals[linearIndex(kw, w)],
+                                   resVals[w]);
+          break;
+        }
       }
     }
 
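To see what the kw/w double loop computes, here is a scalar NWC reference of the same decomposition (my own sketch, not the patch's code): each kernel tap kw contributes a strided slice of the input, folded into the running result by multiply-accumulate (Conv) or by the bare reduction (Pool):

#include <algorithm>
#include <cassert>
#include <vector>

// Scalar reference for one (n, channel) slice, mirroring the kw-outer loop
// above: conv accumulates in[sw * w + dw * kw] * filter[kw]; max-pooling
// combines in[sw * w + dw * kw] directly into res[w].
void conv1dRef(const std::vector<float> &in, const std::vector<float> &filt,
               std::vector<float> &res, int sw, int dw) {
  for (size_t kw = 0; kw < filt.size(); ++kw)
    for (size_t w = 0; w < res.size(); ++w)
      res[w] += in[sw * w + dw * kw] * filt[kw];
}

void maxPool1dRef(const std::vector<float> &in, int kwSize,
                  std::vector<float> &res, int sw, int dw) {
  for (int kw = 0; kw < kwSize; ++kw)
    for (size_t w = 0; w < res.size(); ++w)
      res[w] = std::max(res[w], in[sw * w + dw * kw]);
}

int main() {
  std::vector<float> in = {1, 2, 3, 4}, filt = {1, 1, 1};
  std::vector<float> conv(2, 0.0f), pool(2, -1e30f);
  conv1dRef(in, filt, conv, /*sw=*/1, /*dw=*/1);   // {1+2+3, 2+3+4}
  maxPool1dRef(in, 3, pool, /*sw=*/1, /*dw=*/1);   // {3, 4}
  assert(conv[0] == 6 && conv[1] == 9);
  assert(pool[0] == 3 && pool[1] == 4);
  return 0;
}
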
@@ -2060,6 +2124,16 @@ struct Conv1DGenerator
         /*iteratorTypes=*/ArrayRef<vector::IteratorType>{par, par, par, red});
   }
 
+  // Create a reduction: lhs{n, w, c} -> res{n, w, c}
+  Value pool1dSlice(RewriterBase &rewriter, Location loc, Value lhs,
+                    Value res) {
+    if (isPoolExt)
+      lhs = rewriter.create(loc, poolExtOp, lhs, res.getType())->getResult(0);
+    return rewriter
+        .create(loc, redOp, ArrayRef<Value>{lhs, res}, res.getType())
+        ->getResult(0);
+  }
+
   /// Generate a vector implementation for:
   /// ```
   /// Op def: ( n, w, c, kw)
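
pool1dSlice optionally applies the matched extension op before combining. A scalar analogue of that widen-then-reduce step (illustrative only; the widening cast stands in for poolExtOp, the addition for redOp):

#include <cassert>
#include <cstdint>
#include <vector>

// Scalar analogue of pool1dSlice for sum-pooling with widening: when the
// linalg region extends the input element type before reducing (isPoolExt),
// the same cast must be applied to each lhs slice before the combine.
int32_t sumPoolWidenRef(const std::vector<int8_t> &taps) {
  int32_t res = 0;
  for (int8_t t : taps) {
    int32_t widened = static_cast<int32_t>(t); // stands in for poolExtOp
    res = res + widened;                       // stands in for redOp
  }
  return res;
}

int main() {
  // 100 + 100 overflows int8_t but not the widened accumulator.
  const std::vector<int8_t> taps = {100, 100};
  assert(sumPoolWidenRef(taps) == 200);
  return 0;
}
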
@@ -2236,6 +2310,7 @@ struct Conv1DGenerator
                 /*rhsIndex*/ {kw, c, f},
                 /*resIndex*/ {n, w, f}}))
       return conv(Conv1DOpOrder::Nwc);
+
     return rewriter.notifyMatchFailure(op, "not a conv::Nwc layout");
   }
 
@@ -2256,6 +2331,41 @@ struct Conv1DGenerator
     return rewriter.notifyMatchFailure(op, "not a conv::Ncw layout");
   }
 
+  /// Entry point that transposes into the common form:
+  /// {{n, strideW * w + dilationW * kw, c}, {kw}, {n, w, c}} for pooling
+  FailureOr<Operation *> generateNwcPooling() {
+    AffineExpr n, w, c, kw;
+    bindDims(ctx, n, w, c, kw);
+    if (!iters({Par(), Par(), Par(), Red()}))
+      return rewriter.notifyMatchFailure(op,
+                                         "failed to match pooling 3-par 1-red");
+
+    // No transposition needed.
+    if (layout({/*lhsIndex*/ {n, strideW * w + dilationW * kw, c},
+                /*rhsIndex*/ {kw},
+                /*resIndex*/ {n, w, c}}))
+      return conv(Conv1DOpOrder::Nwc);
+
+    return rewriter.notifyMatchFailure(op, "not a pooling::Nwc layout");
+  }
+
+  /// Entry point that transposes into the common form:
+  /// {{n, c, strideW * w + dilationW * kw}, {kw}, {n, c, w}} for pooling
+  FailureOr<Operation *> generateNcwPooling() {
+    AffineExpr n, w, c, kw;
+    bindDims(ctx, n, c, w, kw);
+    if (!iters({Par(), Par(), Par(), Red()}))
+      return rewriter.notifyMatchFailure(op,
+                                         "failed to match pooling 3-par 1-red");
+
+    if (layout({/*lhsIndex*/ {n, c, strideW * w + dilationW * kw},
+                /*rhsIndex*/ {kw},
+                /*resIndex*/ {n, c, w}}))
+      return conv(Conv1DOpOrder::Ncw);
+
+    return rewriter.notifyMatchFailure(op, "not a pooling::Ncw layout");
+  }
+
   /// Entry point that transposes into the common form:
   /// {{n, strideW * w + dilationW * kw, c}, {kw, c}, {n, w, c}}
   FailureOr<Operation *> generateDilatedConv() {
@@ -2275,10 +2385,61 @@ struct Conv1DGenerator
   }
 
 private:
+  enum OperKind { Conv, Pool };
   bool valid = false;
+  OperKind oper = Conv;
+  StringAttr redOp;
+  StringAttr poolExtOp;
+  bool isPoolExt = false;
   int strideW, dilationW;
   Value lhsShaped, rhsShaped, resShaped;
   ShapedType lhsShapedType, rhsShapedType, resShapedType;
+
+  // Sets oper, poolExtOp and isPoolExt for valid conv/pooling ops.
+  // Returns true iff it is a valid conv/pooling op.
+  // If the region has 2 ops (reduction + yield) or 3 ops (extension +
+  // reduction + yield) and rhs is not used, then it is the body of a pooling.
+  // If conv, check for a single `mul` predecessor. The `mul` operands must be
+  // block arguments or extensions of block arguments.
+  // Otherwise, check for one or zero `ext` predecessors. The `ext` operands
+  // must be block arguments or extensions of block arguments.
+  bool setOperKind(Operation *reduceOp) {
+    int numBlockArguments =
+        llvm::count_if(reduceOp->getOperands(),
+                       [](Value v) { return v.isa<BlockArgument>(); });
+    switch (numBlockArguments) {
+    case 1: {
+      // Will be convolution if the feeder is a MulOp.
+      // Otherwise, check whether it can be pooling.
+      auto feedValIt = llvm::find_if(reduceOp->getOperands(), [](Value v) {
+        return !v.isa<BlockArgument>();
+      });
+      Operation *feedOp = (*feedValIt).getDefiningOp();
+      if (isCastOfBlockArgument(feedOp)) {
+        oper = Pool;
+        isPoolExt = true;
+        poolExtOp = feedOp->getName().getIdentifier();
+      } else if (!(isa<arith::MulIOp, arith::MulFOp>(feedOp) &&
+                   llvm::all_of(feedOp->getOperands(), [](Value v) {
+                     if (v.isa<BlockArgument>())
+                       return true;
+                     if (Operation *op = v.getDefiningOp())
+                       return isCastOfBlockArgument(op);
+                     return false;
+                   }))) {
+        return false;
+      }
+      return true;
+    }
+    case 2:
+      // Must be pooling.
+      oper = Pool;
+      isPoolExt = false;
+      return true;
+    default:
+      return false;
+    }
+  }
 };
 } // namespace
 
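The shape of the region body is what setOperKind distinguishes: reduce(mul(arg0, arg1), acc) is a convolution, reduce(arg0, acc) a pooling, and reduce(ext(arg0), acc) a pooling with extension. A toy classifier over a hand-rolled tag (purely illustrative; `Feeder` is my stand-in, not MLIR's API):

#include <cassert>

// Toy model of the region-body patterns setOperKind distinguishes. One
// reduction operand is always the accumulator block argument; `feeder`
// describes what produces the other operand.
enum class Feeder { BlockArg, Mul, Ext };
enum class Kind { Conv, Pool };

Kind classify(Feeder feeder) {
  switch (feeder) {
  case Feeder::BlockArg:
    return Kind::Pool; // reduce(arg, acc): 2-op region (reduction + yield)
  case Feeder::Ext:
    return Kind::Pool; // reduce(ext(arg), acc): 3-op region, isPoolExt = true
  case Feeder::Mul:
    return Kind::Conv; // reduce(mul(...), acc): the convolution body
  }
  return Kind::Conv; // unreachable; keeps compilers happy
}

int main() {
  assert(classify(Feeder::Mul) == Kind::Conv);
  assert(classify(Feeder::BlockArg) == Kind::Pool);
  assert(classify(Feeder::Ext) == Kind::Pool);
  return 0;
}
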
@@ -2299,6 +2460,12 @@ static FailureOr<Operation *> vectorizeConvolution(RewriterBase &rewriter,
   if (succeeded(res))
     return res;
   res = e.generateNcwConv();
+  if (succeeded(res))
+    return res;
+  res = e.generateNwcPooling();
+  if (succeeded(res))
+    return res;
+  res = e.generateNcwPooling();
   if (succeeded(res))
     return res;
   return e.generateDilatedConv();
