From dee01e397e94e8954c181fde02f5b698a23b4514 Mon Sep 17 00:00:00 2001
From: "sujuan.chen" <sujuan.chen@mediatek.com>
Date: Wed, 26 Apr 2023 17:47:42 +0800
Subject: [PATCH 2006/2010] wifi: mt76: mt7996: wed: add wed3.0 rx support

Add WED 3.0 RX offload support to mt7996. When a WED device with RX
capability is attached, tag the band data rings and the hardware RRO
data, MSDU page and IND_CMD rings as WED RX queues and register them
with WED instead of the host, program the IND_CMD signature and
address element bases used by the RRO engine, reserve the WED buffer
id range when allocating TX tokens, and run the WED PPE check on
received 802.3 frames so that offloaded flows are bound to the correct
net device.

Signed-off-by: sujuan.chen <sujuan.chen@mediatek.com>
---
 dma.c           | 114 +++++++++++++++++++++++++++++++++++++-----------
 mt76.h          |  12 +++--
 mt7996/dma.c    |  78 ++++++++++++++++++++-------------
 mt7996/init.c   |  25 +++++++++--
 mt7996/mac.c    |  38 ++++++++++++++--
 mt7996/mmio.c   |  35 ++++++++++++---
 mt7996/mt7996.h |   2 +
 mt7996/regs.h   |   2 +
 8 files changed, 234 insertions(+), 72 deletions(-)

diff --git a/dma.c b/dma.c
index df75a03..7d656a5 100644
--- a/dma.c
+++ b/dma.c
@@ -205,7 +205,7 @@ mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
 }
 
 static void
-mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
+mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q, bool skip)
 {
 	int i;
 
@@ -219,19 +219,23 @@ mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
 	for (i = 0; i < q->ndesc; i++)
 		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
 
+	if (skip)
+		goto sync;
+
 done:
 	Q_WRITE(dev, q, cpu_idx, 0);
 	Q_WRITE(dev, q, dma_idx, 0);
+sync:
 	mt76_dma_sync_idx(dev, q);
 }
 
 static int
 mt76_dma_add_rx_buf(struct mt76_dev *dev, struct mt76_queue *q,
-		    struct mt76_queue_buf *buf, void *data)
+		    struct mt76_queue_buf *buf, void *data,
+		    struct mt76_rxwi_cache *rxwi)
 {
 	struct mt76_desc *desc;
 	struct mt76_queue_entry *entry = &q->entry[q->head];
-	struct mt76_rxwi_cache *rxwi = NULL;
 	u32 buf1 = 0, ctrl, info = 0;
 	int idx = q->head;
 	int rx_token;
@@ -246,9 +250,11 @@ mt76_dma_add_rx_buf(struct mt76_dev *dev, struct mt76_queue *q,
 	ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
 
 	if (mt76_queue_is_wed_rx(q) || mt76_queue_is_rro_data(q)) {
-		rxwi = mt76_get_rxwi(dev);
-		if (!rxwi)
-			return -ENOMEM;
+		if (!rxwi) {
+			rxwi = mt76_get_rxwi(dev);
+			if (!rxwi)
+				return -ENOMEM;
+		}
 
 		rx_token = mt76_rx_token_consume(dev, data, rxwi, buf->addr);
 		if (rx_token < 0) {
@@ -425,7 +431,7 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, struct mt76_queue *q, bool flush)
 
 static void *
 mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
-		 int *len, u32 *info, bool *more, bool *drop)
+		 int *len, u32 *info, bool *more, bool *drop, bool flush)
 {
 	struct mt76_queue_entry *e = &q->entry[idx];
 	struct mt76_desc *desc = &q->desc[idx];
@@ -462,11 +468,43 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
 				 SKB_WITH_OVERHEAD(q->buf_size),
 				 DMA_FROM_DEVICE);
 
-		buf = r->ptr;
-		r->dma_addr = 0;
-		r->ptr = NULL;
-
-		mt76_put_rxwi(dev, r);
+		if (flush) {
+			buf = r->ptr;
+			r->dma_addr = 0;
+			r->ptr = NULL;
+
+			mt76_put_rxwi(dev, r);
+		} else {
+			struct mt76_queue_buf qbuf;
+
+			buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+			if (!buf)
+				return NULL;
+
+			memcpy(buf, r->ptr, SKB_WITH_OVERHEAD(q->buf_size));
+
+			r->dma_addr = dma_map_single(dev->dma_dev, r->ptr,
+						     SKB_WITH_OVERHEAD(q->buf_size),
+						     DMA_FROM_DEVICE);
+			if (unlikely(dma_mapping_error(dev->dma_dev, r->dma_addr))) {
+				skb_free_frag(r->ptr);
+				mt76_put_rxwi(dev, r);
+				return NULL;
+			}
+
+			qbuf.addr = r->dma_addr;
+			qbuf.len = SKB_WITH_OVERHEAD(q->buf_size);
+			qbuf.skip_unmap = false;
+
+			if (mt76_dma_add_rx_buf(dev, q, &qbuf, r->ptr, r) < 0) {
+				dma_unmap_single(dev->dma_dev, r->dma_addr,
+						 SKB_WITH_OVERHEAD(q->buf_size),
+						 DMA_FROM_DEVICE);
+				skb_free_frag(r->ptr);
+				mt76_put_rxwi(dev, r);
+				return NULL;
+			}
+		}
 
 		if (drop)
 			*drop |= !!(buf1 & MT_DMA_CTL_WO_DROP);
@@ -517,7 +555,7 @@ done:
 	q->tail = (q->tail + 1) % q->ndesc;
 	q->queued--;
 
-	return mt76_dma_get_buf(dev, q, idx, len, info, more, drop);
+	return mt76_dma_get_buf(dev, q, idx, len, info, more, drop, flush);
 }
 
 static int
@@ -760,7 +798,7 @@ done:
 		qbuf.addr = addr + offset;
 		qbuf.len = len - offset;
 		qbuf.skip_unmap = false;
-		if (mt76_dma_add_rx_buf(dev, q, &qbuf, buf) < 0) {
+		if (mt76_dma_add_rx_buf(dev, q, &qbuf, buf, NULL) < 0) {
 			dma_unmap_single(dev->dma_dev, addr, len,
 					 DMA_FROM_DEVICE);
 			skb_free_frag(buf);
@@ -769,7 +807,7 @@ done:
 		frames++;
 	}
 
-	if (frames)
+	if (frames || mt76_queue_is_wed_rx(q))
 		mt76_dma_kick_queue(dev, q);
 
 	spin_unlock_bh(&q->lock);
@@ -782,7 +820,7 @@ int mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset)
 #ifdef CONFIG_NET_MEDIATEK_SOC_WED
 	struct mtk_wed_device *wed = &dev->mmio.wed;
 	int ret, type, ring;
-	u8 flags;
+	u16 flags;
 
 	if (!q || !q->ndesc)
 		return -EINVAL;
@@ -809,7 +847,7 @@ int mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset)
 	case MT76_WED_Q_TXFREE:
 		/* WED txfree queue needs ring to be initialized before setup */
 		q->flags = 0;
-		mt76_dma_queue_reset(dev, q);
+		mt76_dma_queue_reset(dev, q, false);
 		mt76_dma_rx_fill(dev, q);
 		q->flags = flags;
 
@@ -818,9 +856,31 @@ int mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q, bool reset)
 			q->wed_regs = wed->txfree_ring.reg_base;
 		break;
 	case MT76_WED_Q_RX:
-		ret = mtk_wed_device_rx_ring_setup(wed, ring, q->regs, reset);
-		if (!ret)
-			q->wed_regs = wed->rx_ring[ring].reg_base;
+		if (q->flags & MT_QFLAG_RRO) {
+			q->flags &= ~0x1f;
+
+			ring = FIELD_GET(MT_QFLAG_RRO_RING, q->flags);
+			type = FIELD_GET(MT_QFLAG_RRO_TYPE, q->flags);
+			if (type == MT76_RRO_Q_DATA) {
+				mt76_dma_queue_reset(dev, q, true);
+				ret = mtk_wed_device_rro_rx_ring_setup(wed, ring, q->regs);
+			} else if (type == MT76_RRO_Q_MSDU_PG) {
+				mt76_dma_queue_reset(dev, q, true);
+				ret = mtk_wed_device_msdu_pg_rx_ring_setup(wed, ring, q->regs);
+			} else if (type == MT76_RRO_Q_IND) {
+				mt76_dma_queue_reset(dev, q, false);
+				mt76_dma_rx_fill(dev, q);
+				ret = mtk_wed_device_ind_rx_ring_setup(wed, q->regs);
+			}
+			if (type != MT76_RRO_Q_IND) {
+				q->head = q->ndesc - 1;
+				q->queued = q->ndesc - 1;
+			}
+		} else {
+			ret = mtk_wed_device_rx_ring_setup(wed, ring, q->regs, 0);
+			if (!ret)
+				q->wed_regs = wed->rx_ring[ring].reg_base;
+		}
 		break;
 	default:
 		ret = -EINVAL;
@@ -877,8 +937,11 @@ mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
 	if (ret)
 		return ret;
 
-	if (!mt76_queue_is_txfree(q))
-		mt76_dma_queue_reset(dev, q);
+	if (!mtk_wed_device_active(&dev->mmio.wed) ||
+	    (!mt76_queue_is_wed_txfree(q) &&
+	     !(mtk_wed_get_rx_capa(&dev->mmio.wed) &&
+	       q->flags & MT_QFLAG_RRO)))
+		mt76_dma_queue_reset(dev, q, false);
 
 	return 0;
 }
@@ -913,8 +976,7 @@ mt76_dma_rx_cleanup(struct mt76_dev *dev, struct mt76_queue *q)
 
 	spin_unlock_bh(&q->lock);
 
-	if (((q->flags & MT_QFLAG_WED) &&
-	     FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) ||
+	if (mt76_queue_is_wed_rx(q) ||
 	    (q->flags & MT_QFLAG_RRO))
 		return;
 
@@ -1183,7 +1245,8 @@ void mt76_dma_cleanup(struct mt76_dev *dev)
 	mt76_for_each_q_rx(dev, i) {
 		struct mt76_queue *q = &dev->q_rx[i];
 
-		if (mt76_queue_is_wed_rx(q))
+		if (mtk_wed_device_active(&dev->mmio.wed) && q->ndesc &&
+		    (mt76_queue_is_rro_msdu_pg(q) || mt76_queue_is_rro_data(q)))
 			continue;
 
 		netif_napi_del(&dev->napi[i]);
@@ -1195,6 +1258,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev)
 
 	if (mtk_wed_device_active(&dev->mmio.wed_ext))
 		mtk_wed_device_detach(&dev->mmio.wed_ext);
+
 	mt76_free_pending_txwi(dev);
 	mt76_free_pending_rxwi(dev);
 }
diff --git a/mt76.h b/mt76.h
index 3b089be..e584469 100644
--- a/mt76.h
+++ b/mt76.h
@@ -329,7 +329,7 @@ struct mt76_queue_ops {
 
 	void (*kick)(struct mt76_dev *dev, struct mt76_queue *q);
 
-	void (*reset_q)(struct mt76_dev *dev, struct mt76_queue *q);
+	void (*reset_q)(struct mt76_dev *dev, struct mt76_queue *q, bool skip);
 };
 
 enum mt76_phy_type {
@@ -1622,7 +1622,7 @@ static inline bool mt76_queue_is_rro_ind(struct mt76_queue *q)
 	       FIELD_GET(MT_QFLAG_RRO_TYPE, q->flags) == MT76_RRO_Q_IND;
 }
 
-static inline bool mt76_queue_is_txfree(struct mt76_queue *q)
+static inline bool mt76_queue_is_wed_txfree(struct mt76_queue *q)
 {
 	return (q->flags & MT_QFLAG_WED) &&
 	       FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_TXFREE;
@@ -1646,10 +1646,14 @@ static inline void mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked)
 static inline int
 mt76_token_get(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi)
 {
-	int token;
+	int token, start = 0;
+
+	if (mtk_wed_device_active(&dev->mmio.wed))
+		start = dev->mmio.wed.wlan.nbuf;
 
 	spin_lock_bh(&dev->token_lock);
-	token = idr_alloc(&dev->token, *ptxwi, 0, dev->token_size, GFP_ATOMIC);
+	token = idr_alloc(&dev->token, *ptxwi, start, start + dev->token_size,
+			  GFP_ATOMIC);
 	spin_unlock_bh(&dev->token_lock);
 
 	return token;
diff --git a/mt7996/dma.c b/mt7996/dma.c
index 34f79a6..bafc7f1 100644
--- a/mt7996/dma.c
+++ b/mt7996/dma.c
@@ -188,6 +188,7 @@ static void mt7996_dma_disable(struct mt7996_dev *dev, bool reset)
 
 void __mt7996_dma_enable(struct mt7996_dev *dev, bool reset, bool wed_reset)
 {
+	struct mtk_wed_device *wed = &dev->mt76.mmio.wed;
 	u32 hif1_ofs = 0;
 	u32 irq_mask;
 
@@ -196,11 +197,16 @@ void __mt7996_dma_enable(struct mt7996_dev *dev, bool reset, bool wed_reset)
 
 	/* enable wpdma tx/rx */
 	if (!reset) {
-		mt76_set(dev, MT_WFDMA0_GLO_CFG,
-			 MT_WFDMA0_GLO_CFG_TX_DMA_EN |
-			 MT_WFDMA0_GLO_CFG_RX_DMA_EN |
-			 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
-			 MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+		if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+			mt76_set(dev, MT_WFDMA0_GLO_CFG,
+				 MT_WFDMA0_GLO_CFG_TX_DMA_EN |
+				 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO);
+		else
+			mt76_set(dev, MT_WFDMA0_GLO_CFG,
+				 MT_WFDMA0_GLO_CFG_TX_DMA_EN |
+				 MT_WFDMA0_GLO_CFG_RX_DMA_EN |
+				 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+				 MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
 
 		if (dev->hif2)
 			mt76_set(dev, MT_WFDMA0_GLO_CFG + hif1_ofs,
@@ -212,8 +218,8 @@ void __mt7996_dma_enable(struct mt7996_dev *dev, bool reset, bool wed_reset)
 
 	/* enable interrupts for TX/RX rings */
 	irq_mask = MT_INT_MCU_CMD |
-			   MT_INT_RX_DONE_MCU |
-			   MT_INT_TX_DONE_MCU;
+		   MT_INT_RX_DONE_MCU |
+		   MT_INT_TX_DONE_MCU;
 
 	if (mt7996_band_valid(dev, MT_BAND0))
 		irq_mask |= MT_INT_BAND0_RX_DONE;
@@ -224,14 +230,14 @@ void __mt7996_dma_enable(struct mt7996_dev *dev, bool reset, bool wed_reset)
 	if (mt7996_band_valid(dev, MT_BAND2))
 		irq_mask |= MT_INT_BAND2_RX_DONE;
 
-	if (mtk_wed_device_active(&dev->mt76.mmio.wed) && wed_reset) {
+	if (mtk_wed_device_active(wed) && wed_reset) {
 		u32 wed_irq_mask = irq_mask;
 
 		wed_irq_mask |= MT_INT_TX_DONE_BAND0 | MT_INT_TX_DONE_BAND1;
 
 		mt76_wr(dev, MT_INT_MASK_CSR, wed_irq_mask);
 
-		mtk_wed_device_start(&dev->mt76.mmio.wed, wed_irq_mask);
+		mtk_wed_device_start(wed, wed_irq_mask);
 	}
 
 	irq_mask = reset ? MT_INT_MCU_CMD : irq_mask;
@@ -308,7 +314,8 @@ static int mt7996_dma_enable(struct mt7996_dev *dev, bool reset)
 		/* fix hardware limitation, pcie1's rx ring3 is not available
 		 * so, redirect pcie0 rx ring3 interrupt to pcie1
 		 */
-		if (mtk_wed_device_active(&dev->mt76.mmio.wed) && dev->rro_support)
+		if (mtk_wed_device_active(&dev->mt76.mmio.wed) &&
+		    dev->rro_support)
 			mt76_set(dev, MT_WFDMA0_RX_INT_PCIE_SEL + hif1_ofs,
 				 MT_WFDMA0_RX_INT_SEL_RING6);
 		else
@@ -325,12 +332,15 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev)
 {
 	int ret;
 	u32 hif1_ofs = 0;
+	struct mtk_wed_device *wed = &dev->mt76.mmio.wed;
 
 	if (dev->hif2)
 		hif1_ofs = MT_WFDMA0_PCIE1(0) - MT_WFDMA0(0);
 
 	/* ind cmd */
 	dev->mt76.q_rx[MT_RXQ_RRO_IND].flags = MT_RRO_Q_IND;
+	if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+		dev->mt76.q_rx[MT_RXQ_RRO_IND].flags |= MT_WED_Q_RX(0);
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_RRO_IND],
 			       MT_RXQ_ID(MT_RXQ_RRO_IND),
 			       MT7996_RX_RING_SIZE,
@@ -341,6 +351,8 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev)
 	/* rx msdu page queue for band0 */
 	dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND0].flags = MT_RRO_Q_MSDU_PG(0);
 	dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND0].flags |= MT_QFLAG_MAGIC;
+	if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+		dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND0].flags |= MT_WED_Q_RX(0);
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND0],
 			       MT_RXQ_ID(MT_RXQ_MSDU_PAGE_BAND0),
 			       MT7996_RX_RING_SIZE,
@@ -353,6 +365,8 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev)
 		/* rx msdu page queue for band1 */
 		dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND1].flags = MT_RRO_Q_MSDU_PG(1);
 		dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND1].flags |= MT_QFLAG_MAGIC;
+		if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+			dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND1].flags |= MT_WED_Q_RX(1);
 		ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND1],
 				       MT_RXQ_ID(MT_RXQ_MSDU_PAGE_BAND1),
 				       MT7996_RX_RING_SIZE,
@@ -366,6 +380,8 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev)
 		/* rx msdu page queue for band2 */
 		dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND2].flags = MT_RRO_Q_MSDU_PG(2);
 		dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND2].flags |= MT_QFLAG_MAGIC;
+		if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+			dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND2].flags |= MT_WED_Q_RX(0);
 		ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MSDU_PAGE_BAND2],
 				       MT_RXQ_ID(MT_RXQ_MSDU_PAGE_BAND2),
 				       MT7996_RX_RING_SIZE,
@@ -375,31 +391,30 @@ int mt7996_dma_rro_init(struct mt7996_dev *dev)
 			return ret;
 	}
 
-	mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND0, mt76_dma_rx_poll);
-	mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND1, mt76_dma_rx_poll);
-	mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND2, mt76_dma_rx_poll);
-	mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND0, mt76_dma_rx_poll);
-	mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND1, mt76_dma_rx_poll);
-	mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND2, mt76_dma_rx_poll);
 	mt76_queue_rx_init(dev, MT_RXQ_TXFREE_BAND0, mt76_dma_rx_poll);
 	mt76_queue_rx_init(dev, MT_RXQ_TXFREE_BAND2, mt76_dma_rx_poll);
 	mt76_queue_rx_init(dev, MT_RXQ_RRO_IND, mt76_dma_rx_poll);
 
-
-	if (mtk_wed_device_active(&dev->mt76.mmio.wed)) {
+	if (mtk_wed_device_active(wed)) {
 		u32 wed_irq_mask = dev->mt76.mmio.irqmask |
 				   MT_INT_RRO_RX_DONE |
 				   MT_INT_TX_DONE_BAND2;
 
-		if (mtk_wed_get_rx_capa(&dev->mt76.mmio.wed))
+		if (mtk_wed_get_rx_capa(wed))
 			wed_irq_mask &= ~MT_INT_RX_DONE_RRO_IND;
 
 		mt76_wr(dev, MT_INT_MASK_CSR, wed_irq_mask);
 
-		mtk_wed_device_start_hwrro(&dev->mt76.mmio.wed, wed_irq_mask);
-
+		mtk_wed_device_start_hwrro(wed, wed_irq_mask);
 		mt7996_irq_enable(dev, wed_irq_mask);
 	} else {
+		mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND0, mt76_dma_rx_poll);
+		mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND1, mt76_dma_rx_poll);
+		mt76_queue_rx_init(dev, MT_RXQ_RRO_BAND2, mt76_dma_rx_poll);
+		mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND0, mt76_dma_rx_poll);
+		mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND1, mt76_dma_rx_poll);
+		mt76_queue_rx_init(dev, MT_RXQ_MSDU_PAGE_BAND2, mt76_dma_rx_poll);
+
 		mt7996_irq_enable(dev, MT_INT_RRO_RX_DONE);
 	}
 
@@ -475,6 +490,9 @@ int mt7996_dma_init(struct mt7996_dev *dev)
 		return ret;
 
 	/* rx data queue for band0 and band1 */
+	if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+		dev->mt76.q_rx[MT_RXQ_MAIN].flags = MT_WED_Q_RX(0);
+
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN],
 			       MT_RXQ_ID(MT_RXQ_MAIN),
 			       MT7996_RX_RING_SIZE,
@@ -498,9 +516,6 @@ int mt7996_dma_init(struct mt7996_dev *dev)
 	if (mt7996_band_valid(dev, MT_BAND2)) {
 		/* rx data queue for band2 */
 		rx_base = MT_RXQ_RING_BASE(MT_RXQ_BAND2) + hif1_ofs;
-		if (mtk_wed_device_active(wed))
-			rx_base = MT_RXQ_RING_BASE(MT_RXQ_BAND2);
-
 		ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_BAND2],
 				       MT_RXQ_ID(MT_RXQ_BAND2),
 				       MT7996_RX_RING_SIZE,
@@ -524,11 +539,12 @@ int mt7996_dma_init(struct mt7996_dev *dev)
 			return ret;
 	}
 
-
 	if (dev->rro_support) {
 		/* rx rro data queue for band0 */
 		dev->mt76.q_rx[MT_RXQ_RRO_BAND0].flags = MT_RRO_Q_DATA(0);
 		dev->mt76.q_rx[MT_RXQ_RRO_BAND0].flags |= MT_QFLAG_MAGIC;
+		if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+			dev->mt76.q_rx[MT_RXQ_RRO_BAND0].flags |= MT_WED_Q_RX(0);
 		ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_RRO_BAND0],
 				       MT_RXQ_ID(MT_RXQ_RRO_BAND0),
 				       MT7996_RX_RING_SIZE,
@@ -552,6 +568,8 @@ int mt7996_dma_init(struct mt7996_dev *dev)
 			/* rx rro data queue for band2 */
 			dev->mt76.q_rx[MT_RXQ_RRO_BAND2].flags = MT_RRO_Q_DATA(1);
 			dev->mt76.q_rx[MT_RXQ_RRO_BAND2].flags |= MT_QFLAG_MAGIC;
+			if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+				dev->mt76.q_rx[MT_RXQ_RRO_BAND2].flags |= MT_WED_Q_RX(1);
 			ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_RRO_BAND2],
 					       MT_RXQ_ID(MT_RXQ_RRO_BAND2),
 					       MT7996_RX_RING_SIZE,
@@ -634,18 +652,18 @@ void mt7996_dma_reset(struct mt7996_dev *dev, bool force)
 
 	/* reset hw queues */
 	for (i = 0; i < __MT_TXQ_MAX; i++) {
-		mt76_queue_reset(dev, dev->mphy.q_tx[i]);
+		mt76_queue_reset(dev, dev->mphy.q_tx[i], false);
 		if (phy2)
-			mt76_queue_reset(dev, phy2->q_tx[i]);
+			mt76_queue_reset(dev, phy2->q_tx[i], false);
 		if (phy3)
-			mt76_queue_reset(dev, phy3->q_tx[i]);
+			mt76_queue_reset(dev, phy3->q_tx[i], false);
 	}
 
 	for (i = 0; i < __MT_MCUQ_MAX; i++)
-		mt76_queue_reset(dev, dev->mt76.q_mcu[i]);
+		mt76_queue_reset(dev, dev->mt76.q_mcu[i], false);
 
 	mt76_for_each_q_rx(&dev->mt76, i) {
-		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
+		mt76_queue_reset(dev, &dev->mt76.q_rx[i], false);
 	}
 
 	mt76_tx_status_check(&dev->mt76, true);
diff --git a/mt7996/init.c b/mt7996/init.c
index 8acb408..7233e41 100644
--- a/mt7996/init.c
+++ b/mt7996/init.c
@@ -659,6 +659,7 @@ static int mt7996_rro_init(struct mt7996_dev *dev)
 {
 	struct mt7996_rro_addr *ptr;
 	struct mt7996_rro_cfg *rro = &dev->rro;
+	struct mtk_wed_device *wed = &dev->mt76.mmio.wed;
 	u32 size, val = 0, reg = MT_RRO_ADDR_ELEM_SEG_ADDR0;
 	int i, j;
 	void *buf;
@@ -691,6 +692,9 @@ static int mt7996_rro_init(struct mt7996_dev *dev)
 		ptr = rro->addr_elem_alloc_va[i];
 		for (j = 0; j < MT7996_RRO_SESSION_PER_CR * rro->win_sz; j++, ptr++)
 			ptr->signature = 0xff;
+
+		if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed))
+			wed->wlan.ind_cmd.addr_elem_phys[i] = rro->addr_elem_alloc_pa[i];
 	}
 
 	rro->particular_se_id = MT7996_RRO_SESSION_MAX;
@@ -732,8 +736,20 @@ static int mt7996_rro_init(struct mt7996_dev *dev)
 	mt76_wr(dev, MT_RRO_ADDR_ARRAY_BASE1,
 		MT_RRO_ADDR_ARRAY_ELEM_ADDR_SEG_MODE);
 
-	mt76_wr(dev, MT_RRO_IND_CMD_SIGNATURE_BASE0, 0);
-	mt76_wr(dev, MT_RRO_IND_CMD_SIGNATURE_BASE1, 0);
+	if (mtk_wed_device_active(wed) && mtk_wed_get_rx_capa(wed)) {
+		wed->wlan.ind_cmd.win_size = ffs(rro->win_sz) - 6;
+		wed->wlan.ind_cmd.particular_sid = rro->particular_se_id;
+		wed->wlan.ind_cmd.particular_se_phys = rro->particular_session_pa;
+		wed->wlan.ind_cmd.se_group_nums = MT7996_RRO_ADDR_ELEM_CR_CNT;
+		wed->wlan.ind_cmd.ack_sn_addr = MT_RRO_ACK_SN_CTRL;
+
+		mt76_wr(dev, MT_RRO_IND_CMD_SIGNATURE_BASE0, 0x15010e00);
+		mt76_set(dev, MT_RRO_IND_CMD_SIGNATURE_BASE1,
+			 MT_RRO_IND_CMD_SIGNATURE_BASE1_EN);
+	} else {
+		mt76_wr(dev, MT_RRO_IND_CMD_SIGNATURE_BASE0, 0);
+		mt76_wr(dev, MT_RRO_IND_CMD_SIGNATURE_BASE1, 0);
+	}
 
 	/* particular session configure */
 	/* use max session idx + 1 as particular session id */
@@ -1266,9 +1282,10 @@ void mt7996_unregister_device(struct mt7996_dev *dev)
 	mt7996_mcu_exit(dev);
 	mt7996_tx_token_put(dev);
 	mt7996_dma_cleanup(dev);
-	if (dev->rro_support)
+	if (dev->rro_support && !mtk_wed_device_active(&dev->mt76.mmio.wed)) {
 		mt7996_rro_msdu_pg_free(dev);
-	mt7996_rx_token_put(dev);
+		mt7996_rx_token_put(dev);
+	}
 	tasklet_disable(&dev->mt76.irq_tasklet);
 
 	mt76_free_device(&dev->mt76);
diff --git a/mt7996/mac.c b/mt7996/mac.c
index 1f0e4df..3674411 100644
--- a/mt7996/mac.c
+++ b/mt7996/mac.c
@@ -614,8 +614,37 @@ mt7996_mac_fill_rx_rate(struct mt7996_dev *dev,
 	return 0;
 }
 
+static void
+mt7996_wed_check_ppe(struct mt7996_dev *dev, struct mt76_queue *q,
+		     struct mt7996_sta *msta, struct sk_buff *skb,
+		     u32 info)
+{
+	struct ieee80211_vif *vif;
+	struct wireless_dev *wdev;
+
+	if (!msta || !msta->vif)
+		return;
+
+	if (!mt76_queue_is_wed_rx(q))
+		return;
+
+	if (!(info & MT_DMA_INFO_PPE_VLD))
+		return;
+
+	vif = container_of((void *)msta->vif, struct ieee80211_vif,
+			   drv_priv);
+	wdev = ieee80211_vif_to_wdev(vif);
+	skb->dev = wdev->netdev;
+
+	mtk_wed_device_ppe_check(&dev->mt76.mmio.wed, skb,
+				 FIELD_GET(MT_DMA_PPE_CPU_REASON, info),
+				 FIELD_GET(MT_DMA_PPE_ENTRY, info));
+}
+
+
 static int
-mt7996_mac_fill_rx(struct mt7996_dev *dev, struct sk_buff *skb)
+mt7996_mac_fill_rx(struct mt7996_dev *dev, enum mt76_rxq_id q,
+		   struct sk_buff *skb, u32 *info)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_phy *mphy = &dev->mt76.phy;
@@ -641,6 +670,7 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, struct sk_buff *skb)
 	__le16 fc = 0;
 	int idx;
 	u8 hw_aggr = false;
+	struct mt7996_sta *msta = NULL;
 
 	hw_aggr = status->aggr;
 	memset(status, 0, sizeof(*status));
@@ -669,8 +699,6 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, struct sk_buff *skb)
 	status->wcid = mt7996_rx_get_wcid(dev, idx, unicast);
 
 	if (status->wcid) {
-		struct mt7996_sta *msta;
-
 		msta = container_of(status->wcid, struct mt7996_sta, wcid);
 		spin_lock_bh(&dev->sta_poll_lock);
 		if (list_empty(&msta->poll_list))
@@ -873,6 +901,8 @@ mt7996_mac_fill_rx(struct mt7996_dev *dev, struct sk_buff *skb)
 #endif
 	} else {
 		status->flag |= RX_FLAG_8023;
+		mt7996_wed_check_ppe(dev, &dev->mt76.q_rx[q], msta, skb,
+				     *info);
 	}
 
 	if (rxv && mode >= MT_PHY_TYPE_HE_SU && !(status->flag & RX_FLAG_8023))
@@ -1754,7 +1784,7 @@ void mt7996_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
 		dev_kfree_skb(skb);
 		break;
 	case PKT_TYPE_NORMAL:
-		if (!mt7996_mac_fill_rx(dev, skb)) {
+		if (!mt7996_mac_fill_rx(dev, q, skb, info)) {
 			mt76_rx(&dev->mt76, q, skb);
 			return;
 		}
diff --git a/mt7996/mmio.c b/mt7996/mmio.c
index 411448a..2361e1b 100644
--- a/mt7996/mmio.c
+++ b/mt7996/mmio.c
@@ -346,9 +346,15 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
 			wed->wlan.txfree_tbit = ffs(MT_INT_RX_DONE_WA_TRI) - 1;
 		}
 
+		wed->wlan.wpdma_rx_glo = wed->wlan.phy_base + hif1_ofs + MT_WFDMA0_GLO_CFG;
+		wed->wlan.wpdma_rx = wed->wlan.phy_base + hif1_ofs +
+				     MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) +
+				     MT7996_RXQ_BAND0 * MT_RING_SIZE;
+
 		wed->wlan.chip_id = 0x7991;
 		wed->wlan.tx_tbit[0] = ffs(MT_INT_TX_DONE_BAND2) - 1;
 	} else {
+		wed->wlan.hwrro = dev->rro_support; /* default on */
 		wed->wlan.wpdma_int = wed->wlan.phy_base + MT_INT_SOURCE_CSR;
 		wed->wlan.wpdma_mask = wed->wlan.phy_base + MT_INT_MASK_CSR;
 		wed->wlan.wpdma_tx = wed->wlan.phy_base + MT_TXQ_RING_BASE(0) +
@@ -360,13 +366,33 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
 				     MT_RXQ_RING_BASE(MT7996_RXQ_BAND0) +
 				     MT7996_RXQ_BAND0 * MT_RING_SIZE;
 
+		wed->wlan.wpdma_rx_rro[0] = wed->wlan.phy_base +
+					    MT_RXQ_RING_BASE(MT7996_RXQ_RRO_BAND0) +
+					    MT7996_RXQ_RRO_BAND0 * MT_RING_SIZE;
+		wed->wlan.wpdma_rx_rro[1] = wed->wlan.phy_base + hif1_ofs +
+					    MT_RXQ_RING_BASE(MT7996_RXQ_RRO_BAND2) +
+					    MT7996_RXQ_RRO_BAND2 * MT_RING_SIZE;
+		wed->wlan.wpdma_rx_pg = wed->wlan.phy_base +
+					MT_RXQ_RING_BASE(MT7996_RXQ_MSDU_PG_BAND0) +
+					MT7996_RXQ_MSDU_PG_BAND0 * MT_RING_SIZE;
+
 		wed->wlan.rx_nbuf = 65536;
 		wed->wlan.rx_npkt = 24576;
+		if (dev->hif2)
+			wed->wlan.rx_npkt += 8192;
+
 		wed->wlan.rx_size = SKB_WITH_OVERHEAD(MT_RX_BUF_SIZE);
 
 		wed->wlan.rx_tbit[0] = ffs(MT_INT_RX_DONE_BAND0) - 1;
 		wed->wlan.rx_tbit[1] = ffs(MT_INT_RX_DONE_BAND2) - 1;
 
+		wed->wlan.rro_rx_tbit[0] = ffs(MT_INT_RX_DONE_RRO_BAND0) - 1;
+		wed->wlan.rro_rx_tbit[1] = ffs(MT_INT_RX_DONE_RRO_BAND2) - 1;
+
+		wed->wlan.rx_pg_tbit[0] = ffs(MT_INT_RX_DONE_MSDU_PG_BAND0) - 1;
+		wed->wlan.rx_pg_tbit[1] = ffs(MT_INT_RX_DONE_MSDU_PG_BAND1) - 1;
+		wed->wlan.rx_pg_tbit[2] = ffs(MT_INT_RX_DONE_MSDU_PG_BAND2) - 1;
+
 		wed->wlan.tx_tbit[0] = ffs(MT_INT_TX_DONE_BAND0) - 1;
 		wed->wlan.tx_tbit[1] = ffs(MT_INT_TX_DONE_BAND1) - 1;
 		if (dev->rro_support) {
@@ -378,6 +404,8 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
 			wed->wlan.wpdma_txfree = wed->wlan.phy_base + MT_RXQ_RING_BASE(0) +
 						  MT7996_RXQ_MCU_WA_MAIN * MT_RING_SIZE;
 		}
+
+		dev->mt76.rx_token_size += wed->wlan.rx_npkt;
 	}
 
 	wed->wlan.nbuf = 16384;
@@ -394,8 +422,6 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr,
 	wed->wlan.release_rx_buf = mt7996_mmio_wed_release_rx_buf;
 	wed->wlan.update_wo_rx_stats = NULL;
 
-	dev->mt76.rx_token_size += wed->wlan.rx_npkt;
-
 	if (mtk_wed_device_attach(wed))
 		return 0;
 
@@ -557,10 +583,9 @@ static void mt7996_irq_tasklet(struct tasklet_struct *t)
 irqreturn_t mt7996_irq_handler(int irq, void *dev_instance)
 {
 	struct mt7996_dev *dev = dev_instance;
-	struct mtk_wed_device *wed = &dev->mt76.mmio.wed;
 
-	if (mtk_wed_device_active(wed))
-		mtk_wed_device_irq_set_mask(wed, 0);
+	if (mtk_wed_device_active(&dev->mt76.mmio.wed))
+		mtk_wed_device_irq_set_mask(&dev->mt76.mmio.wed, 0);
 	else
 		mt76_wr(dev, MT_INT_MASK_CSR, 0);
 
diff --git a/mt7996/mt7996.h b/mt7996/mt7996.h
index d6d253a..23d4744 100644
--- a/mt7996/mt7996.h
+++ b/mt7996/mt7996.h
@@ -83,6 +83,8 @@
 #define MT7996_RX_BUF_SIZE		MT7996_SKB_TRUESIZE(1800)
 #define MT7996_RX_MSDU_PAGE_SIZE	MT7996_SKB_TRUESIZE(128)
 
+#define MT7996_WED_RX_TOKEN_SIZE	32768
+
 struct mt7996_vif;
 struct mt7996_sta;
 struct mt7996_dfs_pulse;
diff --git a/mt7996/regs.h b/mt7996/regs.h
index d352426..20db058 100644
--- a/mt7996/regs.h
+++ b/mt7996/regs.h
@@ -53,6 +53,8 @@ enum base_rev {
 
 #define MT_RRO_IND_CMD_SIGNATURE_BASE0		MT_RRO_TOP(0x38)
 #define MT_RRO_IND_CMD_SIGNATURE_BASE1		MT_RRO_TOP(0x3C)
+#define MT_RRO_IND_CMD_0_CTRL0			MT_RRO_TOP(0x40)
+#define MT_RRO_IND_CMD_SIGNATURE_BASE1_EN	BIT(31)
 
 #define MT_RRO_PARTICULAR_CFG0			MT_RRO_TOP(0x5C)
 #define MT_RRO_PARTICULAR_CFG1			MT_RRO_TOP(0x60)
-- 
2.18.0

