agent-enviroments/builder/libs/seastar/dpdk/drivers/net/nfp/nfp_rxtx.c

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2014-2021 Netronome Systems, Inc.
* All rights reserved.
*
* Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
*/
#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include "nfp_common.h"
#include "nfp_ctrl.h"
#include "nfp_rxtx.h"
#include "nfp_logs.h"
#include "nfd3/nfp_nfd3.h"
#include "nfdk/nfp_nfdk.h"
#include "nfpcore/nfp_mip.h"
#include "nfpcore/nfp_rtsym.h"
#include "flower/nfp_flower.h"
static int
nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
{
struct nfp_net_dp_buf *rxe = rxq->rxbufs;
uint64_t dma_addr;
unsigned int i;
PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %u descriptors",
rxq->rx_count);
for (i = 0; i < rxq->rx_count; i++) {
struct nfp_net_rx_desc *rxd;
struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool);
if (mbuf == NULL) {
PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%hu",
rxq->qidx);
return -ENOMEM;
}
dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf));
rxd = &rxq->rxds[i];
rxd->fld.dd = 0;
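/* Split the buffer IOVA into the descriptor's 16-bit high and 32-bit low address fields */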
rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
rxe[i].mbuf = mbuf;
PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64, i, dma_addr);
}
/* Make sure all writes are flushed before telling the hardware */
rte_wmb();
/* Do not advertise the whole ring, as the firmware gets confused if we do */
PMD_RX_LOG(DEBUG, "Increment FL write pointer by %u",
rxq->rx_count - 1);
nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1);
return 0;
}
int
nfp_net_rx_freelist_setup(struct rte_eth_dev *dev)
{
int i;
for (i = 0; i < dev->data->nb_rx_queues; i++) {
if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) < 0)
return -1;
}
return 0;
}
uint32_t
nfp_net_rx_queue_count(void *rx_queue)
{
struct nfp_net_rxq *rxq;
struct nfp_net_rx_desc *rxds;
uint32_t idx;
uint32_t count;
rxq = rx_queue;
idx = rxq->rd_p;
count = 0;
/*
 * Other PMDs just check the DD bit in strides of 4 descriptors and count
 * all four when the first one has the DD bit set. That is not exact, but
 * it can be good for performance. Ideally the check would be done on
 * chunks of descriptors belonging to the same cache line.
 */
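/*
 * Illustrative sketch of the stride-of-4 scheme described above (this is
 * not what the loop below does):
 *
 *   while (count < rxq->rx_count) {
 *       if ((rxq->rxds[idx].rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
 *           break;
 *       count += 4;
 *       idx = (idx + 4) % rxq->rx_count;
 *   }
 */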
while (count < rxq->rx_count) {
rxds = &rxq->rxds[idx];
if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
break;
count++;
idx++;
/* Wrapping? */
if ((idx) == rxq->rx_count)
idx = 0;
}
return count;
}
/* nfp_net_parse_chained_meta() - Parse the chained metadata from packet */
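/*
 * Chained metadata layout, as implied by the parsing loop below: a 32-bit
 * big-endian header packing NFP_NET_META_FIELD_SIZE-bit type values, followed
 * by one 32-bit big-endian data word per field, in the same order:
 *
 *   meta_base: [ type header ][ field 0 data ][ field 1 data ] ...
 */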
static bool
nfp_net_parse_chained_meta(uint8_t *meta_base,
rte_be32_t meta_header,
struct nfp_meta_parsed *meta)
{
uint8_t *meta_offset;
uint32_t meta_info;
uint32_t vlan_info;
meta_info = rte_be_to_cpu_32(meta_header);
meta_offset = meta_base + 4;
for (; meta_info != 0; meta_info >>= NFP_NET_META_FIELD_SIZE, meta_offset += 4) {
switch (meta_info & NFP_NET_META_FIELD_MASK) {
case NFP_NET_META_PORTID:
meta->port_id = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
break;
case NFP_NET_META_HASH:
/* Next field type is about the hash type */
meta_info >>= NFP_NET_META_FIELD_SIZE;
/* Hash value is in the data field */
meta->hash = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
meta->hash_type = meta_info & NFP_NET_META_FIELD_MASK;
break;
case NFP_NET_META_VLAN:
vlan_info = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
meta->vlan[meta->vlan_layer].offload =
vlan_info >> NFP_NET_META_VLAN_OFFLOAD;
meta->vlan[meta->vlan_layer].tci =
vlan_info & NFP_NET_META_VLAN_MASK;
meta->vlan[meta->vlan_layer].tpid = NFP_NET_META_TPID(vlan_info);
++meta->vlan_layer;
break;
default:
/* Unsupported metadata can be a performance issue */
return false;
}
}
return true;
}
/*
* nfp_net_parse_meta_hash() - Set mbuf hash data based on the metadata info
*
* The RSS hash and hash-type are prepended to the packet data.
* Extract and decode it and set the mbuf fields.
*/
static void
nfp_net_parse_meta_hash(const struct nfp_meta_parsed *meta,
struct nfp_net_rxq *rxq,
struct rte_mbuf *mbuf)
{
struct nfp_net_hw *hw = rxq->hw;
if ((hw->ctrl & NFP_NET_CFG_CTRL_RSS_ANY) == 0)
return;
mbuf->hash.rss = meta->hash;
mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
}
/*
* nfp_net_parse_single_meta() - Parse the single metadata
*
* The RSS hash and hash-type are prepended to the packet data.
* Get it from metadata area.
*/
static inline void
nfp_net_parse_single_meta(uint8_t *meta_base,
rte_be32_t meta_header,
struct nfp_meta_parsed *meta)
{
meta->hash_type = rte_be_to_cpu_32(meta_header);
meta->hash = rte_be_to_cpu_32(*(rte_be32_t *)(meta_base + 4));
}
/*
* nfp_net_parse_meta_vlan() - Set mbuf vlan_strip data based on metadata info
*
* The VLAN info TPID and TCI are prepended to the packet data.
* Extract and decode it and set the mbuf fields.
*/
static void
nfp_net_parse_meta_vlan(const struct nfp_meta_parsed *meta,
struct nfp_net_rx_desc *rxd,
struct nfp_net_rxq *rxq,
struct rte_mbuf *mb)
{
struct nfp_net_hw *hw = rxq->hw;
/* Skip if the hardware does not support VLAN offload. */
if ((hw->ctrl & (NFP_NET_CFG_CTRL_RXVLAN | NFP_NET_CFG_CTRL_RXVLAN_V2)) == 0)
return;
/*
 * The NIC supports two ways of delivering the VLAN info:
 * 1. Through the metadata, when NFP_NET_CFG_CTRL_RXVLAN_V2 is set.
 * 2. Through the Rx descriptor, when NFP_NET_CFG_CTRL_RXVLAN is set.
 *
 * If the NIC does not deliver the VLAN info, there is nothing to do.
 */
if ((hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN_V2) != 0) {
if (meta->vlan_layer >= 1 && meta->vlan[0].offload != 0) {
mb->vlan_tci = rte_cpu_to_le_32(meta->vlan[0].tci);
mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
}
} else if ((hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN) != 0) {
if ((rxd->rxd.flags & PCIE_DESC_RX_VLAN) != 0) {
mb->vlan_tci = rte_cpu_to_le_32(rxd->rxd.offload_info);
mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
}
}
}
/*
* nfp_net_parse_meta_qinq() - Set mbuf qinq_strip data based on metadata info
*
* The outer VLAN TCI is prepended to the packet data.
* Extract and decode it and set the mbuf fields.
*
* If both RTE_MBUF_F_RX_VLAN and NFP_NET_CFG_CTRL_RXQINQ are set, the 2 VLANs
* have been stripped by the hardware and their TCIs are saved in
* mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
* If NFP_NET_CFG_CTRL_RXQINQ is set and RTE_MBUF_F_RX_VLAN is unset, only the
* outer VLAN is removed from packet data, but both tci are saved in
* mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
*
* qinq set & vlan set : meta->vlan_layer>=2, meta->vlan[0].offload=1, meta->vlan[1].offload=1
* qinq set & vlan not set: meta->vlan_layer>=2, meta->vlan[1].offload=1,meta->vlan[0].offload=0
* qinq not set & vlan set: meta->vlan_layer=1, meta->vlan[0].offload=1
* qinq not set & vlan not set: meta->vlan_layer=0
*/
static void
nfp_net_parse_meta_qinq(const struct nfp_meta_parsed *meta,
struct nfp_net_rxq *rxq,
struct rte_mbuf *mb)
{
struct nfp_net_hw *hw = rxq->hw;
if ((hw->ctrl & NFP_NET_CFG_CTRL_RXQINQ) == 0 ||
(hw->cap & NFP_NET_CFG_CTRL_RXQINQ) == 0)
return;
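/* Both the outer and inner VLAN entries must be present in the metadata */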
if (meta->vlan_layer < NFP_META_MAX_VLANS)
return;
if (meta->vlan[0].offload == 0)
mb->vlan_tci = rte_cpu_to_le_16(meta->vlan[0].tci);
mb->vlan_tci_outer = rte_cpu_to_le_16(meta->vlan[1].tci);
PMD_RX_LOG(DEBUG, "Received outer vlan is %u inter vlan is %u",
mb->vlan_tci_outer, mb->vlan_tci);
mb->ol_flags |= RTE_MBUF_F_RX_QINQ | RTE_MBUF_F_RX_QINQ_STRIPPED;
}
/* nfp_net_parse_meta() - Parse the metadata from packet */
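/*
 * Two metadata formats are handled: the chained format, which may carry
 * several typed fields (port id, hash, VLAN), and the single format, which
 * carries only the RSS hash and hash type.
 */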
static void
nfp_net_parse_meta(struct nfp_net_rx_desc *rxds,
struct nfp_net_rxq *rxq,
struct nfp_net_hw *hw,
struct rte_mbuf *mb,
struct nfp_meta_parsed *meta)
{
uint8_t *meta_base;
rte_be32_t meta_header;
if (unlikely(NFP_DESC_META_LEN(rxds) == 0))
return;
meta_base = rte_pktmbuf_mtod(mb, uint8_t *);
meta_base -= NFP_DESC_META_LEN(rxds);
meta_header = *(rte_be32_t *)meta_base;
switch (hw->meta_format) {
case NFP_NET_METAFORMAT_CHAINED:
if (nfp_net_parse_chained_meta(meta_base, meta_header, meta)) {
nfp_net_parse_meta_hash(meta, rxq, mb);
nfp_net_parse_meta_vlan(meta, rxds, rxq, mb);
nfp_net_parse_meta_qinq(meta, rxq, mb);
} else {
PMD_RX_LOG(DEBUG, "RX chained metadata format is wrong!");
}
break;
case NFP_NET_METAFORMAT_SINGLE:
if ((rxds->rxd.flags & PCIE_DESC_RX_RSS) != 0) {
nfp_net_parse_single_meta(meta_base, meta_header, meta);
nfp_net_parse_meta_hash(meta, rxq, mb);
}
break;
default:
PMD_RX_LOG(DEBUG, "RX metadata do not exist.");
}
}
/**
* Set packet type to mbuf based on parsed structure.
*
* @param nfp_ptype
* Packet type structure parsing from Rx descriptor.
* @param mb
* Mbuf to set the packet type.
*/
static void
nfp_net_set_ptype(const struct nfp_ptype_parsed *nfp_ptype, struct rte_mbuf *mb)
{
uint32_t mbuf_ptype = RTE_PTYPE_L2_ETHER;
uint8_t nfp_tunnel_ptype = nfp_ptype->tunnel_ptype;
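/* A tunnel packet type implies an inner Ethernet header */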
if (nfp_tunnel_ptype != NFP_NET_PTYPE_TUNNEL_NONE)
mbuf_ptype |= RTE_PTYPE_INNER_L2_ETHER;
switch (nfp_ptype->outer_l3_ptype) {
case NFP_NET_PTYPE_OUTER_L3_NONE:
break;
case NFP_NET_PTYPE_OUTER_L3_IPV4:
mbuf_ptype |= RTE_PTYPE_L3_IPV4;
break;
case NFP_NET_PTYPE_OUTER_L3_IPV6:
mbuf_ptype |= RTE_PTYPE_L3_IPV6;
break;
default:
PMD_RX_LOG(DEBUG, "Unrecognized nfp outer layer 3 packet type: %u",
nfp_ptype->outer_l3_ptype);
break;
}
switch (nfp_tunnel_ptype) {
case NFP_NET_PTYPE_TUNNEL_NONE:
break;
case NFP_NET_PTYPE_TUNNEL_VXLAN:
mbuf_ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
break;
case NFP_NET_PTYPE_TUNNEL_NVGRE:
mbuf_ptype |= RTE_PTYPE_TUNNEL_NVGRE;
break;
case NFP_NET_PTYPE_TUNNEL_GENEVE:
mbuf_ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
break;
default:
PMD_RX_LOG(DEBUG, "Unrecognized nfp tunnel packet type: %u",
nfp_tunnel_ptype);
break;
}
switch (nfp_ptype->l4_ptype) {
case NFP_NET_PTYPE_L4_NONE:
break;
case NFP_NET_PTYPE_L4_TCP:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_TCP);
break;
case NFP_NET_PTYPE_L4_UDP:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_UDP);
break;
case NFP_NET_PTYPE_L4_FRAG:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_FRAG);
break;
case NFP_NET_PTYPE_L4_NONFRAG:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_NONFRAG);
break;
case NFP_NET_PTYPE_L4_ICMP:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_ICMP);
break;
case NFP_NET_PTYPE_L4_SCTP:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_SCTP);
break;
default:
PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 4 packet type: %u",
nfp_ptype->l4_ptype);
break;
}
switch (nfp_ptype->l3_ptype) {
case NFP_NET_PTYPE_L3_NONE:
break;
case NFP_NET_PTYPE_L3_IPV4:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4);
break;
case NFP_NET_PTYPE_L3_IPV6:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6);
break;
case NFP_NET_PTYPE_L3_IPV4_EXT:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT);
break;
case NFP_NET_PTYPE_L3_IPV6_EXT:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT);
break;
case NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT_UNKNOWN);
break;
case NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN:
mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT_UNKNOWN);
break;
default:
PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 3 packet type: %u",
nfp_ptype->l3_ptype);
break;
}
mb->packet_type = mbuf_ptype;
}
/**
* Parse the packet type from Rx descriptor and set to mbuf.
*
* @param rxds
* Rx descriptor including the offloading info of packet type.
* @param mb
* Mbuf to set the packet type.
*/
static void
nfp_net_parse_ptype(struct nfp_net_rx_desc *rxds,
struct rte_mbuf *mb)
{
struct nfp_ptype_parsed nfp_ptype;
uint16_t rxd_ptype = rxds->rxd.offload_info;
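/*
 * The offload_info field is shared with the VLAN TCI: when the VLAN flag is
 * set it does not carry a packet type, so skip the parsing in that case.
 */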
if (rxd_ptype == 0 || (rxds->rxd.flags & PCIE_DESC_RX_VLAN) != 0)
return;
nfp_ptype.l4_ptype = (rxd_ptype & NFP_NET_PTYPE_L4_MASK) >>
NFP_NET_PTYPE_L4_OFFSET;
nfp_ptype.l3_ptype = (rxd_ptype & NFP_NET_PTYPE_L3_MASK) >>
NFP_NET_PTYPE_L3_OFFSET;
nfp_ptype.tunnel_ptype = (rxd_ptype & NFP_NET_PTYPE_TUNNEL_MASK) >>
NFP_NET_PTYPE_TUNNEL_OFFSET;
nfp_ptype.outer_l3_ptype = (rxd_ptype & NFP_NET_PTYPE_OUTER_L3_MASK) >>
NFP_NET_PTYPE_OUTER_L3_OFFSET;
nfp_net_set_ptype(&nfp_ptype, mb);
}
/*
* RX path design:
*
* There are two main decisions to take:
* 1) How to check the DD bit of the RX descriptors
* 2) How and when to allocate new mbufs
*
* The current implementation checks a single DD bit per loop iteration. As
* each descriptor is 8 bytes, it would likely be better to check all the
* descriptors sharing a cache line instead. Tests with this change have not
* shown any performance improvement, but it deserves further investigation.
* For example, depending on which descriptor is next, fewer than 8
* descriptors may share its cache line, which implies extra work that could
* be counterproductive by itself. Indeed, recent firmware changes do exactly
* this: writing several descriptors with the DD bit set at once to save PCIe
* bandwidth and DMA operations on the NFP.
*
* Mbuf allocation is done when a new packet is received. The descriptor is
* then linked with the new mbuf and the old one is handed to the user. The
* main drawback of this design is that per-packet mbuf allocation is heavier
* than the bulk allocation DPDK offers through rte_mempool_get_bulk. From a
* cache point of view, allocating the mbuf early, as done here, does not
* seem to bring any benefit; again, tests with this change have not shown
* any improvement. Also, rte_mempool_get_bulk returns all or nothing, so the
* implications of this type of allocation should be studied in depth.
*/
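/*
 * Illustrative sketch only, not the driver's current refill path: a bulk
 * variant of the per-packet allocation could use rte_pktmbuf_alloc_bulk()
 * (built on rte_mempool_get_bulk(), hence all-or-nothing). The burst size
 * of 32 is a hypothetical value, not an existing driver constant:
 *
 *   struct rte_mbuf *new_mbufs[32];
 *
 *   if (rte_pktmbuf_alloc_bulk(rxq->mem_pool, new_mbufs, 32) != 0)
 *       return avail;     // all-or-nothing: no mbuf was allocated
 *   // ...then write one freelist descriptor per mbuf and bump the
 *   // QCP write pointer once for the whole burst.
 */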
uint16_t
nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct nfp_net_rxq *rxq;
struct nfp_net_rx_desc *rxds;
struct nfp_net_dp_buf *rxb;
struct nfp_net_hw *hw;
struct rte_mbuf *mb;
struct rte_mbuf *new_mb;
uint16_t nb_hold;
uint64_t dma_addr;
uint16_t avail;
uint16_t avail_multiplexed = 0;
rxq = rx_queue;
if (unlikely(rxq == NULL)) {
/*
 * DPDK only checks that the queue index is below the number of enabled
 * queues, but the queue still needs to have been configured.
 */
PMD_RX_LOG(ERR, "RX Bad queue");
return 0;
}
hw = rxq->hw;
avail = 0;
nb_hold = 0;
while (avail + avail_multiplexed < nb_pkts) {
rxb = &rxq->rxbufs[rxq->rd_p];
if (unlikely(rxb == NULL)) {
PMD_RX_LOG(ERR, "rxb does not exist!");
break;
}
rxds = &rxq->rxds[rxq->rd_p];
if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
break;
/*
 * Memory barrier to ensure that the rest of the descriptor is not read
 * before the DD bit has been checked.
 */
rte_rmb();
/*
* We got a packet. Let's alloc a new mbuf for refilling the
* free descriptor ring as soon as possible
*/
new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
if (unlikely(new_mb == NULL)) {
PMD_RX_LOG(DEBUG,
"RX mbuf alloc failed port_id=%u queue_id=%hu",
rxq->port_id, rxq->qidx);
nfp_net_mbuf_alloc_failed(rxq);
break;
}
/*
* Grab the mbuf and refill the descriptor with the
* previously allocated mbuf
*/
mb = rxb->mbuf;
rxb->mbuf = new_mb;
PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u",
rxds->rxd.data_len, rxq->mbuf_size);
/* Size of this segment */
mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
/* Size of the whole packet. We just support 1 segment */
mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
if (unlikely((mb->data_len + hw->rx_offset) >
rxq->mbuf_size)) {
/*
* This should not happen and the user has the
* responsibility of avoiding it. But we have
* to give some info about the error
*/
PMD_RX_LOG(ERR,
"mbuf overflow likely due to the RX offset.\n"
"\t\tYour mbuf size should have extra space for"
" RX offset=%u bytes.\n"
"\t\tCurrently you just have %u bytes available"
" but the received packet is %u bytes long",
hw->rx_offset,
rxq->mbuf_size - hw->rx_offset,
mb->data_len);
rte_pktmbuf_free(mb);
break;
}
/* Filling the received mbuf with packet info */
if (hw->rx_offset)
mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
else
mb->data_off = RTE_PKTMBUF_HEADROOM +
NFP_DESC_META_LEN(rxds);
/* No scatter mode supported */
mb->nb_segs = 1;
mb->next = NULL;
mb->port = rxq->port_id;
struct nfp_meta_parsed meta = {};
nfp_net_parse_meta(rxds, rxq, hw, mb, &meta);
nfp_net_parse_ptype(rxds, mb);
/* Checking the checksum flag */
nfp_net_rx_cksum(rxq, rxds, mb);
if (meta.port_id == 0) {
rx_pkts[avail++] = mb;
} else if (nfp_flower_pf_dispatch_pkts(hw, mb, meta.port_id)) {
avail_multiplexed++;
} else {
rte_pktmbuf_free(mb);
break;
}
/* Now resetting and updating the descriptor */
rxds->vals[0] = 0;
rxds->vals[1] = 0;
dma_addr = rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb));
rxds->fld.dd = 0;
rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
nb_hold++;
rxq->rd_p++;
if (unlikely(rxq->rd_p == rxq->rx_count)) /* Wrapping? */
rxq->rd_p = 0;
}
if (nb_hold == 0)
return nb_hold;
PMD_RX_LOG(DEBUG, "RX port_id=%hu queue_id=%hu, %hu packets received",
rxq->port_id, rxq->qidx, avail);
nb_hold += rxq->nb_rx_hold;
/*
 * The FL descriptors need to be written before incrementing the
 * FL queue WR pointer.
 */
rte_wmb();
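/*
 * Batch the freelist pointer update: the hardware is only told about the
 * new buffers once more than rx_free_thresh of them have been refilled.
 */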
if (nb_hold > rxq->rx_free_thresh) {
PMD_RX_LOG(DEBUG, "port=%hu queue=%hu nb_hold=%hu avail=%hu",
rxq->port_id, rxq->qidx, nb_hold, avail);
nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
nb_hold = 0;
}
rxq->nb_rx_hold = nb_hold;
return avail;
}
static void
nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
{
unsigned int i;
if (rxq->rxbufs == NULL)
return;
for (i = 0; i < rxq->rx_count; i++) {
if (rxq->rxbufs[i].mbuf) {
rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf);
rxq->rxbufs[i].mbuf = NULL;
}
}
}
void
nfp_net_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_idx)
{
struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx];
if (rxq) {
nfp_net_rx_queue_release_mbufs(rxq);
rte_eth_dma_zone_free(dev, "rx_ring", queue_idx);
rte_free(rxq->rxbufs);
rte_free(rxq);
}
}
void
nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
{
nfp_net_rx_queue_release_mbufs(rxq);
rxq->rd_p = 0;
rxq->nb_rx_hold = 0;
}
int
nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
int ret;
uint16_t min_rx_desc;
uint16_t max_rx_desc;
const struct rte_memzone *tz;
struct nfp_net_rxq *rxq;
struct nfp_net_hw *hw;
uint32_t rx_desc_sz;
hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
PMD_INIT_FUNC_TRACE();
ret = nfp_net_rx_desc_limits(hw, &min_rx_desc, &max_rx_desc);
if (ret != 0)
return ret;
/* Validating number of descriptors */
rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc);
if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 ||
nb_desc > max_rx_desc || nb_desc < min_rx_desc) {
PMD_DRV_LOG(ERR, "Wrong nb_desc value");
return -EINVAL;
}
/*
* Free memory prior to re-allocation if needed. This is the case after
* calling nfp_net_stop
*/
if (dev->data->rx_queues[queue_idx]) {
nfp_net_rx_queue_release(dev, queue_idx);
dev->data->rx_queues[queue_idx] = NULL;
}
/* Allocating rx queue data structure */
rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
RTE_CACHE_LINE_SIZE, socket_id);
if (rxq == NULL)
return -ENOMEM;
dev->data->rx_queues[queue_idx] = rxq;
/* Hw queues mapping based on firmware configuration */
rxq->qidx = queue_idx;
rxq->fl_qcidx = queue_idx * hw->stride_rx;
rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);
/*
* Tracking mbuf size for detecting a potential mbuf overflow due to
* RX offset
*/
rxq->mem_pool = mp;
rxq->mbuf_size = rxq->mem_pool->elt_size;
rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
hw->flbufsz = rxq->mbuf_size;
rxq->rx_count = nb_desc;
rxq->port_id = dev->data->port_id;
rxq->rx_free_thresh = rx_conf->rx_free_thresh;
/*
* Allocate RX ring hardware descriptors. A memzone large enough to
* handle the maximum ring size is allocated in order to allow for
* resizing in later calls to the queue setup function.
*/
tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
sizeof(struct nfp_net_rx_desc) *
max_rx_desc, NFP_MEMZONE_ALIGN,
socket_id);
if (tz == NULL) {
PMD_DRV_LOG(ERR, "Error allocating rx dma");
nfp_net_rx_queue_release(dev, queue_idx);
dev->data->rx_queues[queue_idx] = NULL;
return -ENOMEM;
}
/* Saving physical and virtual addresses for the RX ring */
rxq->dma = (uint64_t)tz->iova;
rxq->rxds = tz->addr;
/* mbuf pointers array for referencing mbufs linked to RX descriptors */
rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
sizeof(*rxq->rxbufs) * nb_desc,
RTE_CACHE_LINE_SIZE, socket_id);
if (rxq->rxbufs == NULL) {
nfp_net_rx_queue_release(dev, queue_idx);
dev->data->rx_queues[queue_idx] = NULL;
return -ENOMEM;
}
PMD_RX_LOG(DEBUG, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64,
rxq->rxbufs, rxq->rxds, (unsigned long)rxq->dma);
nfp_net_reset_rx_queue(rxq);
rxq->hw = hw;
/*
* Telling the HW about the physical address of the RX ring and number
* of descriptors in log2 format
*/
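/* For example, a ring of 4096 descriptors is programmed as rte_log2_u32(4096) = 12 */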
nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma);
nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc));
return 0;
}
/*
* nfp_net_tx_free_bufs - Check for descriptors with a complete
* status
* @txq: TX queue to work with
* Returns number of descriptors freed
*/
int
nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
{
uint32_t qcp_rd_p;
int todo;
PMD_TX_LOG(DEBUG, "queue %hu. Check for descriptor with a complete"
" status", txq->qidx);
/* Work out how many packets have been sent */
qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
if (qcp_rd_p == txq->rd_p) {
PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending "
"packets (%u, %u)", txq->qidx,
qcp_rd_p, txq->rd_p);
return 0;
}
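/* The hardware read pointer may have wrapped around the ring */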
if (qcp_rd_p > txq->rd_p)
todo = qcp_rd_p - txq->rd_p;
else
todo = qcp_rd_p + txq->tx_count - txq->rd_p;
PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u",
qcp_rd_p, txq->rd_p, txq->rd_p);
if (todo == 0)
return todo;
txq->rd_p += todo;
if (unlikely(txq->rd_p >= txq->tx_count))
txq->rd_p -= txq->tx_count;
return todo;
}
static void
nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
{
unsigned int i;
if (txq->txbufs == NULL)
return;
for (i = 0; i < txq->tx_count; i++) {
if (txq->txbufs[i].mbuf) {
rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
txq->txbufs[i].mbuf = NULL;
}
}
}
void
nfp_net_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_idx)
{
struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];
if (txq) {
nfp_net_tx_queue_release_mbufs(txq);
rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
rte_free(txq->txbufs);
rte_free(txq);
}
}
void
nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
{
nfp_net_tx_queue_release_mbufs(txq);
txq->wr_p = 0;
txq->rd_p = 0;
}
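/*
 * nfp_net_set_meta_vlan() - Pack the VLAN TPID and TCI of one layer into a
 * single 32-bit big-endian Tx metadata word.
 */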
void
nfp_net_set_meta_vlan(struct nfp_net_meta_raw *meta_data,
struct rte_mbuf *pkt,
uint8_t layer)
{
uint16_t vlan_tci;
uint16_t tpid;
tpid = RTE_ETHER_TYPE_VLAN;
vlan_tci = pkt->vlan_tci;
meta_data->data[layer] = rte_cpu_to_be_32(tpid << 16 | vlan_tci);
}
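/*
 * nfp_net_tx_queue_setup() - Dispatch to the NFD3 or NFDK specific Tx queue
 * setup depending on the datapath version reported by the firmware.
 */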
int
nfp_net_tx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx,
uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
struct nfp_net_hw *hw;
hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
if (hw->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3)
return nfp_net_nfd3_tx_queue_setup(dev, queue_idx,
nb_desc, socket_id, tx_conf);
else
return nfp_net_nfdk_tx_queue_setup(dev, queue_idx,
nb_desc, socket_id, tx_conf);
}