Commit 4cb53f3f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlx4-next'



Or Gerlitz says:

====================
mlx4_core driver updates

A series from Jack and Co of low-level fixes for the mlx4_core driver
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 57f5877c 1daa4303
......@@ -580,8 +580,18 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
err = context->result;
if (err) {
mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
op, context->fw_status);
/* Since we do not want to have this error message always
* displayed at driver start when there are ConnectX2 HCAs
* on the host, we deprecate the error message for this
* specific command/input_mod/opcode_mod/fw-status to be debug.
*/
if (op == MLX4_CMD_SET_PORT && in_modifier == 1 &&
op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE)
mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n",
op, context->fw_status);
else
mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
op, context->fw_status);
goto out;
}
......
......@@ -982,8 +982,13 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
if (port < 0)
return -EINVAL;
vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) |
(port & 0xFF);
/* Protect against untrusted guests: enforce that this is the
* QUERY_PORT general query.
*/
if (vhcr->op_modifier || vhcr->in_modifier & ~0xFF)
return -EINVAL;
vhcr->in_modifier = port;
err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0,
MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
......
......@@ -2259,115 +2259,18 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
iounmap(owner);
}
static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
int total_vfs, int *nvfs, struct mlx4_priv *priv)
{
struct mlx4_priv *priv;
struct mlx4_dev *dev;
unsigned sum = 0;
int err;
int port;
int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
unsigned total_vfs = 0;
int sriov_initialized = 0;
unsigned int i;
pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
return err;
}
/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
* per port, we must limit the number of VFs to 63 (since their are
* 128 MACs)
*/
for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
if (nvfs[i] < 0) {
dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
return -EINVAL;
}
}
for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
i++) {
prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
return -EINVAL;
}
}
if (total_vfs >= MLX4_MAX_NUM_VF) {
dev_err(&pdev->dev,
"Requested more VF's (%d) than allowed (%d)\n",
total_vfs, MLX4_MAX_NUM_VF - 1);
return -EINVAL;
}
for (i = 0; i < MLX4_MAX_PORTS; i++) {
if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
dev_err(&pdev->dev,
"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
nvfs[i] + nvfs[2], i + 1,
MLX4_MAX_NUM_VF_P_PORT - 1);
return -EINVAL;
}
}
/*
* Check for BARs.
*/
if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
pci_dev_data, pci_resource_flags(pdev, 0));
err = -ENODEV;
goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "Missing UAR, aborting\n");
err = -ENODEV;
goto err_disable_pdev;
}
err = pci_request_regions(pdev, DRV_NAME);
if (err) {
dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
goto err_disable_pdev;
}
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
goto err_release_regions;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
goto err_release_regions;
}
}
int i;
int existing_vfs = 0;
/* Allow large DMA segments, up to the firmware limit of 1 GB */
dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
dev = &priv->dev;
dev = pci_get_drvdata(pdev);
priv = mlx4_priv(dev);
dev->pdev = pdev;
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
......@@ -2381,28 +2284,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
dev->rev_id = pdev->revision;
dev->numa_node = dev_to_node(&pdev->dev);
/* Detect if this device is a virtual function */
if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
/* When acting as pf, we normally skip vfs unless explicitly
* requested to probe them. */
if (total_vfs) {
unsigned vfs_offset = 0;
for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
vfs_offset + nvfs[i] < extended_func_num(pdev);
vfs_offset += nvfs[i], i++)
;
if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
err = -ENODEV;
goto err_free_dev;
}
if ((extended_func_num(pdev) - vfs_offset)
> prb_vf[i]) {
mlx4_warn(dev, "Skipping virtual function:%d\n",
extended_func_num(pdev));
err = -ENODEV;
goto err_free_dev;
}
}
mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
dev->flags |= MLX4_FLAG_SLAVE;
} else {
......@@ -2412,11 +2296,10 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
err = mlx4_get_ownership(dev);
if (err) {
if (err < 0)
goto err_free_dev;
return err;
else {
mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
err = -EINVAL;
goto err_free_dev;
return -EINVAL;
}
}
......@@ -2428,21 +2311,28 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
GFP_KERNEL);
if (NULL == dev->dev_vfs) {
mlx4_err(dev, "Failed to allocate memory for VFs\n");
err = 0;
err = -ENOMEM;
goto err_free_own;
} else {
atomic_inc(&pf_loading);
err = pci_enable_sriov(pdev, total_vfs);
existing_vfs = pci_num_vf(pdev);
if (existing_vfs) {
err = 0;
if (existing_vfs != total_vfs)
mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
existing_vfs, total_vfs);
} else {
err = pci_enable_sriov(pdev, total_vfs);
}
if (err) {
mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
err);
atomic_dec(&pf_loading);
err = 0;
} else {
mlx4_warn(dev, "Running in master mode\n");
dev->flags |= MLX4_FLAG_SRIOV |
MLX4_FLAG_MASTER;
dev->num_vfs = total_vfs;
sriov_initialized = 1;
}
}
}
......@@ -2458,7 +2348,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
err = mlx4_reset(dev);
if (err) {
mlx4_err(dev, "Failed to reset HCA, aborting\n");
goto err_rel_own;
goto err_sriov;
}
}
......@@ -2508,34 +2398,46 @@ slave_start:
/* In master functions, the communication channel must be initialized
* after obtaining its address from fw */
if (mlx4_is_master(dev)) {
unsigned sum = 0;
err = mlx4_multi_func_init(dev);
if (err) {
mlx4_err(dev, "Failed to init master mfunc interface, aborting\n");
int ib_ports = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_ports++;
if (ib_ports &&
(num_vfs_argc > 1 || probe_vfs_argc > 1)) {
mlx4_err(dev,
"Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
err = -EINVAL;
goto err_close;
}
if (dev->caps.num_ports < 2 &&
num_vfs_argc > 1) {
err = -EINVAL;
mlx4_err(dev,
"Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
dev->caps.num_ports);
goto err_close;
}
if (sriov_initialized) {
int ib_ports = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_ports++;
memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs));
if (ib_ports &&
(num_vfs_argc > 1 || probe_vfs_argc > 1)) {
mlx4_err(dev,
"Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
err = -EINVAL;
goto err_master_mfunc;
}
for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]); i++) {
unsigned j;
for (j = 0; j < nvfs[i]; ++sum, ++j) {
dev->dev_vfs[sum].min_port =
i < 2 ? i + 1 : 1;
dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
dev->caps.num_ports;
}
for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) {
unsigned j;
for (j = 0; j < dev->nvfs[i]; ++sum, ++j) {
dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
dev->caps.num_ports;
}
}
/* In master functions, the communication channel
* must be initialized after obtaining its address from fw
*/
err = mlx4_multi_func_init(dev);
if (err) {
mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
goto err_close;
}
}
err = mlx4_alloc_eq_table(dev);
......@@ -2556,7 +2458,7 @@ slave_start:
if (!mlx4_is_slave(dev)) {
err = mlx4_init_steering(dev);
if (err)
goto err_free_eq;
goto err_disable_msix;
}
err = mlx4_setup_hca(dev);
......@@ -2616,6 +2518,10 @@ err_steer:
if (!mlx4_is_slave(dev))
mlx4_clear_steering(dev);
err_disable_msix:
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
err_free_eq:
mlx4_free_eq_table(dev);
......@@ -2632,9 +2538,6 @@ err_master_mfunc:
}
err_close:
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
mlx4_close_hca(dev);
err_mfunc:
......@@ -2645,20 +2548,154 @@ err_cmd:
mlx4_cmd_cleanup(dev);
err_sriov:
if (dev->flags & MLX4_FLAG_SRIOV)
if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
pci_disable_sriov(pdev);
err_rel_own:
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
if (mlx4_is_master(dev) && dev->num_vfs)
atomic_dec(&pf_loading);
kfree(priv->dev.dev_vfs);
err_free_dev:
kfree(priv);
err_free_own:
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
return err;
}
static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
struct mlx4_priv *priv)
{
int err;
int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
unsigned total_vfs = 0;
unsigned int i;
pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
return err;
}
/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
* per port, we must limit the number of VFs to 63 (since their are
* 128 MACs)
*/
for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
if (nvfs[i] < 0) {
dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
err = -EINVAL;
goto err_disable_pdev;
}
}
for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
i++) {
prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
err = -EINVAL;
goto err_disable_pdev;
}
}
if (total_vfs >= MLX4_MAX_NUM_VF) {
dev_err(&pdev->dev,
"Requested more VF's (%d) than allowed (%d)\n",
total_vfs, MLX4_MAX_NUM_VF - 1);
err = -EINVAL;
goto err_disable_pdev;
}
for (i = 0; i < MLX4_MAX_PORTS; i++) {
if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
dev_err(&pdev->dev,
"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
nvfs[i] + nvfs[2], i + 1,
MLX4_MAX_NUM_VF_P_PORT - 1);
err = -EINVAL;
goto err_disable_pdev;
}
}
/* Check for BARs. */
if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
pci_dev_data, pci_resource_flags(pdev, 0));
err = -ENODEV;
goto err_disable_pdev;
}
if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "Missing UAR, aborting\n");
err = -ENODEV;
goto err_disable_pdev;
}
err = pci_request_regions(pdev, DRV_NAME);
if (err) {
dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
goto err_disable_pdev;
}
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
goto err_release_regions;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
goto err_release_regions;
}
}
/* Allow large DMA segments, up to the firmware limit of 1 GB */
dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
/* Detect if this device is a virtual function */
if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
/* When acting as pf, we normally skip vfs unless explicitly
* requested to probe them.
*/
if (total_vfs) {
unsigned vfs_offset = 0;
for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
vfs_offset + nvfs[i] < extended_func_num(pdev);
vfs_offset += nvfs[i], i++)
;
if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
err = -ENODEV;
goto err_release_regions;
}
if ((extended_func_num(pdev) - vfs_offset)
> prb_vf[i]) {
dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
extended_func_num(pdev));
err = -ENODEV;
goto err_release_regions;
}
}
}
err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
if (err)
goto err_release_regions;
return 0;
err_release_regions:
pci_release_regions(pdev);
......@@ -2673,6 +2710,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct mlx4_priv *priv;
struct mlx4_dev *dev;
int ret;
printk_once(KERN_INFO "%s", mlx4_version);
......@@ -2681,28 +2719,38 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
dev = &priv->dev;
dev->pdev = pdev;
pci_set_drvdata(pdev, dev);
priv->pci_dev_data = id->driver_data;
return __mlx4_init_one(pdev, id->driver_data);
ret = __mlx4_init_one(pdev, id->driver_data, priv);
if (ret)
kfree(priv);
return ret;
}
static void __mlx4_remove_one(struct pci_dev *pdev)
static void mlx4_unload_one(struct pci_dev *pdev)
{
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
int pci_dev_data;
int p;
int active_vfs = 0;
if (priv->removed)
return;
pci_dev_data = priv->pci_dev_data;
/* in SRIOV it is not allowed to unload the pf's
* driver while there are alive vf's */
if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev))
pr_warn("Removing PF when there are assigned VF's !!!\n");
/* Disabling SR-IOV is not allowed while there are active vf's */
if (mlx4_is_master(dev)) {
active_vfs = mlx4_how_many_lives_vf(dev);
if (active_vfs) {
pr_warn("Removing PF when there are active VF's !!\n");
pr_warn("Will not disable SR-IOV.\n");
}
}
mlx4_stop_sense(dev);
mlx4_unregister_device(dev);
......@@ -2745,7 +2793,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
if (dev->flags & MLX4_FLAG_SRIOV) {
if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
mlx4_warn(dev, "Disabling SR-IOV\n");
pci_disable_sriov(pdev);
dev->num_vfs = 0;
......@@ -2761,8 +2809,6 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
kfree(dev->caps.qp1_proxy);
kfree(dev->dev_vfs);
pci_release_regions(pdev);
pci_disable_device(pdev);
memset(priv, 0, sizeof(*priv));
priv->pci_dev_data = pci_dev_data;
priv->removed = 1;
......@@ -2773,7 +2819,9 @@ static void mlx4_remove_one(struct pci_dev *pdev)
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
__mlx4_remove_one(pdev);
mlx4_unload_one(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
kfree(priv);
pci_set_drvdata(pdev, NULL);
}
......@@ -2782,11 +2830,22 @@ int mlx4_restart_one(struct pci_dev *pdev)
{
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
int pci_dev_data;
int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
int pci_dev_data, err, total_vfs;
pci_dev_data = priv->pci_dev_data;
__mlx4_remove_one(pdev);
return __mlx4_init_one(pdev, pci_dev_data);
total_vfs = dev->num_vfs;
memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs));