sysctl_net_core.c 8.61 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
11
#include <linux/socket.h>
12
#include <linux/netdevice.h>
13
#include <linux/ratelimit.h>
Tom Herbert's avatar
Tom Herbert committed
14
#include <linux/vmalloc.h>
15
#include <linux/init.h>
16
#include <linux/slab.h>
17
#include <linux/kmemleak.h>
18

19
#include <net/ip.h>
20
#include <net/sock.h>
21
#include <net/net_ratelimit.h>
22
#include <net/busy_poll.h>
Linus Torvalds's avatar
Linus Torvalds committed
23

24
25
/* Lower bound (.extra1) for the proc_dointvec_minmax entries in net_core_table. */
static int one = 1;

Tom Herbert's avatar
Tom Herbert committed
26
#ifdef CONFIG_RPS
/*
 * Handler for the "rps_sock_flow_entries" sysctl.
 *
 * Reads report the number of entries in the global rps_sock_flow_table
 * (mask + 1), or 0 when no table is installed.
 *
 * Writes resize the table: a non-zero value is rounded up to a power of two
 * and a table of that size is installed with every entry set to RPS_NO_CPU;
 * zero removes the table.  Writing a value that rounds to the current size
 * keeps the existing table but still resets all of its entries.
 *
 * Returns the proc_dointvec() result, -EINVAL if the requested size exceeds
 * 1 << 30, or -ENOMEM if the new table cannot be allocated.  A write that
 * proc_dointvec() rejects leaves the current table untouched.
 */
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	unsigned int i;
	int ret;
	/* Temporary table so proc_dointvec() reads/writes the local 'size'. */
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	/* Nothing to apply on a read, or when the write was rejected. */
	if (!write || ret)
		goto out;

	if (size) {
		if (size > 1<<30) {
			/* Enforce limit to prevent overflow */
			ret = -EINVAL;
			goto out;
		}
		size = roundup_pow_of_two(size);
		if (size != orig_size) {
			sock_table = vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
			if (!sock_table) {
				ret = -ENOMEM;
				goto out;
			}

			sock_table->mask = size - 1;
		} else {
			sock_table = orig_sock_table;
		}

		for (i = 0; i < size; i++)
			sock_table->ents[i] = RPS_NO_CPU;
	} else {
		sock_table = NULL;
	}

	if (sock_table != orig_sock_table) {
		rcu_assign_pointer(rps_sock_flow_table, sock_table);
		/*
		 * Account for the new table before dropping the old table's
		 * count on rps_needed.
		 */
		if (sock_table)
			static_key_slow_inc(&rps_needed);
		if (orig_sock_table) {
			static_key_slow_dec(&rps_needed);
			/* Wait for RCU readers before freeing the old table. */
			synchronize_rcu();
			vfree(orig_sock_table);
		}
	}

out:
	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

91
92
93
#ifdef CONFIG_NET_FLOW_LIMIT
/*
 * Held while flow_limit_cpu_sysctl() replaces the per-CPU sd->flow_limit
 * pointers and while flow_limit_table_len_sysctl() updates the table length.
 */
static DEFINE_MUTEX(flow_limit_update_mutex);

94
static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse_user(buffer, *lenp, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				synchronize_rcu();
				kfree(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc(len, GFP_KERNEL);
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
135
136
		char kbuf[128];

137
138
139
140
141
142
143
144
145
146
147
148
149
150
		if (*ppos || !*lenp) {
			*lenp = 0;
			goto done;
		}

		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

151
152
153
154
155
156
157
158
159
160
161
162
163
164
		len = min(sizeof(kbuf) - 1, *lenp);
		len = cpumask_scnprintf(kbuf, len, mask);
		if (!len) {
			*lenp = 0;
			goto done;
		}
		if (len < *lenp)
			kbuf[len++] = '\n';
		if (copy_to_user(buffer, kbuf, len)) {
			ret = -EFAULT;
			goto done;
		}
		*lenp = len;
		*ppos += len;
165
166
167
168
169
170
171
	}

done:
	free_cpumask_var(mask);
	return ret;
}

172
static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
				       void __user *buffer, size_t *lenp,
				       loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

194
static struct ctl_table net_core_table[] = {
Linus Torvalds's avatar
Linus Torvalds committed
195
196
197
198
199
200
#ifdef CONFIG_NET
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
201
202
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
Linus Torvalds's avatar
Linus Torvalds committed
203
204
205
206
207
208
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
209
210
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
Linus Torvalds's avatar
Linus Torvalds committed
211
212
213
214
215
216
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
217
218
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
Linus Torvalds's avatar
Linus Torvalds committed
219
220
221
222
223
224
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
225
226
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &one,
Linus Torvalds's avatar
Linus Torvalds committed
227
228
229
230
231
232
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
233
		.proc_handler	= proc_dointvec
Linus Torvalds's avatar
Linus Torvalds committed
234
235
236
237
238
239
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &netdev_max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
240
		.proc_handler	= proc_dointvec
Linus Torvalds's avatar
Linus Torvalds committed
241
	},
242
243
244
245
246
247
248
249
250
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
251
252
253
254
255
256
257
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &netdev_tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
Linus Torvalds's avatar
Linus Torvalds committed
258
259
	{
		.procname	= "message_cost",
Dave Young's avatar
Dave Young committed
260
		.data		= &net_ratelimit_state.interval,
Linus Torvalds's avatar
Linus Torvalds committed
261
262
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
263
		.proc_handler	= proc_dointvec_jiffies,
Linus Torvalds's avatar
Linus Torvalds committed
264
265
266
	},
	{
		.procname	= "message_burst",
Dave Young's avatar
Dave Young committed
267
		.data		= &net_ratelimit_state.burst,
Linus Torvalds's avatar
Linus Torvalds committed
268
269
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
270
		.proc_handler	= proc_dointvec,
Linus Torvalds's avatar
Linus Torvalds committed
271
272
273
274
275
276
	},
	{
		.procname	= "optmem_max",
		.data		= &sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
277
		.proc_handler	= proc_dointvec
Linus Torvalds's avatar
Linus Torvalds committed
278
	},
Tom Herbert's avatar
Tom Herbert committed
279
280
281
282
283
284
285
286
#ifdef CONFIG_RPS
	{
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
301
302
303
304
#ifdef CONFIG_NET_LL_RX_POLL
	{
		.procname	= "low_latency_poll",
		.data		= &sysctl_net_ll_poll,
305
		.maxlen		= sizeof(unsigned int),
306
		.mode		= 0644,
307
		.proc_handler	= proc_dointvec
308
	},
309
310
311
312
313
314
315
316
	{
		.procname	= "low_latency_read",
		.data		= &sysctl_net_ll_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#
317
#endif
Linus Torvalds's avatar
Linus Torvalds committed
318
#endif /* CONFIG_NET */
319
320
321
322
323
	{
		.procname	= "netdev_budget",
		.data		= &netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
324
		.proc_handler	= proc_dointvec
325
	},
326
327
328
329
330
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
331
		.proc_handler	= proc_dointvec
332
	},
333
	{ }
Linus Torvalds's avatar
Linus Torvalds committed
334
};
335

336
337
338
339
340
341
static struct ctl_table netns_core_table[] = {
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
Alexey Dobriyan's avatar
Alexey Dobriyan committed
342
		.proc_handler	= proc_dointvec
343
	},
344
	{ }
345
346
};

347
static __net_init int sysctl_core_net_init(struct net *net)
348
{
349
	struct ctl_table *tbl;
350

351
	net->core.sysctl_somaxconn = SOMAXCONN;
352

353
	tbl = netns_core_table;
354
	if (!net_eq(net, &init_net)) {
355
		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
356
357
358
		if (tbl == NULL)
			goto err_dup;

359
		tbl[0].data = &net->core.sysctl_somaxconn;
360
361
362
363
364

		/* Don't export any sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns) {
			tbl[0].procname = NULL;
		}
365
366
	}

367
	net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
368
	if (net->core.sysctl_hdr == NULL)
369
		goto err_reg;
370

371
372
373
	return 0;

err_reg:
374
	if (tbl != netns_core_table)
375
376
377
378
379
380
381
382
383
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_core_net_exit(struct net *net)
{
	struct ctl_table *tbl;

384
385
	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
386
	BUG_ON(tbl == netns_core_table);
387
388
389
390
391
392
393
394
395
396
	kfree(tbl);
}

/*
 * Per-network-namespace hooks for the net/core sysctls; registered by
 * sysctl_core_init() through register_pernet_subsys().
 */
static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

static __init int sysctl_core_init(void)
{
397
	register_net_sysctl(&init_net, "net/core", net_core_table);
398
	return register_pernet_subsys(&sysctl_core_ops);
399
400
}

401
fs_initcall(sysctl_core_init);