mdsmap.c 5.05 KB
Newer Older
1
#include <linux/ceph/ceph_debug.h>
Sage Weil's avatar
Sage Weil committed
2 3 4 5 6 7 8

#include <linux/bug.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/types.h>

9 10 11
#include <linux/ceph/mdsmap.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
Sage Weil's avatar
Sage Weil committed
12 13 14 15 16 17 18 19 20 21 22

#include "super.h"


/*
 * Choose a random mds that is "up" (i.e. has a state > 0).
 *
 * Returns the chosen rank (an index into m->m_info), or -1 if no mds
 * in the map is up.
 */
int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
{
	int n = 0;
	int i, j;

	/* special case for one mds */
	if (1 == m->m_max_mds && m->m_info[0].state > 0)
		return 0;

	/* count the up ranks */
	for (i = 0; i < m->m_max_mds; i++)
		if (m->m_info[i].state > 0)
			n++;
	if (n == 0)
		return -1;

	/*
	 * Pick the (n+1)-th up rank.  Down ranks are skipped both before
	 * and after each counted rank, and the scan never leaves the
	 * m_info array.
	 */
	n = prandom_u32() % n;
	for (j = 0, i = 0; i < m->m_max_mds; i++) {
		if (m->m_info[i].state > 0)
			j++;
		if (j > n)
			break;
	}

	return i;
}

/*
 * Decode an MDS map
 *
 * Ignore any fields we don't care about (there are quite a few of
 * them).
 */
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
{
	struct ceph_mdsmap *m;
54
	const void *start = *p;
Sage Weil's avatar
Sage Weil committed
55 56
	int i, j, n;
	int err = -EINVAL;
57
	u8 mdsmap_v, mdsmap_cv;
Sage Weil's avatar
Sage Weil committed
58 59 60 61 62

	m = kzalloc(sizeof(*m), GFP_NOFS);
	if (m == NULL)
		return ERR_PTR(-ENOMEM);

63 64 65 66 67 68 69 70 71
	ceph_decode_need(p, end, 1 + 1, bad);
	mdsmap_v = ceph_decode_8(p);
	mdsmap_cv = ceph_decode_8(p);
	if (mdsmap_v >= 4) {
	       u32 mdsmap_len;
	       ceph_decode_32_safe(p, end, mdsmap_len, bad);
	       if (end < *p + mdsmap_len)
		       goto bad;
	       end = *p + mdsmap_len;
72
	}
Sage Weil's avatar
Sage Weil committed
73 74

	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
75 76 77 78 79 80 81 82
	m->m_epoch = ceph_decode_32(p);
	m->m_client_epoch = ceph_decode_32(p);
	m->m_last_failure = ceph_decode_32(p);
	m->m_root = ceph_decode_32(p);
	m->m_session_timeout = ceph_decode_32(p);
	m->m_session_autoclose = ceph_decode_32(p);
	m->m_max_file_size = ceph_decode_64(p);
	m->m_max_mds = ceph_decode_32(p);
Sage Weil's avatar
Sage Weil committed
83 84 85 86 87 88

	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
	if (m->m_info == NULL)
		goto badmem;

	/* pick out active nodes from mds_info (state > 0) */
89
	n = ceph_decode_32(p);
Sage Weil's avatar
Sage Weil committed
90
	for (i = 0; i < n; i++) {
91
		u64 global_id;
Sage Weil's avatar
Sage Weil committed
92 93 94
		u32 namelen;
		s32 mds, inc, state;
		u64 state_seq;
95 96
		u8 info_v;
		void *info_end = NULL;
Sage Weil's avatar
Sage Weil committed
97 98 99
		struct ceph_entity_addr addr;
		u32 num_export_targets;
		void *pexport_targets = NULL;
100
		struct ceph_timespec laggy_since;
101
		struct ceph_mds_info *info;
Sage Weil's avatar
Sage Weil committed
102

103
		ceph_decode_need(p, end, sizeof(u64) + 1, bad);
104
		global_id = ceph_decode_64(p);
105 106 107 108 109 110 111 112 113 114 115 116 117
		info_v= ceph_decode_8(p);
		if (info_v >= 4) {
			u32 info_len;
			u8 info_cv;
			ceph_decode_need(p, end, 1 + sizeof(u32), bad);
			info_cv = ceph_decode_8(p);
			info_len = ceph_decode_32(p);
			info_end = *p + info_len;
			if (info_end > end)
				goto bad;
		}

		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
118
		*p += sizeof(u64);
119
		namelen = ceph_decode_32(p);  /* skip mds name */
Sage Weil's avatar
Sage Weil committed
120 121 122
		*p += namelen;

		ceph_decode_need(p, end,
123
				 4*sizeof(u32) + sizeof(u64) +
Sage Weil's avatar
Sage Weil committed
124 125
				 sizeof(addr) + sizeof(struct ceph_timespec),
				 bad);
126 127 128 129
		mds = ceph_decode_32(p);
		inc = ceph_decode_32(p);
		state = ceph_decode_32(p);
		state_seq = ceph_decode_64(p);
130 131
		ceph_decode_copy(p, &addr, sizeof(addr));
		ceph_decode_addr(&addr);
132
		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
Sage Weil's avatar
Sage Weil committed
133 134
		*p += sizeof(u32);
		ceph_decode_32_safe(p, end, namelen, bad);
135
		*p += namelen;
136
		if (info_v >= 2) {
Sage Weil's avatar
Sage Weil committed
137 138
			ceph_decode_32_safe(p, end, num_export_targets, bad);
			pexport_targets = *p;
139
			*p += num_export_targets * sizeof(u32);
Sage Weil's avatar
Sage Weil committed
140 141 142 143
		} else {
			num_export_targets = 0;
		}

144 145 146 147 148 149
		if (info_end && *p != info_end) {
			if (*p > info_end)
				goto bad;
			*p = info_end;
		}

150
		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
151 152
		     i+1, n, global_id, mds, inc,
		     ceph_pr_addr(&addr.in_addr),
Sage Weil's avatar
Sage Weil committed
153
		     ceph_mds_state_name(state));
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174

		if (mds < 0 || mds >= m->m_max_mds || state <= 0)
			continue;

		info = &m->m_info[mds];
		info->global_id = global_id;
		info->state = state;
		info->addr = addr;
		info->laggy = (laggy_since.tv_sec != 0 ||
			       laggy_since.tv_nsec != 0);
		info->num_export_targets = num_export_targets;
		if (num_export_targets) {
			info->export_targets = kcalloc(num_export_targets,
						       sizeof(u32), GFP_NOFS);
			if (info->export_targets == NULL)
				goto badmem;
			for (j = 0; j < num_export_targets; j++)
				info->export_targets[j] =
				       ceph_decode_32(&pexport_targets);
		} else {
			info->export_targets = NULL;
Sage Weil's avatar
Sage Weil committed
175 176 177 178 179 180
		}
	}

	/* pg_pools */
	ceph_decode_32_safe(p, end, n, bad);
	m->m_num_data_pg_pools = n;
181
	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
Sage Weil's avatar
Sage Weil committed
182 183
	if (!m->m_data_pg_pools)
		goto badmem;
184
	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
Sage Weil's avatar
Sage Weil committed
185
	for (i = 0; i < n; i++)
186 187
		m->m_data_pg_pools[i] = ceph_decode_64(p);
	m->m_cas_pg_pool = ceph_decode_64(p);
Sage Weil's avatar
Sage Weil committed
188 189

	/* ok, we don't care about the rest. */
190
	*p = end;
Sage Weil's avatar
Sage Weil committed
191 192 193 194 195 196 197
	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
	return m;

badmem:
	err = -ENOMEM;
bad:
	pr_err("corrupt mdsmap\n");
198 199 200
	print_hex_dump(KERN_DEBUG, "mdsmap: ",
		       DUMP_PREFIX_OFFSET, 16, 1,
		       start, end - start, true);
Sage Weil's avatar
Sage Weil committed
201
	ceph_mdsmap_destroy(m);
202
	return ERR_PTR(err);
Sage Weil's avatar
Sage Weil committed
203 204 205 206 207 208 209 210 211 212 213 214
}

/*
 * Free an mdsmap and everything it owns: each rank's export target
 * list, the m_info array, the data pg pool array, and the map itself.
 *
 * Safe to call on a partially constructed map whose m_info was never
 * allocated (as happens on the decode error path).
 */
void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
{
	int i;

	/* m_max_mds may already be set even though m_info is still NULL */
	if (m->m_info) {
		for (i = 0; i < m->m_max_mds; i++)
			kfree(m->m_info[i].export_targets);
		kfree(m->m_info);
	}
	kfree(m->m_data_pg_pools);
	kfree(m);
}