Commit 49d9601b authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files
Alexei Starovoitov says:

====================
bpf 2023-01-04

We've added 5 non-merge commits during the last 8 day(s) which contain
a total of 5 files changed, 112 insertions(+), 18 deletions(-).

The main changes are:

1) Always use maximal size for copy_array in the verifier to fix
   KASAN tracking, from Kees.

2) Fix bpf task iterator walking through dead tasks, from Kui-Feng.

3) Make sure livepatch and bpf fexit can coexist, from Chuang.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Always use maximal size for copy_array()
  selftests/bpf: add a test for iter/task_vma for short-lived processes
  bpf: keep a reference to the mm, in case the task is dead.
  selftests/bpf: Temporarily disable part of btf_dump:var_data test.
  bpf: Fix panic due to wrong pageattr of im->image
====================

Link: https://lore.kernel.org/r/20230104215500.79435-1-alexei.starovoitov@gmail.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 55801672 45435d8d
Loading
Loading
Loading
Loading
+27 −12
Original line number Diff line number Diff line
@@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info {
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	u32 tid;
	unsigned long prev_vm_start;
@@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
	enum bpf_task_vma_iter_find_op op;
	struct vm_area_struct *curr_vma;
	struct task_struct *curr_task;
	struct mm_struct *curr_mm;
	u32 saved_tid = info->tid;

	/* If this function returns a non-NULL vma, it holds a reference to
	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
	 * the task_struct, holds a refcount on mm->mm_users, and holds
	 * read lock on vma->mm->mmap_lock.
	 * If this function returns NULL, it does not hold any reference or
	 * lock.
	 */
	if (info->task) {
		curr_task = info->task;
		curr_vma = info->vma;
		curr_mm = info->mm;
		/* In case of lock contention, drop mmap_lock to unblock
		 * the writer.
		 *
@@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
		 *    4.2) VMA2 and VMA2' covers different ranges, process
		 *         VMA2'.
		 */
		if (mmap_lock_is_contended(curr_task->mm)) {
		if (mmap_lock_is_contended(curr_mm)) {
			info->prev_vm_start = curr_vma->vm_start;
			info->prev_vm_end = curr_vma->vm_end;
			op = task_vma_iter_find_vma;
			mmap_read_unlock(curr_task->mm);
			if (mmap_read_lock_killable(curr_task->mm))
			mmap_read_unlock(curr_mm);
			if (mmap_read_lock_killable(curr_mm)) {
				mmput(curr_mm);
				goto finish;
			}
		} else {
			op = task_vma_iter_next_vma;
		}
@@ -535,42 +541,47 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
			op = task_vma_iter_find_vma;
		}

		if (!curr_task->mm)
		curr_mm = get_task_mm(curr_task);
		if (!curr_mm)
			goto next_task;

		if (mmap_read_lock_killable(curr_task->mm))
		if (mmap_read_lock_killable(curr_mm)) {
			mmput(curr_mm);
			goto finish;
		}
	}

	switch (op) {
	case task_vma_iter_first_vma:
		curr_vma = find_vma(curr_task->mm, 0);
		curr_vma = find_vma(curr_mm, 0);
		break;
	case task_vma_iter_next_vma:
		curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
		curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	case task_vma_iter_find_vma:
		/* We dropped mmap_lock so it is necessary to use find_vma
		 * to find the next vma. This is similar to the  mechanism
		 * in show_smaps_rollup().
		 */
		curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
		curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
		/* case 1) and 4.2) above just use curr_vma */

		/* check for case 2) or case 4.1) above */
		if (curr_vma &&
		    curr_vma->vm_start == info->prev_vm_start &&
		    curr_vma->vm_end == info->prev_vm_end)
			curr_vma = find_vma(curr_task->mm, curr_vma->vm_end);
			curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	}
	if (!curr_vma) {
		/* case 3) above, or case 2) 4.1) with vma->next == NULL */
		mmap_read_unlock(curr_task->mm);
		mmap_read_unlock(curr_mm);
		mmput(curr_mm);
		goto next_task;
	}
	info->task = curr_task;
	info->vma = curr_vma;
	info->mm = curr_mm;
	return curr_vma;

next_task:
@@ -579,6 +590,7 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)

	put_task_struct(curr_task);
	info->task = NULL;
	info->mm = NULL;
	info->tid++;
	goto again;

@@ -587,6 +599,7 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
		put_task_struct(curr_task);
	info->task = NULL;
	info->vma = NULL;
	info->mm = NULL;
	return NULL;
}

@@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v)
		 */
		info->prev_vm_start = ~0UL;
		info->prev_vm_end = info->vma->vm_end;
		mmap_read_unlock(info->task->mm);
		mmap_read_unlock(info->mm);
		mmput(info->mm);
		info->mm = NULL;
		put_task_struct(info->task);
		info->task = NULL;
	}
+4 −0
Original line number Diff line number Diff line
@@ -488,6 +488,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
		/* reset fops->func and fops->trampoline for re-register */
		tr->fops->func = NULL;
		tr->fops->trampoline = 0;

		/* reset im->image memory attr for arch_prepare_bpf_trampoline */
		set_memory_nx((long)im->image, 1);
		set_memory_rw((long)im->image, 1);
		goto again;
	}
#endif
+7 −5
Original line number Diff line number Diff line
@@ -1054,6 +1054,8 @@ static void print_insn_state(struct bpf_verifier_env *env,
 */
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t alloc_bytes;
	void *orig = dst;
	size_t bytes;

	if (ZERO_OR_NULL_PTR(src))
@@ -1062,10 +1064,10 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t
	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	if (ksize(dst) < ksize(src)) {
		kfree(dst);
		dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags);
		if (!dst)
	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		kfree(orig);
		return NULL;
	}

+73 −0
Original line number Diff line number Diff line
@@ -1465,6 +1465,77 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
	bpf_iter_task_vma__destroy(skel);
}

static void test_task_vma_dead_task(void)
{
	struct bpf_iter_task_vma *skel;
	int wstatus, child_pid = -1;
	time_t start_tm, cur_tm;
	int err, iter_fd = -1;
	int wait_sec = 3;

	skel = bpf_iter_task_vma__open();
	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
		return;

	skel->bss->pid = getpid();

	err = bpf_iter_task_vma__load(skel);
	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
		goto out;

	skel->links.proc_maps = bpf_program__attach_iter(
		skel->progs.proc_maps, NULL);

	if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
		skel->links.proc_maps = NULL;
		goto out;
	}

	start_tm = time(NULL);
	cur_tm = start_tm;

	child_pid = fork();
	if (child_pid == 0) {
		/* Fork short-lived processes in the background. */
		while (cur_tm < start_tm + wait_sec) {
			system("echo > /dev/null");
			cur_tm = time(NULL);
		}
		exit(0);
	}

	if (!ASSERT_GE(child_pid, 0, "fork_child"))
		goto out;

	while (cur_tm < start_tm + wait_sec) {
		iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
		if (!ASSERT_GE(iter_fd, 0, "create_iter"))
			goto out;

		/* Drain all data from iter_fd. */
		while (cur_tm < start_tm + wait_sec) {
			err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE);
			if (!ASSERT_GE(err, 0, "read_iter_fd"))
				goto out;

			cur_tm = time(NULL);

			if (err == 0)
				break;
		}

		close(iter_fd);
		iter_fd = -1;
	}

	check_bpf_link_info(skel->progs.proc_maps);

out:
	waitpid(child_pid, &wstatus, 0);
	close(iter_fd);
	bpf_iter_task_vma__destroy(skel);
}

void test_bpf_sockmap_map_iter_fd(void)
{
	struct bpf_iter_sockmap *skel;
@@ -1586,6 +1657,8 @@ void test_bpf_iter(void)
		test_task_file();
	if (test__start_subtest("task_vma"))
		test_task_vma();
	if (test__start_subtest("task_vma_dead_task"))
		test_task_vma_dead_task();
	if (test__start_subtest("task_btf"))
		test_task_btf();
	if (test__start_subtest("tcp4"))
+1 −1
Original line number Diff line number Diff line
@@ -801,7 +801,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
				   char *str)
{
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
#if 0
	TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
			  "int cpu_number = (int)100", 100);
#endif