Linux 匿名页反向映射

news2025/6/8 17:05:25

1. 何为反向映射

正向映射: 用户进程在申请内存时,内核并不会立刻给其分配物理内存,而是先为其分配一段虚拟地址空间,当进程访问该虚拟地址空间时,触发page fault异常,异常处理流程中会为其分配物理页面,并将虚拟地址和物理地址的映射关系保存在进程页表中,后续我们可以通过页表找到这个已分配的物理内存。
反向映射: 反向映射恰好相反,想根据已分配的物理页找到该映射到该物理页上的所有用户进程,该操作主要用于页面回收,当内存不足时,回收一个页面就需要找到映射到该页面的所有用户进程,修改进程页表的映射信息,防止出现非法访问。最简单找到该页面被哪些进程所映射的方式,是通过遍历所有的进程的页表,然后遍历每个页表项,检查是否映射到该页面上,但这种方式无疑是最耗时的,所以才有了后面的反向映射机制。

本篇只讲述匿名页的反向映射原理,文件页的反向映射留着以后再叙述。

2. 匿名页反向映射数据结构

2.1 struct page

struct page {
	...
	union {
		struct {	/* Page cache and anonymous pages */
			...
			/* See page-flags.h for PAGE_MAPPING_FLAGS */
			struct address_space *mapping;
			pgoff_t index;		/* Our offset within mapping. */
			...
		};
} _struct_page_alignment;

匿名页的数据结构中有一个 mapping 指针,通过这个mapping可以找到anon_vma,通过遍历anon_vma->root下所有的anon_vma_chain,可以获得所有的vma,进而就可以知道有哪些应用进程映射了该匿名页。

2.2 struct anon_vma

/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
struct anon_vma {
	struct anon_vma *root;		/* Root of this anon_vma tree */
	...
	struct anon_vma *parent;	/* Parent of this anon_vma */
	/* Interval tree of private "related" vmas */
	struct rb_root_cached rb_root;
};

anon_vma 反向映射的关键数据结构,匿名页通过mapping指向该数据结构,通过该数据结构保存的信息进而获取到映射该匿名页的所有用户进程。

2.3 struct anon_vma_chain

/*
 * The copy-on-write semantics of fork mean that an anon_vma
 * can become associated with multiple processes. Furthermore,
 * each child process will have its own anon_vma, where new
 * pages for that process are instantiated.
 *
 * This structure allows us to find the anon_vmas associated
 * with a VMA, or the VMAs associated with an anon_vma.
 * The "same_vma" list contains the anon_vma_chains linking
 * all the anon_vmas associated with this VMA.
 * The "rb" field indexes on an interval tree the anon_vma_chains
 * which link all the VMAs associated with this anon_vma.
 */
struct anon_vma_chain {
	struct vm_area_struct *vma;
	struct anon_vma *anon_vma;
	struct list_head same_vma;   /* locked by mmap_lock & page_table_lock */
	struct rb_node rb;			/* locked by anon_vma->rwsem */
	unsigned long rb_subtree_last;
#ifdef CONFIG_DEBUG_VM_RB
	unsigned long cached_vma_start, cached_vma_last;
#endif
};

anon_vma_chain 作为链接anon_vma和vma之间的桥梁,为啥需要有这个桥梁的存在,主要是减少锁冲突。

2.4 struct vm_area_struct

/*
 * This struct describes a virtual memory area. There is one of these
 * per VM-area/task. A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	...
	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
	...
} __randomize_layout;

vm_area_struct 该数据结构是用来描述应用进程的一段虚拟地址空间,anon_vma指针指向与该vma关联的反向映射结构,anon_vma_chain用来保存所有关联该vma的avc对象。

3. 建立反向映射流程(物理页面新分配时)

最终结果

在这里插入图片描述
当应用进程的虚拟地址未映射物理内存被访问时,会触发page fault,page fault处理流程中不仅包含为其分配物理页面,保存映射信息到进程页表,同时也会为新分配的物理页面建立反向映射。

handle_mm_fault()
-> __handle_mm_fault()
--> handle_pte_fault()
---> do_anonymous_page()

/*
 * We enter with non-exclusive mmap_lock (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_lock still held, but pte unmapped and unlocked.
 */
static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	vm_fault_t ret = 0;
	pte_t entry;

	/* File mapping without ->vm_ops ? */
	if (vma->vm_flags & VM_SHARED)
		return VM_FAULT_SIGBUS;

	/*
	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
	 * pte_offset_map() on pmds where a huge pmd might be created
	 * from a different thread.
	 *
	 * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
	 * parallel threads are excluded by other means.
	 *
	 * Here we only have mmap_read_lock(mm).
	 */
	if (pte_alloc(vma->vm_mm, vmf->pmd))
		return VM_FAULT_OOM;

	/* See comment in handle_pte_fault() */
	if (unlikely(pmd_trans_unstable(vmf->pmd)))
		return 0;

	/* Use the zero-page for reads */
	if (!(vmf->flags & FAULT_FLAG_WRITE) &&
			!mm_forbids_zeropage(vma->vm_mm)) {
		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
						vma->vm_page_prot));
		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
				vmf->address, &vmf->ptl);
		if (!pte_none(*vmf->pte)) {
			update_mmu_tlb(vma, vmf->address, vmf->pte);
			goto unlock;
		}
		ret = check_stable_address_space(vma->vm_mm);
		if (ret)
			goto unlock;
		/* Deliver the page fault to userland, check inside PT lock */
		if (userfaultfd_missing(vma)) {
			pte_unmap_unlock(vmf->pte, vmf->ptl);
			return handle_userfault(vmf, VM_UFFD_MISSING);
		}
		goto setpte;
	}

	// 为vma创建anon_vma
	/* Allocate our own private page. */
	if (unlikely(anon_vma_prepare(vma)))
		goto oom;
	page = alloc_zeroed_user_highpage_movable(vma, vmf->address);
	if (!page)
		goto oom;

	if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
		goto oom_free_page;
	cgroup_throttle_swaprate(page, GFP_KERNEL);

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	entry = mk_pte(page, vma->vm_page_prot);
	entry = pte_sw_mkyoung(entry);
	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));

	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
			&vmf->ptl);
	if (!pte_none(*vmf->pte)) {
		update_mmu_tlb(vma, vmf->address, vmf->pte);
		goto release;
	}

	ret = check_stable_address_space(vma->vm_mm);
	if (ret)
		goto release;

	/* Deliver the page fault to userland, check inside PT lock */
	if (userfaultfd_missing(vma)) {
		pte_unmap_unlock(vmf->pte, vmf->ptl);
		put_page(page);
		return handle_userfault(vmf, VM_UFFD_MISSING);
	}

	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
	// 为页面建立反向映射
	page_add_new_anon_rmap(page, vma, vmf->address);
	lru_cache_add_inactive_or_unevictable(page, vma);
setpte:
	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, vmf->address, vmf->pte);
unlock:
	pte_unmap_unlock(vmf->pte, vmf->ptl);
	return ret;
release:
	put_page(page);
	goto unlock;
oom_free_page:
	put_page(page);
oom:
	return VM_FAULT_OOM;
}

在这里我们只需要关注两个函数 anon_vma_prepare(vma)page_add_new_anon_rmap(page, vma, vmf->address); 一个是为该vma分配anon_vma,另一个是为该页面建立反向映射。

3.1 anon_vma_prepare()

static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
	// 如果该vma已经指向了一个anon_vma,直接返回无需创建新的anon_vma结构
	if (likely(vma->anon_vma))
		return 0;

	return __anon_vma_prepare(vma); // 否则就创建
}
/**
 * __anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, which
 * is handled inline by anon_vma_prepare(). But if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in folio_lock_anon_vma_read()
 * and that may actually touch the rwsem even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_lock held for reading.
 */
int __anon_vma_prepare(struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct anon_vma *anon_vma, *allocated;
	struct anon_vma_chain *avc;

	might_sleep();

	avc = anon_vma_chain_alloc(GFP_KERNEL); // 首先创建一个avc,用来连接vma和anon_vma
	if (!avc)
		goto out_enomem;

	// 能否复用一个已有的anon_vma结构
	anon_vma = find_mergeable_anon_vma(vma);
	allocated = NULL;
	if (!anon_vma) { // 找不到可复用的,就重新分配一个
		anon_vma = anon_vma_alloc();
		if (unlikely(!anon_vma))
			goto out_enomem_free_avc;
		anon_vma->num_children++; /* self-parent link for new root */
		allocated = anon_vma;
	}

	anon_vma_lock_write(anon_vma);
	/* page_table_lock to protect against threads */
	spin_lock(&mm->page_table_lock);
	if (likely(!vma->anon_vma)) { // 设置vma的反向映射信息
		vma->anon_vma = anon_vma; // 指向anon_vma
		anon_vma_chain_link(vma, avc, anon_vma); // 将avc,anon_vma,vma链接起来
		anon_vma->num_active_vmas++;
		allocated = NULL;
		avc = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	anon_vma_unlock_write(anon_vma);

	if (unlikely(allocated))
		put_anon_vma(allocated);
	if (unlikely(avc))
		anon_vma_chain_free(avc);

	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}
// 这个就是链接的具体实现
static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma; // avc->vma指向该用户进程的vma
	avc->anon_vma = anon_vma; // avc->anon_vma指向anon_vma
	list_add(&avc->same_vma, &vma->anon_vma_chain); // 并将该avc插入到vma->anon_vma_chain链表中,方便后续通过该链表找到所有与该vma关联的anon_vma
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root); // 并将avc插入到anon_vma的红黑树中,方便后续遍历该树节点,找到与该anon_vma关联的所有vma
}

3.2 page_add_new_anon_rmap()

/**
 * page_add_new_anon_rmap - add mapping to a new anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * If it's a compound page, it is accounted as a compound page. As the page
 * is new, it's assume to get mapped exclusively by a single process.
 *
 * Same as page_add_anon_rmap but must only be called on *new* pages.
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
// 为新分配的物理页建立反向映射信息
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	const bool compound = PageCompound(page);
	int nr = compound ? thp_nr_pages(page) : 1;

	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
	__SetPageSwapBacked(page);
	if (compound) {
		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
		/* increment count (starts at -1) */
		atomic_set(compound_mapcount_ptr(page), 0);
		atomic_set(compound_pincount_ptr(page), 0);

		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
	} else {
		/* increment count (starts at -1) */
		atomic_set(&page->_mapcount, 0);
	}
	__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
	__page_set_anon_rmap(page, vma, address, 1); // 直接看该函数
}
/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page:	Page or Hugepage to add to rmap
 * @vma:	VM area to add page to.
 * @address:	User virtual address of the mapping	
 * @exclusive:	the page is exclusively owned by the current process
 */
static void __page_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		goto out;

	/*
	 * If the page isn't exclusively mapped into this vma,
	 * we must use the _oldest_ possible anon_vma for the
	 * page mapping!
	 */
	if (!exclusive)
		anon_vma = anon_vma->root;

	/*
	 * page_idle does a lockless/optimistic rmap scan on page->mapping.
	 * Make sure the compiler doesn't split the stores of anon_vma and
	 * the PAGE_MAPPING_ANON type identifier, otherwise the rmap code
	 * could mistake the mapping for a struct address_space and crash.
	 */
	// 为了区分是匿名反向映射还是文件反向映射,需要对anon_vma地址加上PAGE_MAPPING_ANON
	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	// 然后将page->mapping进行赋值,指向该anon_vma反向映射结构
	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
	// 这里比较有意思,这里是将该匿名页处于vma区域哪个位置(按页粒度)进行保存,方便计算该页面的虚拟起始地址
	page->index = linear_page_index(vma, address);
out:
	if (exclusive)
		SetPageAnonExclusive(page);
}
static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
					unsigned long address)
{
	pgoff_t pgoff;
	if (unlikely(is_vm_hugetlb_page(vma)))
		return linear_hugepage_index(vma, address);
	pgoff = (address - vma->vm_start) >> PAGE_SHIFT; // 用该页面的起始的虚拟地址-该vma映射的起始虚拟地址,再除以PAGE_SIZE
	pgoff += vma->vm_pgoff; // 匿名页映射的vma->vm_pgoff为0,文件映射的vma->vm_pgoff表示该vma起始地址映射到文件的位置偏移,也是以页粒度计算
	return pgoff;
}

4. 建立反向映射流程(fork流程)

最终结果

在这里插入图片描述

fork进程涉及到创建反向映射主要在 dup_mmap() 函数中处理

SYSCALL_DEFINE0(fork)
-> kernel_clone()
--> copy_process()
---> copy_mm()
----> dup_mm()
-----> dup_mmap()

dup_mmap()函数作用主要是将父进程的vma复制一份到子进程中,并建立页表映射和反向映射

#ifdef CONFIG_MMU
static __latent_entropy int dup_mmap(struct mm_struct *mm,
					struct mm_struct *oldmm)
{
	...
	mas_for_each(&old_mas, mpnt, ULONG_MAX) { //遍历父进程的所有vma
		...
		tmp = vm_area_dup(mpnt); // 复制当前父进程的vma
		if (!tmp)
			goto fail_nomem;
		retval = vma_dup_policy(mpnt, tmp);
		if (retval)
			goto fail_nomem_policy;
		tmp->vm_mm = mm;
		retval = dup_userfaultfd(tmp, &uf);
		if (retval)
			goto fail_nomem_anon_vma_fork;
		if (tmp->vm_flags & VM_WIPEONFORK) {
			/*
			 * VM_WIPEONFORK gets a clean slate in the child.
			 * Don't prepare anon_vma until fault since we don't
			 * copy page for current vma.
			 */
			tmp->anon_vma = NULL;
		} else if (anon_vma_fork(tmp, mpnt)) // fork反向映射(核心函数)
			goto fail_nomem_anon_vma_fork;
		...
	}
	...
}

4.1 vm_area_dup()

struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
{
	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); // 为其分配一个新的vma

	if (new) {
		ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
		ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
		/*
		 * orig->shared.rb may be modified concurrently, but the clone
		 * will be reinitialized.
		 */
		*new = data_race(*orig); // 将父进程的vma信息全部复制到新的vma中
		INIT_LIST_HEAD(&new->anon_vma_chain); // 初始化新的vma的anon_vma_chain链表
		dup_anon_vma_name(orig, new); // 复制反向映射空间的名称
	}
	return new;
}

该函数主要是为了生成父进程vma的一份新的拷贝

4.2 anon_vma_fork()

/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
	struct anon_vma_chain *avc;
	struct anon_vma *anon_vma;
	int error;

	/* Don't bother if the parent process has no anon_vma here. */
	if (!pvma->anon_vma) // 如果父进程的vma没有反向映射,则无需进行下面的fork动作
		return 0;

	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
	vma->anon_vma = NULL; // 将子进程的vma指向的anon_vma清空,因为之前是从父进程vma拷贝过来的

	/*
	 * First, attach the new VMA to the parent VMA's anon_vmas,
	 * so rmap can find non-COWed pages in child processes.
	 */
	error = anon_vma_clone(vma, pvma); // 将该子进程的vma链接到父进程的所有anon_vma中
	if (error)
		return error;

	/* An existing anon_vma has been reused, all done then. */
	if (vma->anon_vma) // 如果vma->anon_vma可以被重用,则说明不需要分配新的anon_vma
		return 0;

	/* Then add our own anon_vma. */
	anon_vma = anon_vma_alloc(); // 分配一个新的anon_vma
	if (!anon_vma)
		goto out_error;
	anon_vma->num_active_vmas++;
	avc = anon_vma_chain_alloc(GFP_KERNEL); // 创建一个avc,用来链接anon_vma和新创建的vma
	if (!avc)
		goto out_error_free_anon_vma;

	/*
	 * The root anon_vma's rwsem is the lock actually used when we
	 * lock any of the anon_vmas in this anon_vma tree.
	 */
	anon_vma->root = pvma->anon_vma->root; // 新分配的anon_vma->root指向父进程anon_vma->root
	anon_vma->parent = pvma->anon_vma; // 新分配的anon_vma->parent应该指向父进程的anon_vma
	/*
	 * With refcounts, an anon_vma can stay around longer than the
	 * process it belongs to. The root anon_vma needs to be pinned until
	 * this anon_vma is freed, because the lock lives in the root.
	 */
	get_anon_vma(anon_vma->root); // 增加根节点anon_vma的引用计数
	/* Mark this anon_vma as the one where our new (COWed) pages go. */
	vma->anon_vma = anon_vma; // 子进程的vma的anon_vma指向新分配的anon_vma
	anon_vma_lock_write(anon_vma);
	anon_vma_chain_link(vma, avc, anon_vma); // 建立子进程vma,avc和新分配的anon_vma之间的关系
	anon_vma->parent->num_children++;
	anon_vma_unlock_write(anon_vma);

	return 0;

 out_error_free_anon_vma:
	put_anon_vma(anon_vma);
 out_error:
	unlink_anon_vmas(vma);
	return -ENOMEM;
}

4.3 anon_vma_clone()

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
 * anon_vma_fork(). The first three want an exact copy of src, while the last
 * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
 * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
 * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
 *
 * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
 * and reuse existing anon_vma which has no vmas and only one child anon_vma.
 * This prevents degradation of anon_vma hierarchy to endless linear chain in
 * case of constantly forking task. On the other hand, an anon_vma with more
 * than one child isn't reused even if there was no alive vma, thus rmap
 * walker has a good chance of avoiding scanning the whole hierarchy when it
 * searches where page is mapped.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *avc, *pavc;
	struct anon_vma *root = NULL;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { // 遍历父进程vma->anon_vma_chain链表上保存的所有avc
		struct anon_vma *anon_vma;

		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN); // 分配一个新的avc结构,用于链接子进程vma和父进程avc->anon_vma
		if (unlikely(!avc)) {
			unlock_anon_vma_root(root);
			root = NULL;
			avc = anon_vma_chain_alloc(GFP_KERNEL);
			if (!avc)
				goto enomem_failure;
		}
		anon_vma = pavc->anon_vma; // 获取父进程avc中的指向的anon_vma
		root = lock_anon_vma_root(root, anon_vma);
		anon_vma_chain_link(dst, avc, anon_vma); // 将子进程的vma,新分配的avc,与父进程avc中指向的anon_vma进行链接

		/*
		 * Reuse existing anon_vma if it has no vma and only one
		 * anon_vma child.
		 *
		 * Root anon_vma is never reused:
		 * it has self-parent reference and at least one child.
		 */
		if (!dst->anon_vma && src->anon_vma && // 是否能复用父进程的anon_vma
		    anon_vma->num_children < 2 &&
		    anon_vma->num_active_vmas == 0)
			dst->anon_vma = anon_vma;
	}
	if (dst->anon_vma)
		dst->anon_vma->num_active_vmas++;
	unlock_anon_vma_root(root);
	return 0;

 enomem_failure:
	/*
	 * dst->anon_vma is dropped here otherwise its degree can be incorrectly
	 * decremented in unlink_anon_vmas().
	 * We can safely do this because callers of anon_vma_clone() don't care
	 * about dst->anon_vma if anon_vma_clone() failed.
	 */
	dst->anon_vma = NULL;
	unlink_anon_vmas(dst);
	return -ENOMEM;
}

就此匿名页的反向映射的建立到此结束。

5. 回收匿名页时使用反向映射

这里以kswapd进行匿名页回收为例,与直接回收的路径下半部分是重合的(从shrink_node()函数开始)

kswapd()
-> balance_pgdat()
--> kswapd_shrink_node()
---> shrink_node()
----> shrink_node_memcgs()
-----> shrink_lruvec()
------> shrink_list()
-------> shrink_inactive_list()
--------> shrink_folio_list()
---------> try_to_unmap()

在回收匿名页时,需要使用反向映射,将映射到该页面的所有应用进程的进行unmap动作,修改页表映射,所以我们接下来重点看try_to_unmap() 函数。

5.1 try_to_unmap()

/**
 * try_to_unmap - Try to remove all page table mappings to a folio.
 * @folio: The folio to unmap.
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this
 * folio.  It is the caller's responsibility to check if the folio is
 * still mapped if needed (use TTU_SYNC to prevent accounting races).
 *
 * Context: Caller must hold the folio lock.
 */
void try_to_unmap(struct folio *folio, enum ttu_flags flags)
{
	struct rmap_walk_control rwc = {
		.rmap_one = try_to_unmap_one,
		.arg = (void *)flags,
		.done = page_not_mapped,
		.anon_lock = folio_lock_anon_vma_read,
	};

	if (flags & TTU_RMAP_LOCKED)
		rmap_walk_locked(folio, &rwc);
	else
		rmap_walk(folio, &rwc);
}
void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc)
{
	if (unlikely(folio_test_ksm(folio)))
		rmap_walk_ksm(folio, rwc);
	else if (folio_test_anon(folio))
		rmap_walk_anon(folio, rwc, false);
	else
		rmap_walk_file(folio, rwc, false);
}

因为我们是匿名页反向映射,所以看 rmap_walk_anon() 如何处理。

5.2 rmap_walk_anon()

/*
 * rmap_walk_anon - do something to anonymous page using the object-based
 * rmap method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 */
static void rmap_walk_anon(struct folio *folio,
		struct rmap_walk_control *rwc, bool locked)
{
	struct anon_vma *anon_vma;
	pgoff_t pgoff_start, pgoff_end;
	struct anon_vma_chain *avc;

	if (locked) {
		anon_vma = folio_anon_vma(folio); // 根据页面获取到该页面对应的anon_vma
		/* anon_vma disappear under us? */
		VM_BUG_ON_FOLIO(!anon_vma, folio);
	} else {
		anon_vma = rmap_walk_anon_lock(folio, rwc);
	}
	if (!anon_vma)
		return;

	pgoff_start = folio_pgoff(folio); // 计算该页面的起始偏移位置
	pgoff_end = pgoff_start + folio_nr_pages(folio) - 1; // 该页面的终止偏移位置
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, // 从anon_vma->root中查出所有的avc,从而找到映射了该页面的所有vma
			pgoff_start, pgoff_end) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(&folio->page, vma);

		VM_BUG_ON_VMA(address == -EFAULT, vma);
		cond_resched();

		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
			continue;

		if (!rwc->rmap_one(folio, vma, address, rwc->arg)) // 进行映射解除
			break;
		if (rwc->done && rwc->done(folio))
			break;
	}

	if (!locked)
		anon_vma_unlock_read(anon_vma);
}

至此匿名页的反向映射处理和使用流程介绍完毕,感谢各位读者浏览!

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/1278508.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

哪一款台灯适合学生考研用?热门学生护眼台灯推荐

近些年近视人数持续升高&#xff0c;我们越来越注意个人的健康问题&#xff0c;而对于视力健康&#xff0c;尤其是儿童青少年们&#xff0c;在3-14岁这个年龄段近视机率更高&#xff0c;儿童青少年近视率高达52.7%&#xff0c;随着市面上护眼台灯的种类越来越多&#xff0c;各种…

OpenTelemetry系列 - 第1篇 相关概念

目录 一、背景二、概念2.1 Traces & Span2.2 Metrics2.3 Logs2.4 Baggage2.5 OTel2.6 OTLP2.7 Resources2.8 Instrumentation Scope2.9 Sampling 三、核心组件 一、背景 OpenTelemetry是一个可观察性框架和工具包&#xff0c;旨在创建和管理遥测数据&#xff0c;如跟踪、指…

运维 | 关于IP网络相关的概念和原理

关注&#xff1a;CodingTechWork IP地址 IP介绍 概述 IP是TCP/IP协议族的核心&#xff0c;IP地址是电脑在网络中的唯一标识&#xff0c;全球唯一&#xff08;G公网IP&#xff09;。IP地址&#xff08;Internet Protocol Address&#xff09;是互联网协议地址&#xff0c;也…

LabVIEW在不同操作系统上使VI、可执行文件或安装程序

LabVIEW在不同操作系统上使VI、可执行文件或安装程序 LabVIEW可以在多个操作系统上运行&#xff0c;主要支持以下几种操作系统&#xff1a; Windows&#xff1a; LabVIEW在各个版本的Windows操作系统上都能运行&#xff0c;包括Windows 7、Windows 8和Windows10。LabVIEW为Wi…

【数据中台】开源项目(5)-Amoro

介绍 Amoro is a Lakehouse management system built on open data lake formats. Working with compute engines including Flink, Spark, and Trino, Amoro brings pluggable and self-managed features for Lakehouse to provide out-of-the-box data warehouse experience,…

海外之路,英语相助:我的雅思考试之旅(成功以此申请到了港科大硕士)

文章目录 背景注册过程 雅思考试之旅全程记录2023年2月14日 - 报名补充2023年2月15日 - 模拟考试2023年2月16日 - 在线考试失利2023年2月20日 - 线下考试再战成功2023年2月23日 - 成绩出来了 考试经验寄送成绩问题请求额外的IELTS成绩寄送 结语 背景 在新加坡Shopee工作了两年…

【面试经典 150 | 二分查找】搜索插入位置

文章目录 写在前面Tag题目来源题目解读解题思路方法一&#xff1a;二分查找闭区间左闭右开区间开区间总结 知识总结写在最后 写在前面 本专栏专注于分析与讲解【面试经典150】算法&#xff0c;两到三天更新一篇文章&#xff0c;欢迎催更…… 专栏内容以分析题目为主&#xff0c…

【Python 训练营】N_14 文件查找和替换

题目 新建一个test3.txt文件&#xff0c;内容如下图&#xff0c;然后从中查找字符串’five’&#xff0c;并统计出现的次数&#xff1b;替换其中的’five’字符串为’python’。 分析 类似Excel中的查找和替换&#xff0c;查找相应内容需用到正则&#xff0c;还考察文件打开、…

持续集成交付CICD:CentOS 7 安装 Sonarqube9.6

目录 一、实验 1.CentOS 7 安装 Sonarqube9.6 二、问题 1.安装postgresql13服务端报错 2.postgresql13创建用户报错 一、实验 1.CentOS 7 安装 Sonarqube9.6 &#xff08;1&#xff09;下载软件及依赖包 ①Sonarqube9.6下载地址 https://binaries.sonarsource.com/Dis…

欧洲各国及发达国家经济支柱和第一出口商品是什么

工业在欧洲各国经济支柱中的表现 一般发达国家&#xff0c;像西欧的国家第三产业即服务业占GDP70%甚至更高&#xff0c;从业人数比重也最大&#xff0c;只是越发达的国家服务业的知识性和科技含量会更高&#xff0c;如商业咨询、律师、医疗卫生、科技服务、商业服务。服务业的…

布隆过滤器,Redis之 bitmap,场景题【如果微博某个大V发了一条消息,怎么统计有多少人看过了】

学习文档 文章目录 一、什么是 Bitmap1-1、Bitmap 相关命令 二、Bitmap 和 Set 对比2-1、数据准备2-2、内存对比2-3、性能对比 三、布隆过滤器3-1、理论3-2、代码实现 四、Java中的 Hash 函数 最近面试时&#xff0c;遇到了一个场景题&#xff0c;面试官问如何统计一条微博大V的…

计算机网络扫盲(1)——因特网

一、概述 因特网是一个世界范围的计算机网络&#xff0c;即它是一个互联了遍及全世界数十亿计算设备的网络。大家对此应该并不陌生&#xff0c;我们身边有着不计其数的计算机设备被接入了因特网&#xff0c;如今计算机网络这个术语似乎已经有点过时了&#xff0c;用因特网的术语…

结合贝叶斯定理浅谈商业银行员工异常行为排查

1.贝叶斯定理的数学表达 贝叶斯方法依据贝叶斯定理。关于贝叶斯定理解释如下&#xff1a;首先我们设定在事件B条件下&#xff0c;发生事件A的条件概率&#xff0c;即 &#xff0c;从数学公式上&#xff0c;此条件概率等于事件A与事件B同时发生的概率除以事件B发生的概率。 上述…

MyBatis增删改查和配置文件

MyBatis增删改查 MyBatis新增 新增用户 持久层接口添加方法 void add(User user);映射文件添加标签 <insert id"add" parameterType"com.mybatis.pojo.User">insert into user(username,sex,address) values(# {username},# {sex},# {address}) <…

海林猴头菇 区域公用品牌形象正式发布

猴头菇是中国八大“山珍”之一&#xff0c;自古就有“山珍猴头&#xff0c;海味燕窝”之说&#xff0c;猴头菇在中国既是食用珍品&#xff0c;又是重要的药用菌。 海林市位于黑龙江省东南部&#xff0c;地处长白山脉张广才岭东麓&#xff0c;素有“林海雪原”之称。 海林猴头菇…

虚函数表和虚函数在内存中的位置

文章目录 结论验证 结论 虚函数表指针是虚函数表所在位置的地址。虚函数表指针属于对象实例。因而通过new出来的对象的虚函数表指针位于堆&#xff0c;声名对象的虚函数表指针位于栈 虚函数表位于只读数据段&#xff08;.rodata&#xff09;&#xff0c;即&#xff1a;C内存模…

《opencv实用探索·八》图像模糊之均值滤波简单理解

1、前言 什么是噪声&#xff1f; 该像素与周围像素的差别非常大&#xff0c;导致从视觉上就能看出该像素无法与周围像素组成可识别的图像信息&#xff0c;降低了整个图像的质量。这种“格格不入”的像素就被称为图像的噪声。如果图像中的噪声都是随机的纯黑像素或者纯白像素&am…

jionlp :一款超级强大的Python 神器!轻松提取地址中的省、市、县

在日常数据处理中&#xff0c;如果你需要从一个完整的地址中提取出省、市、县三级地名&#xff0c;或者乡镇、村、社区两级详细地名&#xff0c;你可以使用一个第三方库来实现快速解析。在使用之前&#xff0c;你需要先安装这个库。 pip install jionlp -i https://pypi.douba…

如何使用注解实现接口的幂等性校验

如何使用注解实现接口的幂等性校验 背景什么是幂等性为什么要实现幂等性校验如何实现接口的幂等性校验1. 数据库唯一主键2. 数据库乐观锁3. 防重 Token 令牌4. redis 如何将这几种方式都组装到一起结语 背景 最近在小组同学卷的受不了的情况下&#xff0c;我决定换一个方向卷去…

Docker Compose及Docker 知识点整理

目录 1、Docker Compose 简介 2、为什么要使用Docker Compose 3、Docker Compose安装使用&#xff08;Linux&#xff09; 3.1 下载 3.2 mkdir docker 文件夹目录 3.3 上传docker-compose到docker文件夹 3.4 移动到 /usr/local/bin 目录下 3.5 添加执行权限 3.6 修改文…