The Linux boot memory allocator (bootmem)

The Linux boot memory allocator exists to satisfy the kernel's memory allocations before the buddy system and the slab allocator are up. Its mechanism is deliberately simple: a bitmap marks each page frame as allocated or free.
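The principle is easy to demonstrate outside the kernel. Below is a minimal user-space sketch of a bitmap page allocator (toy code with invented names such as toy_alloc, not kernel code): one bit per page frame, a set bit means reserved, a clear bit means free, and allocation is a first-fit scan.

#include <stdio.h>
#include <string.h>

#define NPAGES 32                       /* toy node with 32 page frames */
static unsigned char map[NPAGES / 8];   /* one bit per page, 1 = reserved */

static int  test_bit_(int i)  { return map[i / 8] &  (1 << (i % 8)); }
static void set_bit_(int i)   {        map[i / 8] |= (1 << (i % 8)); }
static void clear_bit_(int i) {        map[i / 8] &= ~(1 << (i % 8)); }

/* first-fit allocation of n contiguous pages; returns start index or -1 */
static int toy_alloc(int n)
{
	for (int s = 0; s + n <= NPAGES; s++) {
		int i;
		for (i = s; i < s + n; i++)
			if (test_bit_(i))
				break;          /* hit a reserved page */
		if (i == s + n) {               /* the whole run was free */
			for (i = s; i < s + n; i++)
				set_bit_(i);    /* mark it reserved */
			return s;
		}
	}
	return -1;
}

/* freeing clears the bits again */
static void toy_free(int s, int n)
{
	for (int i = s; i < s + n; i++)
		clear_bit_(i);
}

int main(void)
{
	memset(map, 0xff, sizeof(map));  /* start all-reserved, like bootmem */
	toy_free(4, 20);                 /* then explicitly free the usable RAM */
	printf("alloc 8 pages  -> %d\n", toy_alloc(8));  /* 4 */
	printf("alloc 16 pages -> %d\n", toy_alloc(16)); /* -1: no room left */
	return 0;
}

Note that the initialization order mirrors bootmem itself: everything starts reserved, and usable RAM is then freed explicitly.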
1. Data structures
1) Reserved regions
Creating the boot memory allocator involves reserving memory: regions that were claimed before the allocator existed (page tables, the allocator itself, i.e. the bitmap it uses as its map, I/O areas, and so on) must not be handed out again once the allocator starts serving allocations. Linux describes each such reserved region with the following data structure:
/*
 * Early reserved memory areas.
 */
#define MAX_EARLY_RES 20/* maximum number of early reserved regions */

struct early_res {/* one early reserved region */
	u64 start, end;
	char name[16];
	char overlap_ok;
};
/* the global array of early reservations; a zero 'end' ends the list */
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
	{ 0, PAGE_SIZE, "BIOS data page" },	/* BIOS data page */
	{}
};
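Recording a reservation in this array amounts to finding the first unused slot and filling it in. A simplified sketch of what reserve_early() boils down to (sketch_reserve_early is an invented name, and the real x86 implementation additionally deals with overlapping ranges):

/* simplified sketch: record an early reservation in the array above */
static void __init sketch_reserve_early(u64 start, u64 end, const char *name)
{
	int i;

	/* find the first unused slot: a zero 'end' terminates the array */
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
		;
	if (i >= MAX_EARLY_RES)
		panic("too many early reservations");

	early_res[i].start = start;
	early_res[i].end = end;
	strncpy(early_res[i].name, name, sizeof(early_res[i].name) - 1);
}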

2) The bootmem allocator
/*
 * node_bootmem_map is a map pointer - the bits represent all physical 
 * memory pages (including holes) on the node.
 */
 /* per-node bootmem bookkeeping */
typedef struct bootmem_data {
	unsigned long node_min_pfn;/* first page frame managed by this node's
				      bootmem (the node's lowest PFN) */
	unsigned long node_low_pfn;/* end of low memory; on 32-bit x86 this
				      covers at most 896MB */
	void *node_bootmem_map;/* the bitmap: one bit per page frame */
	unsigned long last_end_off;/* byte offset just past the previous
				      allocation, so consecutive small
				      allocations can share a page */
	unsigned long hint_idx;/* bitmap index where the next search starts */
	struct list_head list;
} bootmem_data_t;

All bootmem_data structures are linked on a global list:
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
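One practical detail before moving on: since node_bootmem_map stores one bit per page frame, the map's size is easy to estimate. With 896MB of low memory there are 896MB / 4KB = 229,376 page frames, so the map needs 229,376 / 8 = 28,672 bytes, which fits in 7 pages. A hedged user-space sketch of the computation (the kernel's bootmap_bytes() additionally aligns the byte count to sizeof(long)):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* bytes needed for one bit per page frame, rounded up to whole bytes */
static unsigned long sketch_bootmap_bytes(unsigned long pages)
{
	return (pages + 7) / 8;
}

/* whole pages needed to hold that bitmap, roughly what the kernel's
 * bootmem_bootmap_pages() computes */
static unsigned long sketch_bootmap_pages(unsigned long pages)
{
	return (sketch_bootmap_bytes(pages) + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

int main(void)
{
	unsigned long low_pages = (896UL << 20) >> PAGE_SHIFT; /* 229376 */

	printf("%lu bytes, %lu pages\n",
	       sketch_bootmap_bytes(low_pages),   /* 28672 */
	       sketch_bootmap_pages(low_pages));  /* 7 */
	return 0;
}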

2. Building the boot allocator
Bringing up the boot allocator has three main steps: allocate the map bitmap, clear the bits of the active memory regions to 0 (marking them usable), and process the reserved regions. The reserved regions are kept in the global array introduced above; here their corresponding bitmap bits are simply set to 1 to mark them as allocated.
Now let's follow the kernel's concrete initialization path:
start_kernel()->setup_arch()->initmem_init()
void __init setup_arch(char **cmdline_p)
{
          .......
	/* initialize the bootmem allocator for low memory;
	   high memory is not managed by bootmem */
	initmem_init(0, max_pfn);
          .......
}           
void __init initmem_init(unsigned long start_pfn,
				  unsigned long end_pfn)
{
#ifdef CONFIG_HIGHMEM
	highstart_pfn = highend_pfn = max_pfn;
	if (max_pfn > max_low_pfn)
		highstart_pfn = max_low_pfn;
	/* register all RAM below highend_pfn as active regions
	   in early_node_map[] */
	e820_register_active_regions(0, 0, highend_pfn);
	/* for sparse memory models, record which memory
	   sections are present */
	sparse_memory_present_with_active_regions(0);
	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
		pages_to_mb(highend_pfn - highstart_pfn));

	num_physpages = highend_pfn;	/* total number of physical pages */
	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
	e820_register_active_regions(0, 0, max_low_pfn);
	sparse_memory_present_with_active_regions(0);
	num_physpages = max_low_pfn;
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
#ifdef CONFIG_FLATMEM
	max_mapnr = num_physpages;
#endif
	__vmalloc_start_set = true;

	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
			pages_to_mb(max_low_pfn));

	/* build the bootmem allocator */
	setup_bootmem_allocator();
}


void __init setup_bootmem_allocator(void)
{
	int nodeid;
	unsigned long bootmap_size, bootmap;
	/*
	 * Initialize the boot-time allocator (with low memory only):
	 */
	 /* number of pages needed for a bitmap with one bit per
	    low-memory page frame (about max_low_pfn/8 bytes),
	    converted to a size in bytes */
	bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
	/* find a free area in the e820 map to hold the bitmap itself */
	bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
				 PAGE_SIZE);
	if (bootmap == -1L)
		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
	/* record the bitmap itself as an early reservation */
	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");

	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
		 max_pfn_mapped<<PAGE_SHIFT);
	printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);

	/* set up bootmem for every online node */
	for_each_online_node(nodeid) {
		unsigned long start_pfn, end_pfn;

#ifdef CONFIG_NEED_MULTIPLE_NODES	/* not set */
		start_pfn = node_start_pfn[nodeid];
		end_pfn = node_end_pfn[nodeid];
		if (start_pfn > max_low_pfn)
			continue;
		if (end_pfn > max_low_pfn)
			end_pfn = max_low_pfn;
#else
		start_pfn = 0;
		end_pfn = max_low_pfn;
#endif
		/* initialize this node's bootmem data and bitmap */
		bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
						 bootmap);
	}

	/* bootmem is ready; after_bootmem = 1 lets later code
	   allocate from it */
	after_bootmem = 1;
}
static unsigned long __init setup_node_bootmem(int nodeid,
				 unsigned long start_pfn,
				 unsigned long end_pfn,
				 unsigned long bootmap)
{
	unsigned long bootmap_size;

	/* don't touch min_low_pfn */
	/* initialize this node's bitmap: every bit starts out
	   as 1 (reserved) */
	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
					 bootmap >> PAGE_SHIFT,
					 start_pfn, end_pfn);
	printk(KERN_INFO "  node %d low ram: %08lx - %08lx\n",
		nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	printk(KERN_INFO "  node %d bootmap %08lx - %08lx\n",
		nodeid, bootmap, bootmap + bootmap_size);
	/* clear the bits of the active (usable) regions to 0 */
	free_bootmem_with_active_regions(nodeid, end_pfn);
	/* set the bits of the early reserved regions back to 1
	   (the bitmap itself, the BIOS data page, ...) */
	early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	/* return the end of this node's bitmap so the next node's
	   bitmap is placed right after it */
	return bootmap + bootmap_size;
}
For the map bitmap initialization, init_bootmem_node() calls init_bootmem_core():
/*
 * Called once to set up the allocator itself.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	unsigned long mapsize;

	mminit_validate_memmodel_limits(&start, &end);
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;
	/* link bdata into the global bdata_list */
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	 /* compute the map size in bytes from bdata's range:
	    one bit per page, i.e. about (end - start)/8 */
	mapsize = bootmap_bytes(end - start);
	/* set every bit in the map to 1 (reserved) */
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx
", bdata - bootmem_node_data, start, mapstart, end, mapsize); return mapsize; }
/*
 * link bdata in order
 */
 /* insert in sorted order: the list stays ordered by each
    node's starting page frame number (node_min_pfn) */
static void __init link_bootmem(bootmem_data_t *bdata)
{
	struct list_head *iter;
	/* walk the global bdata_list to find the insertion point */
	list_for_each(iter, &bdata_list) {
		bootmem_data_t *ent;

		ent = list_entry(iter, bootmem_data_t, list);
		if (bdata->node_min_pfn < ent->node_min_pfn)
			break;
	}
	list_add_tail(&bdata->list, iter);
}
/**
 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
 * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
 *
 * If an architecture guarantees that all ranges registered with
 * add_active_ranges() contain no holes and may be freed, this
 * function may be used instead of calling free_bootmem() manually.
 */
 /* release the active regions to bootmem: clear their bitmap
    bits so the pages become allocatable */
void __init free_bootmem_with_active_regions(int nid,
						unsigned long max_low_pfn)
{
	int i;
	/* iterate over every active range belonging to this node */
	for_each_active_range_index_in_nid(i, nid) {
		unsigned long size_pages = 0;
		unsigned long end_pfn = early_node_map[i].end_pfn;

		if (early_node_map[i].start_pfn >= max_low_pfn)
			continue;

		if (end_pfn > max_low_pfn)
			end_pfn = max_low_pfn;
		/* number of pages to free in this range */
		size_pages = end_pfn - early_node_map[i].start_pfn;
		/* free the range: the corresponding bitmap bits go to 0 */
		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
				PFN_PHYS(early_node_map[i].start_pfn),
				size_pages << PAGE_SHIFT);
	}
}
/**
 * free_bootmem_node - mark a page range as usable
 * @pgdat: node the range resides on
 * @physaddr: starting address of the range
 * @size: size of the range in bytes
 *
 * Partial pages will be considered reserved and left as they are.
 *
 * The range must reside completely on the specified node.
 */
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
			      unsigned long size)
{
	unsigned long start, end;
	/* let kmemleak stop tracking this range */
	kmemleak_free_part(__va(physaddr), size);

	start = PFN_UP(physaddr);/* round up: a partial first page stays reserved */
	end = PFN_DOWN(physaddr + size);/* round down: a partial last page stays reserved */

	
	/* clear the corresponding bitmap bits to 0 (reserve = 0, flags = 0) */
	mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
}
static int __init mark_bootmem_node(bootmem_data_t *bdata,
				unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long sidx, eidx;

	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x
", bdata - bootmem_node_data, start, end, reserve, flags); BUG_ON(start < bdata->node_min_pfn); BUG_ON(end > bdata->node_low_pfn); /* */ sidx = start - bdata->node_min_pfn; eidx = end - bdata->node_min_pfn; if (reserve)/* */ return __reserve(bdata, sidx, eidx, flags); else/* map 0*/ __free(bdata, sidx, eidx); return 0; }
/* mark a range as reserved in the bootmem bitmap */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			unsigned long eidx, int flags)
{
	unsigned long idx;
	int exclusive = flags & BOOTMEM_EXCLUSIVE;

	bdebug("nid=%td start=%lx end=%lx flags=%x
", bdata - bootmem_node_data, sidx + bdata->node_min_pfn, eidx + bdata->node_min_pfn, flags); /* */ for (idx = sidx; idx < eidx; idx++) if (test_and_set_bit(idx, bdata->node_bootmem_map)) { if (exclusive) { __free(bdata, sidx, idx); return -EBUSY; } bdebug("silent double reserve of PFN %lx
", idx + bdata->node_min_pfn); } return 0; }
/* mark a range as free in the bootmem bitmap */
static void __init __free(bootmem_data_t *bdata,
			unsigned long sidx, unsigned long eidx)
{
	unsigned long idx;

	bdebug("nid=%td start=%lx end=%lx
", bdata - bootmem_node_data, sidx + bdata->node_min_pfn, eidx + bdata->node_min_pfn); if (bdata->hint_idx > sidx) bdata->hint_idx = sidx;/* hint_idx, */ for (idx = sidx; idx < eidx; idx++)/* 0*/ if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) BUG(); }
void __init early_res_to_bootmem(u64 start, u64 end)
{
	int i, count;
	u64 final_start, final_end;

	count  = 0;
	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
		count++;	/* count the early reservations in use */

	printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
			 count, start, end);

	for (i = 0; i < count; i++) {
		struct early_res *r = &early_res[i];
		printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i,
			r->start, r->end, r->name);
		/* clip the reservation to the [start, end) window */
		final_start = max(start, r->start);
		final_end = min(end, r->end);
		if (final_start >= final_end) {
			printk(KERN_CONT "\n");
			continue;
		}
		printk(KERN_CONT " ==> [%010llx - %010llx]\n",
			final_start, final_end);
		/* set the corresponding bitmap bits to 1 */
		reserve_bootmem_generic(final_start, final_end - final_start,
					BOOTMEM_DEFAULT);
	}
}
The reserve_bootmem_generic() call above eventually ends up in the following function:
/**
 * reserve_bootmem - mark a page range as reserved
 * @addr: starting address of the range
 * @size: size of the range in bytes
 * @flags: reservation flags (see linux/bootmem.h)
 *
 * Partial pages will be reserved.
 *
 * The range must be contiguous but may span node boundaries.
 */
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			    int flags)
{
	unsigned long start, end;

	start = PFN_DOWN(addr);/* round down: a partial first page is reserved too */
	end = PFN_UP(addr + size);/* round up: a partial last page is reserved too */

	return mark_bootmem(start, end, 1, flags);
}
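Note the opposite rounding directions: free_bootmem_node() rounds the start up and the end down, so partially covered pages stay reserved, while reserve_bootmem() rounds the start down and the end up, so partially covered pages become reserved as well. A quick check with hypothetical numbers:

#include <assert.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	unsigned long addr = 0x1800, size = 0x2000; /* [0x1800, 0x3800) */

	/* freeing: only pages fully inside the range are freed -> page 2 */
	assert(PFN_UP(addr) == 2 && PFN_DOWN(addr + size) == 3);

	/* reserving: every touched page is reserved -> pages 1 to 3 */
	assert(PFN_DOWN(addr) == 1 && PFN_UP(addr + size) == 4);
	return 0;
}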
/* mark a range that may span several nodes */
static int __init mark_bootmem(unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long pos;
	bootmem_data_t *bdata;

	pos = start;
	/* walk bdata_list to find the node(s) covering the range */
	list_for_each_entry(bdata, &bdata_list, list) {
		int err;
		unsigned long max;

		if (pos < bdata->node_min_pfn ||
		    pos >= bdata->node_low_pfn) {
			BUG_ON(pos != start);
			continue;
		}

		max = min(bdata->node_low_pfn, end);
		/* mark the part of the range inside this node */
		err = mark_bootmem_node(bdata, pos, max, reserve, flags);
		if (reserve && err) {/* on failure, undo what was already reserved */
			mark_bootmem(start, pos, 0, 0);
			return err;
		}

		if (max == end)
			return 0;
		pos = bdata->node_low_pfn;
	}
	BUG();
}
3. Allocating and freeing memory
With initialization covered, allocation and freeing are straightforward: an allocation sets the corresponding bits in the allocator's map to 1, and freeing does the reverse.
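Before diving into the kernel function, it may help to see its search loop in miniature. A hedged user-space sketch of a first-fit scan with a page-granular alignment step (next_zero and sketch_scan are invented stand-ins for find_next_zero_bit() and the loop in alloc_bootmem_core()):

#include <stdio.h>
#include <string.h>

/* naive stand-in for the kernel's find_next_zero_bit() */
static unsigned long next_zero(const unsigned char *map,
			       unsigned long limit, unsigned long idx)
{
	while (idx < limit && (map[idx / 8] & (1 << (idx % 8))))
		idx++;
	return idx;
}

/* first-fit search for 'pages' free pages at a 'step'-page alignment;
 * returns the start index, or midx on failure */
static unsigned long sketch_scan(const unsigned char *map, unsigned long midx,
				 unsigned long pages, unsigned long step)
{
	unsigned long sidx = 0;

	for (;;) {
		unsigned long i, eidx;

		sidx = next_zero(map, midx, sidx);
		sidx = (sidx + step - 1) / step * step;	/* align the index */
		eidx = sidx + pages;
		if (sidx >= midx || eidx > midx)
			return midx;			/* ran off the map */

		for (i = sidx; i < eidx; i++)		/* verify the run */
			if (map[i / 8] & (1 << (i % 8)))
				break;
		if (i == eidx)
			return sidx;			/* found a free run */
		sidx = i + 1;				/* restart past the set bit */
	}
}

int main(void)
{
	unsigned char map[4];		/* 32 pages */

	memset(map, 0, sizeof(map));
	map[0] = 0x0f;			/* pages 0-3 reserved */

	/* 4 pages at 4-page alignment: pages 4-7 are free and aligned */
	printf("found at page %lu\n", sketch_scan(map, 32, 4, 4)); /* 4 */
	return 0;
}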
/* allocate 'size' bytes from this node's bootmem */
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
					unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	unsigned long fallback = 0;
	unsigned long min, max, start, sidx, midx, step;

	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx
", bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, align, goal, limit); BUG_ON(!size); BUG_ON(align & (align - 1)); BUG_ON(limit && goal + size > limit); /* , */ if (!bdata->node_bootmem_map) return NULL; min = bdata->node_min_pfn; max = bdata->node_low_pfn; goal >>= PAGE_SHIFT; limit >>= PAGE_SHIFT; if (limit && max > limit) max = limit; if (max <= min) return NULL; /*step */ step = max(align >> PAGE_SHIFT, 1UL); /* */ if (goal && min < goal && goal < max) start = ALIGN(goal, step); else start = ALIGN(min, step); /* */ sidx = start - bdata->node_min_pfn; midx = max - bdata->node_min_pfn; /* , , , */ if (bdata->hint_idx > sidx) { * Handle the valid case of sidx being zero and still * catch the fallback below. */ fallback = sidx + 1; /* , */ sidx = align_idx(bdata, bdata->hint_idx, step); } while (1) { int merge; void *region; unsigned long eidx, i, start_off, end_off; find_block: /* 0 */ sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); sidx = align_idx(bdata, sidx, step); eidx = sidx + PFN_UP(size);/* */ if (sidx >= midx || eidx > midx)/* */ break; for (i = sidx; i < eidx; i++)/* */ if (test_bit(i, bdata->node_bootmem_map)) {/* , */ sidx = align_idx(bdata, i, step); if (sidx == i) sidx += step; goto find_block; } if (bdata->last_end_off & (PAGE_SIZE - 1) &&/* , */ PFN_DOWN(bdata->last_end_off) + 1 == sidx) start_off = align_off(bdata, bdata->last_end_off, align); else start_off = PFN_PHYS(sidx); /*merge==1 */ merge = PFN_DOWN(start_off) < sidx; end_off = start_off + size; /* */ bdata->last_end_off = end_off; bdata->hint_idx = PFN_UP(end_off); /* * Reserve the area now: */ /* , 1*/ if (__reserve(bdata, PFN_DOWN(start_off) + merge, PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) BUG(); /* */ region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + start_off); memset(region, 0, size);/* */ /* * The min_count is set to 0 so that bootmem allocated blocks * are never reported as leaks. */ /* */ kmemleak_alloc(region, size, 0, 0); return region; } if (fallback) {/* , */ sidx = align_idx(bdata, fallback - 1, step); fallback = 0; goto find_block; } return NULL; }
