Linux 가상 파일 시스템 (노드 경로 검색)
17873 단어 · Linux 가상 파일 시스템 · 리눅스 커널 학습 노트
1. 검색에 사용되는 데이터 구조
/*
 * Lookup state threaded through a path walk: path_init() fills it in,
 * the walk advances it one component at a time, and on success it
 * describes the result.
 */
struct nameidata {
struct path path;/* current position of the walk (vfsmount + dentry); the result on success */
struct qstr last;/* last component; stored by the LOOKUP_PARENT branch of the walk */
struct path root;/* root that ".." must not climb above; root.mnt == NULL means "not set" */
unsigned int flags;/* LOOKUP_* flags controlling the walk */
int last_type;/* LAST_ROOT, LAST_NORM, LAST_DOT or LAST_DOTDOT */
unsigned depth;/* zeroed by path_init(); nonzero makes the walk force LOOKUP_FOLLOW (presumably symlink nesting depth -- confirm) */
char *saved_names[MAX_NESTED_LINKS + 1];/* not touched by the code shown here; presumably one saved link body per nesting level -- confirm */
/* Intent data */
union {
struct open_intent open;
} intent;
};
/* One name component: its bytes, its length and its hash. */
struct qstr {
unsigned int hash;/* the walk stores the end_name_hash() result here */
unsigned int len;/* number of bytes in the component */
const unsigned char *name;/* first byte; the walk points this into the caller's path string, so do not rely on a NUL at name[len] */
};
2. 검색
/*
 * path_lookup - resolve a pathname into a nameidata.
 * @name:  pathname to resolve
 * @flags: LOOKUP_* flags handed through unchanged
 * @nd:    lookup state, filled in by the walk
 *
 * Thin wrapper around do_path_lookup() that passes AT_FDCWD as the
 * directory descriptor.  Returns whatever do_path_lookup() returns.
 */
int path_lookup(const char *name, unsigned int flags,
		struct nameidata *nd)
{
	int status;

	status = do_path_lookup(AT_FDCWD, name, flags, nd);
	return status;
}
실제 작업은 위에서 호출한 do_path_lookup() 함수가 수행합니다. 여기서는 이 함수를 분석합니다.
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
static int do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{ /* , nd */
int retval = path_init(dfd, name, flags, nd);
if (!retval)
/* ,
nd */
retval = path_walk(name, nd);
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
nd->path.dentry->d_inode))
audit_inode(name, nd->path.dentry);
if (nd->root.mnt) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
return retval;
}
2.1 초기화 단계
초기화 단계는 path_init() 함수로 구현됩니다.
/*
 * path_init - choose the starting point of a lookup.
 * @dfd:   directory file descriptor, or AT_FDCWD
 * @name:  pathname about to be walked
 * @flags: LOOKUP_* flags, copied into nd->flags
 * @nd:    lookup state to initialise
 *
 * Three cases:
 *   - @name starts with '/': start at the root set up by set_root();
 *   - @dfd is AT_FDCWD:      start at current->fs->pwd;
 *   - otherwise:             start at the directory open on @dfd.
 *
 * In every successful case nd->path holds its own reference.
 *
 * Returns 0 on success, -EBADF if @dfd has no open file, -ENOTDIR if
 * it is not a directory, or the file_permission() error.
 */
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
	struct file *dir;
	int fput_needed;
	int err;

	/* LAST_ROOT stays in place if the name is nothing but slashes. */
	nd->last_type = LAST_ROOT;
	nd->flags = flags;
	nd->depth = 0;
	nd->root.mnt = NULL;

	if (*name == '/') {
		/* Absolute path: nd->path becomes a second holder of nd->root. */
		set_root(nd);
		nd->path = nd->root;
		path_get(&nd->root);
		return 0;
	}

	if (dfd == AT_FDCWD) {
		/* Relative path: snapshot the working directory under fs->lock. */
		struct fs_struct *fs = current->fs;

		read_lock(&fs->lock);
		nd->path = fs->pwd;
		path_get(&fs->pwd);
		read_unlock(&fs->lock);
		return 0;
	}

	/* Relative to an explicit descriptor. */
	dir = fget_light(dfd, &fput_needed);
	if (dir == NULL)
		return -EBADF;

	if (!S_ISDIR(dir->f_path.dentry->d_inode->i_mode))
		err = -ENOTDIR;
	else
		err = file_permission(dir, MAY_EXEC);

	if (err == 0) {
		/* Take our own reference before the file reference goes away. */
		nd->path = dir->f_path;
		path_get(&dir->f_path);
	}

	fput_light(dir, fput_needed);
	return err;
}
2.2 실제 검색 작업
/*
 * path_walk - start a fresh walk of @name from nd->path.
 *
 * Resets the current task's total_link_count to zero, then hands off
 * to link_path_walk() and returns its result.
 */
static int path_walk(const char *name, struct nameidata *nd)
{
	int status;

	current->total_link_count = 0;
	status = link_path_walk(name, nd);

	return status;
}
/*
* Wrapper to retry pathname resolution whenever the underlying
* file system returns an ESTALE.
*
* Retry the whole path once, forcing real lookup requests
* instead of relying on the dcache.
*/
static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
{
struct path save = nd->path;
int result;
/* make sure the stuff we saved doesn't go away */
path_get(&save);/* path */
/* */
result = __link_path_walk(name, nd);
if (result == -ESTALE) {
/* nd->path had been dropped */
nd->path = save;
path_get(&nd->path);
nd->flags |= LOOKUP_REVAL;
result = __link_path_walk(name, nd);
}
path_put(&save);
return result;
}
/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 *
 * name: remaining pathname, resolved relative to nd->path.
 * nd:   lookup state; nd->path is advanced one component at a time.
 *
 * Control-flow note: every label except return_err lives inside the
 * for(;;) body, so a "break" under those labels leaves the loop and
 * reaches the path_put() that follows it.
 */
static int __link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
struct inode *inode;
int err;
unsigned int lookup_flags = nd->flags;
/* Skip leading slashes. If nothing is left there is no component to
look up: nd->path already is the answer, so only the staleness
check at return_reval remains. */
while (*name=='/')
name++;
if (!*name)
goto return_reval;
/* The permission check and lookups below operate on the inode of the
directory the walk currently stands in. */
inode = nd->path.dentry->d_inode;
/* A nonzero nd->depth forces LOOKUP_FOLLOW and keeps only the
caller's LOOKUP_CONTINUE bit.
NOTE(review): depth is presumably nonzero only while a symlink body
is being resolved by a nested walk -- confirm against
do_follow_link(), which is not shown here. */
if (nd->depth)
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
/* At this point we know we have a real path component. */
for(;;) {
unsigned long hash;
struct qstr this;
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
/* Check access to the current directory before descending into it
(exec_permission_lite() presumably tests search/MAY_EXEC
permission -- confirm). On error, leave the loop. */
err = exec_permission_lite(inode);
if (err)
break;
/* Cut out the next component and hash it byte by byte. */
this.name = name;
c = *(const unsigned char *)name;
hash = init_name_hash();
do {
name++;
hash = partial_name_hash(c, hash);
c = *(const unsigned char *)name;
} while (c && (c != '/'));/* stop at '/' or at the terminating NUL */
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
/* remove trailing slashes? */
if (!c)/* stopped on NUL: this was the last component */
goto last_component;
/* Skip the run of '/' that follows the component. */
while (*++name == '/');
/* Only slashes were left: this was the last component, written
with trailing slashes. */
if (!*name)
goto last_with_slashes;
/* Otherwise more components follow, so this one is an
intermediate directory. */
/*
 * "." and ".." are special - ".." especially so because it has
 * to be able to know about the current root directory and
 * parent relationships.
 */
/* Names starting with '.': "." stays where we are, ".." moves to
the parent; any other such name takes the normal lookup below. */
if (this.name[0] == '.') switch (this.len) {
default:
break;
case 2:
if (this.name[1] != '.')
break;
follow_dotdot(nd);/* "..": step up to the parent */
inode = nd->path.dentry->d_inode;
/* fallthrough */
/* Case 2 deliberately has no break: after handling ".." it joins
case 1 and moves on to the next component. */
case 1:
continue;
}
/*
 * See if the low-level filesystem might want
 * to use its own hash..
 */
/* Only called when the directory's d_op supplies d_hash; a negative
return aborts the walk. */
if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
&this);
if (err < 0)
break;
}
/* This does the actual lookups.. */
/* On success the child's mnt/dentry pair is returned in next. */
err = do_lookup(nd, &this, &next);
if (err)
break;
err = -ENOENT;
inode = next.dentry->d_inode;
if (!inode)
goto out_dput;
/* An inode that supplies follow_link is resolved through
do_follow_link(), which updates nd; inode is then re-read
from nd->path. */
if (inode->i_op->follow_link) {
err = do_follow_link(&next, nd);
if (err)
goto return_err;
err = -ENOENT;
inode = nd->path.dentry->d_inode;
if (!inode)
break;
} else/* nothing to follow: move next into nd */
path_to_nameidata(&next, nd);
/* An intermediate component must support lookup, i.e. be a
directory. */
err = -ENOTDIR;
if (!inode->i_op->lookup)
break;
continue;
/* here ends the main loop */
last_with_slashes:
/* Trailing slashes: follow a final link and require a directory. */
lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
/* Clear LOOKUP_CONTINUE iff it was previously unset */
nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
if (lookup_flags & LOOKUP_PARENT)/* caller wants the parent: do not look the last name up */
goto lookup_parent;
if (this.name[0] == '.') switch (this.len) {
default:
break;
case 2:
if (this.name[1] != '.')
break;
follow_dotdot(nd);/* ".." as the last component */
inode = nd->path.dentry->d_inode;
/* fallthrough */
case 1:
goto return_reval;
}
/* Same optional filesystem hash hook as in the main loop. */
if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
&this);
if (err < 0)
break;
}/* look the last component up; the result is returned in next */
err = do_lookup(nd, &this, &next);
if (err)
break;
inode = next.dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)/* follow only when asked to */
&& inode && inode->i_op->follow_link) {
err = do_follow_link(&next, nd);
if (err)
goto return_err;
inode = nd->path.dentry->d_inode;
} else
/* not following: move next into nd */
path_to_nameidata(&next, nd);
err = -ENOENT;
if (!inode)
break;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
if (!inode->i_op->lookup)
break;
}
goto return_base;
lookup_parent:
/* Hand the last component back unresolved and classify it. */
nd->last = this;
nd->last_type = LAST_NORM;/* default: an ordinary name */
if (this.name[0] != '.')
goto return_base;
if (this.len == 1)
nd->last_type = LAST_DOT;
else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT;
else
goto return_base;
return_reval:
/*
 * We bypassed the ordinary revalidation routines.
 * We may need to check the cached dentry for staleness.
 */
/* Only for filesystems flagged FS_REVAL_DOT. A zero return from
d_revalidate() fails the walk with -ESTALE. d_op is dereferenced
without a NULL check here. */
if (nd->path.dentry && nd->path.dentry->d_sb &&
(nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
err = -ESTALE;
/* Note: we do not d_invalidate() */
if (!nd->path.dentry->d_op->d_revalidate(
nd->path.dentry, nd))
break;
}
return_base:
return 0;
out_dput:
/* Release what next holds, then take the common failure exit. */
path_put_conditional(&next, nd);
break;
}
/* Every "break" above ends up here: drop the walk's reference on
nd->path before reporting the error. */
path_put(&nd->path);
return_err:
/* Reached directly when do_follow_link() failed, skipping the
path_put() above (presumably do_follow_link() has already released
nd->path in that case -- confirm). */
return err;
}
2.2.1 더블 닷("..") 처리
더블 닷("..")은 상위 디렉터리에 접근하기 위한 것입니다.
/*
 * Handle a ".." component: move nd->path to its parent directory.
 *
 * The walk never climbs above nd->root.  When nd->path is the root of
 * its mount, the walk first crosses to the mountpoint in the parent
 * mount and then tries again from there.  follow_mount() is applied to
 * the final position.
 */
static __always_inline void follow_dotdot(struct nameidata *nd)
{
set_root(nd);
while(1) {
struct vfsmount *parent;
struct dentry *old = nd->path.dentry;
/* Already at the lookup root (same dentry and same mount):
".." stays where it is. */
if (nd->path.dentry == nd->root.dentry &&
nd->path.mnt == nd->root.mnt) {
break;
}
spin_lock(&dcache_lock);
/* Ordinary case: the current dentry is not the root of its mount,
so the parent is simply d_parent. Take a reference on the parent
before releasing the old dentry. */
if (nd->path.dentry != nd->path.mnt->mnt_root) {
nd->path.dentry = dget(nd->path.dentry->d_parent);/* reference the parent and step to it */
spin_unlock(&dcache_lock);
dput(old);/* release the dentry we left */
break;
}
spin_unlock(&dcache_lock);
spin_lock(&vfsmount_lock);
/* We are at the root of a mount, so the parent directory lies in
the parent mount: cross over to the dentry this mount is
attached to (mnt_mountpoint) in mnt_parent, then loop again to
step up from there. */
/* parent mount of the mount we are currently in */
parent = nd->path.mnt->mnt_parent;
/* A mount that is its own parent has nothing above it: stop. */
if (parent == nd->path.mnt) {
spin_unlock(&vfsmount_lock);
break;
}
/* Reference the parent mount before switching to it. */
mntget(parent);
/* Reference and step to the mountpoint dentry. */
nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
spin_unlock(&vfsmount_lock);
dput(old);
mntput(nd->path.mnt);
nd->path.mnt = parent;/* now inside the parent mount */
}
follow_mount(&nd->path);/* descend into anything mounted on the resulting dentry */
}
static void follow_mount(struct path *path)
{
while (d_mountpoint(path->dentry)) {/* */
/* mnt*/
struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)/* mnt */
break;
/* */
dput(path->dentry);
mntput(path->mnt);
path->mnt = mounted;/* mnt path mnt*/
path->dentry = dget(mounted->mnt_root);/* , mnt path*/
}
}
2.2.2 실제 경로 검색 작업
/*
 * do_lookup - find the child @name of the directory nd->path.
 * @nd:   lookup state; nd->path is the parent directory
 * @name: component to look up
 * @path: receives the child's mnt/dentry pair on success
 *
 * Tries the dentry cache first via __d_lookup().  A cached dentry
 * whose d_op supplies d_revalidate is passed through do_revalidate().
 * If there is no cached dentry, or revalidation returns NULL,
 * real_lookup() is called.  On success __follow_mount() is applied to
 * @path.
 *
 * Returns 0 on success or the error encoded in the returned dentry
 * pointer.
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,
		     struct path *path)
{
	struct vfsmount *mnt = nd->path.mnt;
	struct dentry *found;

	/* Fast path: the dentry cache. */
	found = __d_lookup(nd->path.dentry, name);

	/* A cached entry may need revalidating; this can yield NULL or an error. */
	if (found && found->d_op && found->d_op->d_revalidate)
		found = do_revalidate(found, nd);

	/* Cache miss or failed revalidation: do the real lookup. */
	if (!found)
		found = real_lookup(nd->path.dentry, name, nd);

	if (IS_ERR(found))
		return PTR_ERR(found);

	path->mnt = mnt;
	path->dentry = found;
	__follow_mount(path);
	return 0;
}
/*
 * This is called when everything else fails, and we actually have
 * to go to the low-level filesystem to find out what we should do..
 *
 * We get the directory semaphore, and after getting that we also
 * make sure that nobody added the entry to the dcache in the meantime..
 * SMP-safe
 *
 * parent: directory dentry to search in.
 * name:   component to look up.
 * nd:     lookup state, passed on to ->lookup() and do_revalidate().
 *
 * Returns a dentry, or an ERR_PTR: -ENOENT for a dead directory or a
 * failed revalidation, -ENOMEM if d_alloc() fails, or whatever
 * ->lookup() returned.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
{
struct dentry * result;
struct inode *dir = parent->d_inode;
mutex_lock(&dir->i_mutex);
/*
 * First re-do the cached lookup just in case it was created
 * while we waited for the directory semaphore..
 *
 * FIXME! This could use version numbering or similar to
 * avoid unnecessary cache lookups.
 *
 * The "dcache_lock" is purely to protect the RCU list walker
 * from concurrent renames at this point (we mustn't get false
 * negatives from the RCU list walk here, unlike the optimistic
 * fast walk).
 *
 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
 */
result = d_lookup(parent, name);
if (!result) {
struct dentry *dentry;
/* Don't create child dentry for a dead directory. */
result = ERR_PTR(-ENOENT);
if (IS_DEADDIR(dir))
goto out_unlock;
/* Allocate a fresh dentry for this name under parent. */
dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM);
if (dentry) {
/* Ask the filesystem's ->lookup() about the new dentry. A non-NULL
return is passed back to the caller in place of our dentry, which
is then dropped; a NULL return means our dentry is the result. */
result = dir->i_op->lookup(dir, dentry, nd);
if (result)
dput(dentry);
else
result = dentry;
}
/* Note: this label sits inside the if-block; every path through the
block releases i_mutex here and returns. */
out_unlock:
mutex_unlock(&dir->i_mutex);
return result;
}
/*
 * Uhhuh! Nasty case: the cache was re-populated while
 * we waited on the semaphore. Need to revalidate.
 */
mutex_unlock(&dir->i_mutex);
/* A NULL from do_revalidate() is reported to the caller as -ENOENT. */
if (result->d_op && result->d_op->d_revalidate) {
result = do_revalidate(result, nd);
if (!result)
result = ERR_PTR(-ENOENT);
}
return result;
}
2.2.2.1 dentry 할당 및 초기화
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
struct dentry *dentry;
char *dname;
/* slab dentry*/
dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
if (name->len > DNAME_INLINE_LEN-1) {
dname = kmalloc(name->len + 1, GFP_KERNEL);
if (!dname) {
kmem_cache_free(dentry_cache, dentry);
return NULL;
}
} else {
dname = dentry->d_iname;
}
/* dentry */
dentry->d_name.name = dname;
dentry->d_name.len = name->len;
dentry->d_name.hash = name->hash;
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
dentry->d_mounted = 0;
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
if (parent) {
dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
} else {
INIT_LIST_HEAD(&dentry->d_u.d_child);
}
spin_lock(&dcache_lock);
if (parent)
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
dentry_stat.nr_dentry++;
spin_unlock(&dcache_lock);
return dentry;
}
위의 코드에서 볼 수 있듯이 Linux 커널의 경로 검색은 대체로 다음과 같이 진행됩니다.
1. 조회를 초기화하고, nd 구조체가 검색 시작 지점을 가리키도록 설정합니다.
2. 시작 지점에서부터 경로를 검색합니다. 이때 nd는 검색 결과를 돌려주는 데 사용됩니다. 검색 과정에서는 경로 이름을 따라 구성 요소를 하나씩 차례로 접근하며, 문자 '/'의 처리, 상위 디렉터리 접근 처리(현재 파일 시스템의 경계를 넘는 경우도 고려해야 합니다), 그리고 접근하려는 dentry가 메모리에 존재하지 않아 새로 할당해야 하는 경우를 모두 다루어야 합니다.
함수가 반환된 뒤 매개변수 nd 구조체에는 검색 결과 정보가 저장되며, 여기에는 대상 파일 또는 디렉터리의 dentry 구조체와 inode 구조체가 포함됩니다.