Home > ext4 i2n
More actions

ext4 i2n

Tags:  

Make searching from inode to file path faster than now !

What we are doing now
accomplished

Idea of this project

If one wants to find all files' path of an inode number, he/she has to run 'find -i inode_number' to scan all the partition! It consumes much time and hurts hard disk very much.

ext4 i2n will record hardlinks information into inode, by help from this information, user space application can find all path of this inode 10 times faster than current method.

A new field is added to ext4 inode: i_lnblk.
- For regular file, it points to another block, if another files point to this inode, inode of parent directories of these files will be record to this block.
- For directory file, it records inode of its parent directory's inode number.

When create a directory by mkdir, or create a hardlink by ln, i_lnblk will be updated.

A new systemcall is added to kernel, called as sys_link2path(). User space application can call this function by an inode number.
- If inode number of regular file, return all inodes number of directories which contain directory entry point to the inode.
- If inode number of directory, return inode number of its parent directory.

By this syscall, user space application can get a list of inode number from lowest file to topest directory. User space application walks through the inodes list again and prints out all directory entry names --- by this means, we get a file path of a specified inode number.

Who does what

Kernel patch
mission: i_lnblk related, serve sys_link2path()
player: 方学能(Si), 吴桐(Max), 杨麟, 陈思行

User space program
mission: print out corresponded inode number by input inode number
player: 高山山,鲁燕山

Bash script for i2n
mission: print out all file paths by input inode number
player: 杨帆 刘之岳
唐禹铭, 梓軒, 陈健新

Wiki
mission: record funny stuffs
player: all team members

First day passed
 9:00 - 10:00
greeting and introduction to mentors, attendance, codefest games
reference projects show and team building
10:00 - 14:00
basic conception for ext4 filesystem and i2n idea
14:00 - 22:00
enjoy coding
22:00 - ??
tired, crazy, neurotic, excited, ..
Second day passed
10:00 - 12:00
debug kernel patch
13:00 - 15:00
debug user space utility and bash script
15:00 - 16:00
review demo
16:00 - 17:00
presentation for codefest project
17:00
event complished


Kernel patch
here is the patch based on 2.6.25-rc4, it's still buggy and bsic
diff -ru linux-2.6-orig/arch/x86/ia32/ia32entry.S linux-2.6-lnrec/arch/x86/ia32/ia32entry.S
--- linux-2.6-orig/arch/x86/ia32/ia32entry.S    2008-03-08 20:44:50.000000000 +0800
+++ linux-2.6-lnrec/arch/x86/ia32/ia32entry.S    2008-03-09 09:54:07.000000000 +0800
@@ -727,4 +727,5 @@
     .quad sys32_fallocate
     .quad compat_sys_timerfd_settime    /* 325 */
     .quad compat_sys_timerfd_gettime
+    .quad sys_link2path
 ia32_syscall_end:
diff -ru linux-2.6-orig/arch/x86/kernel/syscall_table_32.S linux-2.6-lnrec/arch/x86/kernel/syscall_table_32.S
--- linux-2.6-orig/arch/x86/kernel/syscall_table_32.S    2008-03-08 20:44:54.000000000 +0800
+++ linux-2.6-lnrec/arch/x86/kernel/syscall_table_32.S    2008-03-09 09:54:07.000000000 +0800
@@ -326,3 +326,4 @@
     .long sys_fallocate
     .long sys_timerfd_settime    /* 325 */
     .long sys_timerfd_gettime
+    .long sys_link2path
diff -ru linux-2.6-orig/fs/ext4/balloc.c linux-2.6-lnrec/fs/ext4/balloc.c
--- linux-2.6-orig/fs/ext4/balloc.c    2008-03-08 20:42:11.000000000 +0800
+++ linux-2.6-lnrec/fs/ext4/balloc.c    2008-03-09 09:54:06.000000000 +0800
@@ -1900,6 +1900,19 @@
     return ret;
 }
 
+ext4_fsblk_t ext4_new_lnblk(handle_t *handle, struct inode *inode, int *errp)
+{
+    struct super_block *sb = inode->i_sb;
+    ext4_fsblk_t ret;
+    ext4_fsblk_t goal = le32_to_cpu(
+                EXT4_SB(sb)->s_es->s_first_data_block) +
+                (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
+                EXT4_BLOCKS_PER_GROUP(sb);
+
+    ret = ext4_new_block(handle, inode, goal, errp);
+printk("%s: ret from ext4_new_block: %lu\n", __func__, ret);
+    return ret;
+}
 
 /**
  * ext4_count_free_blocks() -- count filesystem free blocks
diff -ru linux-2.6-orig/fs/ext4/inode.c linux-2.6-lnrec/fs/ext4/inode.c
--- linux-2.6-orig/fs/ext4/inode.c    2008-03-08 20:42:10.000000000 +0800
+++ linux-2.6-lnrec/fs/ext4/inode.c    2008-03-09 09:54:06.000000000 +0800
@@ -2825,6 +2825,8 @@
     } else
         ei->i_extra_isize = 0;
 
+    ei->i_lnblk = le64_to_cpu(raw_inode->i_lnblk);
+
     EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
     EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
     EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
@@ -3027,7 +3029,7 @@
             cpu_to_le32(inode->i_version >> 32);
         raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
     }
-
+    raw_inode->i_lnblk = cpu_to_le64(ei->i_lnblk);
 
     BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
     rc = ext4_journal_dirty_metadata(handle, bh);
@@ -3455,6 +3457,58 @@
 }
 #endif
 
+int ext4_get_inodelinks(struct super_block *sb, int ino, u64 *kbuf, int kbufsz)
+{
+    struct ext4_iloc iloc;
+    struct inode *inode;
+    struct ext4_inode_info *ei;
+    struct buffer_head *bh;
+    int i;
+
+    printk("enter %s\n", __func__);
+    inode = iget_locked(sb, ino);
+    if (!inode) {
+        printk("get inode for %d failed.\n", ino);
+        return -ENOMEM;
+    }
+
+    if (__ext4_get_inode_loc(inode, &iloc, 0) < 0) {
+        printk("failed to get inode loc.\n");
+        iget_failed(inode);
+        return -EFAULT;
+    }
+    ei = EXT4_I(inode);
+    printk("add of inode: 0x%p\n", inode);
+    printk("record blk: %lu\n", ei->i_lnblk);
+    // if it's a directory inode, ei->i_lnblk
+    // points to inode of parent directory
+    if (S_ISDIR(inode->i_mode)) {
+        kbuf[0] = cpu_to_le64(ei->i_lnblk);
+        unlock_new_inode(inode);
+        iput(inode);
+        brelse(iloc.bh);
+        return 1;
+    }
+
+    bh = ext4_bread(NULL, inode, ei->i_lnblk, 0, &i);
+    if (!bh) {
+        printk("read link record block falied.\n");
+        brelse(iloc.bh);
+        return -EIO;
+    }
+    printk("copy %d bytes to kbuf.\n", kbufsz);
+    memcpy(kbuf, bh->b_data, kbufsz);
+    for (i = 0; i < inode->i_nlink; i++)
+        printk("linkrec: %llx\n", le64_to_cpu(kbuf[i]));
+
+    unlock_new_inode(inode);
+    iput(inode);
+    brelse(iloc.bh);
+    brelse(bh);
+    return i;
+}
+
+
 int ext4_change_inode_journal_flag(struct inode *inode, int val)
 {
     journal_t *journal;
diff -ru linux-2.6-orig/fs/ext4/namei.c linux-2.6-lnrec/fs/ext4/namei.c
--- linux-2.6-orig/fs/ext4/namei.c    2008-03-08 20:42:11.000000000 +0800
+++ linux-2.6-lnrec/fs/ext4/namei.c    2008-03-09 09:54:06.000000000 +0800
@@ -1700,6 +1700,7 @@
     return err;
 }
 
+static int ext4_record_link(handle_t *handle, struct inode *dir,struct inode *inode);
 /*
  * By the time this is called, we already have created
  * the directory cache entry for the new file, but it
@@ -1732,6 +1733,10 @@
         inode->i_fop = &ext4_file_operations;
         ext4_set_aops(inode);
         err = ext4_add_nondir(handle, dentry, inode);
+        if ((!err) && ext4_record_link(handle, dir, inode)) {
+            printk("%s: call ext4_record_link failed.\n", __func__);
+            return -EFAULT;
+        }
     }
     ext4_journal_stop(handle);
     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
@@ -1774,6 +1779,16 @@
     return err;
 }
 
+static int ext4_record_parent(handle_t *handle, struct inode *dir, struct inode *inode)
+{
+    struct ext4_inode_info *ei;
+
+    ei = EXT4_I(inode);
+    ei->i_lnblk = dir->i_ino;
+    ext4_dirty_inode(inode);
+    return 0;
+}
+
 static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 {
     handle_t *handle;
@@ -1826,6 +1841,9 @@
     ext4_journal_dirty_metadata(handle, dir_block);
     brelse (dir_block);
     ext4_mark_inode_dirty(handle, inode);
+    err = ext4_record_parent(handle, dir, inode);
+    if (err)
+        goto out_clear_inode;
     err = ext4_add_entry (handle, dentry, inode);
     if (err) {
 out_clear_inode:
@@ -2230,6 +2248,81 @@
     return err;
 }
 
+static int ext4_record_link(handle_t *handle, struct inode *dir,struct inode *inode)
+{
+    struct ext4_inode_info *ei = EXT4_I(inode);
+    struct buffer_head *bh = NULL;
+    ext4_fsblk_t  link_blk;
+    u64 *lnrec_table;
+    int new = (inode->i_nlink ==1);
+    int i;
+    int ret;
+
+    printk("enter %s\n", __func__);
+    printk("addr of inode: 0x%p\n", inode);
+    if (inode->i_nlink > (inode->i_sb->s_blocksize / sizeof(u64))) {
+        printk("%s: too many links %d (max: %d)\n", __func__, inode->i_nlink,
+            (inode->i_sb->s_blocksize / sizeof(u64)));
+        ret = -EFAULT;
+        goto cleanup;   
+    }
+
+    if (new) {
+        printk("%s: create new record block\n", __func__);
+        /* allocate 1 block for link record block */
+        link_blk = ext4_new_lnblk(handle, inode, &ret);
+        if (ret)
+            goto cleanup;
+        ei->i_lnblk = link_blk;
+        printk("dirty inode\n");
+        ext4_dirty_inode(inode);
+    } else {
+        printk("%s: get existed record block\n", __func__);
+        link_blk = ei->i_lnblk;
+    }
+    printk("%s: record block is %d\n", __func__, link_blk);
+
+    bh = ext4_bread(handle, inode, link_blk, new, &ret);
+    if (!bh) {
+        printk("%s: can not get block from sb:%p blk:%d\n", __func__, inode->i_sb, link_blk);
+        ret = -EIO;
+        return ret;
+    }
+
+    ret = ext4_journal_get_write_access(handle, bh);
+    printk("%s: call ext4_journal_get_write_access\n", __func__);
+
+    if (ret) {
+        printk("%s: get journal access failed.\n", __func__);
+        goto cleanup;
+    }
+
+    lnrec_table = (u64 *)bh->b_data;
+    if (new) {
+        memset(lnrec_table, 0, inode->i_sb->s_blocksize);
+        lnrec_table[0] = cpu_to_le64(dir->i_ino);
+        printk("for new links, write %lu.\n", dir->i_ino);
+    } else {
+        lnrec_table[inode->i_nlink - 1] = cpu_to_le64(dir->i_ino);
+        printk("for existed links, write %lu to rec.\n", dir->i_ino);
+    }
+
+    for(i = 0; i < inode->i_nlink; i++)
+        printk("link record[%d]: %llx\n", i, le64_to_cpu(lnrec_table[i]));
+
+    ret = ext4_journal_dirty_metadata(handle, bh);
+    if (ret) {
+        printk("%s: falied to dirty metadata.\n", __func__);
+    }
+cleanup:
+    if (bh) {
+        brelse(bh);
+    }
+
+    return 0;
+}
+
+
 static int ext4_link (struct dentry * old_dentry,
         struct inode * dir, struct dentry *dentry)
 {
@@ -2248,8 +2341,9 @@
         return -ENOENT;
 
 retry:
+    /* 1 more block for hardlink record block */
     handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-                    EXT4_INDEX_EXTRA_TRANS_BLOCKS);
+                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
     if (IS_ERR(handle))
         return PTR_ERR(handle);
 
@@ -2261,6 +2355,10 @@
     atomic_inc(&inode->i_count);
 
     err = ext4_add_nondir(handle, dentry, inode);
+    if ((!err) && S_ISREG(inode->i_mode)) {
+        printk("%s: this is a hard link, try to record it.\n", __func__);
+        err = ext4_record_link(handle, dir, inode);
+    }
     ext4_journal_stop(handle);
     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
         goto retry;
diff -ru linux-2.6-orig/fs/ext4/super.c linux-2.6-lnrec/fs/ext4/super.c
--- linux-2.6-orig/fs/ext4/super.c    2008-03-08 20:42:11.000000000 +0800
+++ linux-2.6-lnrec/fs/ext4/super.c    2008-03-09 09:54:06.000000000 +0800
@@ -758,6 +758,41 @@
     return 0;
 }
 
+int ext4_get_inodelinks(struct super_block *sb, int ino, u64 *kbuf, int kbufsz);
+static int ext4_get_links(struct super_block *sb, unsigned long ino, const char __user * buf, int bufsz)
+{
+    u64 *kbuf = NULL;
+    int i, count;
+
+    printk("enter %s\n", __func__);
+    kbuf = (u64 *)__get_free_page(GFP_KERNEL);
+    if (!kbuf) {
+        printk("%s: alloc memory failed.\n", __func__);
+        return -ENOMEM;
+    }
+    memset(kbuf, 0, PAGE_SIZE);
+    count = ext4_get_inodelinks(sb, ino, kbuf, PAGE_SIZE);
+    printk("count returned from ext4_get_inodelinks: %d\n", count);
+    if ((count <= 0) || (count >= PAGE_SIZE/sizeof(u64))) {
+        printk("count out of range (0, %d)\n", PAGE_SIZE/sizeof(u64));
+        free_page((unsigned long)kbuf);
+        return -EFAULT;
+    }
+    for (i = 0; i < count; i++) {
+        printk("convert link record %d.\n", i);
+        kbuf[i] = le64_to_cpu(kbuf[i]);
+    }
+    if (bufsz > PAGE_SIZE)
+        bufsz = PAGE_SIZE;
+    if (copy_to_user((void *)buf, (void *)kbuf, bufsz)) {
+        printk("%s: copy buffer to user space failed.\n", __func__);
+        free_page((unsigned long)kbuf);
+        return -EFAULT;
+    }
+    printk("leave %s\n", __func__);
+    return count;
+}
+
 
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
         u64 ino, u32 generation)
@@ -865,6 +900,7 @@
     .quota_read    = ext4_quota_read,
     .quota_write    = ext4_quota_write,
 #endif
+    .get_links    = ext4_get_links,
 };
 
 static const struct export_operations ext4_export_ops = {
diff -ru linux-2.6-orig/fs/stat.c linux-2.6-lnrec/fs/stat.c
--- linux-2.6-orig/fs/stat.c    2008-03-08 20:42:12.000000000 +0800
+++ linux-2.6-lnrec/fs/stat.c    2008-03-09 09:54:06.000000000 +0800
@@ -324,6 +324,56 @@
     return sys_readlinkat(AT_FDCWD, path, buf, bufsiz);
 }
 
+asmlinkage long sys_link2path(const char __user *path, unsigned long ino, const char __user *buf, unsigned long bufsiz)
+{
+    struct nameidata nd;
+    char *kpath;
+    struct dentry * path_de;
+    struct super_block *sb;
+    int error;
+
+    printk("enter %s\n", __func__);   
+    printk("path: 0x%p, ino: 0x%x, buf: 0x%p, bufsiz: 0x%x\n", path, ino, buf, bufsiz);
+    if (bufsiz < (4<<10)) {
+        printk("bufsize small than 0x%x\n", 4<<10);
+        return -EINVAL;
+    }
+
+    kpath = (char *)kzalloc(PAGE_SIZE, GFP_KERNEL);
+    if (!kpath) {
+        printk("malloc for kpath failed.\n");
+        return -ENOMEM;
+    }
+    if(copy_from_user(kpath, path, PAGE_SIZE)) {
+        printk("copy path to kpath failed.\n");
+        kfree(kpath);
+        return -EFAULT;
+    }
+    printk("kpath: %s\n", kpath);
+    error = path_lookup(kpath, LOOKUP_FOLLOW, &nd);
+    if (error) {
+        printk("call path_lookup failed.\n");
+        kfree(kpath);
+        return -EINVAL;
+    }
+    printk("path_lookup done.\n");
+    path_de = nd.path.dentry;
+    printk("d_iname: %s\n", path_de->d_iname);
+    sb = path_de->d_sb;
+    if (!sb) {
+        printk("sb error\n");
+        kfree(kpath);
+        path_put(&nd.path);
+        return -EINVAL;
+    }
+   
+    error = sb->s_op->get_links(sb, ino, buf, bufsiz);
+    printk("leave %s\n", __func__);   
+    kfree(kpath);
+    path_put(&nd.path);
+    return error;
+}
+
 
 /* ---------- LFS-64 ----------- */
 #ifdef __ARCH_WANT_STAT64
diff -ru linux-2.6-orig/include/asm-x86/unistd_64.h linux-2.6-lnrec/include/asm-x86/unistd_64.h
--- linux-2.6-orig/include/asm-x86/unistd_64.h    2008-03-08 20:47:00.000000000 +0800
+++ linux-2.6-lnrec/include/asm-x86/unistd_64.h    2008-03-09 09:54:31.000000000 +0800
@@ -639,7 +639,8 @@
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime            287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-
+#define __NR_link2path                327
+__SYSCALL(__NR_link2path, sys_link2path)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff -ru linux-2.6-orig/include/linux/ext4_fs.h linux-2.6-lnrec/include/linux/ext4_fs.h
--- linux-2.6-orig/include/linux/ext4_fs.h    2008-03-08 20:47:40.000000000 +0800
+++ linux-2.6-lnrec/include/linux/ext4_fs.h    2008-03-09 09:55:02.000000000 +0800
@@ -382,6 +382,7 @@
     __le32  i_crtime;       /* File Creation time */
     __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
     __le32  i_version_hi;    /* high 32 bits for 64-bit version */
+    __le64    i_lnblk;
 };
 
 
@@ -963,6 +964,8 @@
             ext4_fsblk_t goal, int *errp);
 extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
             ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_new_lnblk (handle_t *handle, struct inode *inode,
+            int *errp);
 extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
             ext4_fsblk_t goal, unsigned long *count, int *errp);
 extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
diff -ru linux-2.6-orig/include/linux/ext4_fs_i.h linux-2.6-lnrec/include/linux/ext4_fs_i.h
--- linux-2.6-orig/include/linux/ext4_fs_i.h    2008-03-08 20:47:43.000000000 +0800
+++ linux-2.6-lnrec/include/linux/ext4_fs_i.h    2008-03-09 09:55:05.000000000 +0800
@@ -159,6 +159,9 @@
      */
     struct timespec i_crtime;
 
+    /* block id for ln records table block */
+    ext4_fsblk_t i_lnblk;
+
     /* mballoc */
     struct list_head i_prealloc_list;
     spinlock_t i_prealloc_lock;
diff -ru linux-2.6-orig/include/linux/fs.h linux-2.6-lnrec/include/linux/fs.h
--- linux-2.6-orig/include/linux/fs.h    2008-03-08 20:47:39.000000000 +0800
+++ linux-2.6-lnrec/include/linux/fs.h    2008-03-09 09:55:02.000000000 +0800
@@ -1268,8 +1268,10 @@
     ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
     ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
+    int (*get_links)(struct super_block *, unsigned long , const char __user *, int);
 };
 
+
 /*
  * Inode state bits.  Protected by inode_lock.
  *

User space utility code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *prog="i2p";

void usage()
{
    printf("usage:\n"
        "        %s path inode_nr\n", prog);
    exit(1);
}

#define SYS_I2P 327   
#if 0
int call64_link2path(char *path, unsigned long ino, char *buf, int bufsiz)
{
    unsigned int resultvar;
    unsigned long int __arg1 = (unsigned long)path;
    unsigned long int __arg2 = (unsigned long)ino;
    unsigned long int __arg3 = (unsigned long)buf;
    unsigned long int __arg4 = (unsigned long)bufsiz;

    register unsigned long int _a1 asm("rdi") = __arg1;
    register unsigned long int _a2 asm("rsi") = __arg2;
    register unsigned long int _a3 asm("rdx") = __arg3;
    register unsigned long int _a4 asm("r10") = __arg4;

    __asm__ __volatile__(
    "syscall\n\t"
    : "=a"(resultvar)
    : "0"(SYS_I2P), "r"(_a1), "r"(_a2), "r"(_a3), "r"(_a4)
    : "memory", "cc", "r11", "cx"
    );

    return resultvar;
}
#endif
int call32_link2path(char *path, unsigned long ino, char *buf, int bufsiz)
{
    unsigned int resultvar;

    __asm__ __volatile__(
    "movl    %1, %%eax\n\t"
    "int $0x80\n\t"
    : "=a" (resultvar)
    : "i" (SYS_I2P), "b"(path), "c"(ino), "d"(buf), "S"(bufsiz)
    : "memory", "cc");

    return resultvar;
}

int main(int argc, char *argv[])
{
    char path[4096] = {0,};
    unsigned long ino;
    unsigned long long buf[4096/sizeof(unsigned long)];
    int count, i;

    if (argc != 3)
        usage();

    memcpy(path, argv[1], strlen(argv[1]));
    ino = strtol(argv[2], NULL, 10);
    printf("path: %s(0x%p), inode number: 0x%lx\n", path, path, ino);
    count = call32_link2path(path, ino, (char *)buf, sizeof(buf));
    if (count < 0)
    {
        printf("execute error: %s\n", strerror(count));
        exit(1);
    }
    for (i = 0; i < count; i ++)
    {
        unsigned long long d_ino;
        d_ino = buf[i];
        printf("%llu\n", d_ino);
    }
    printf("\n");
    return 0;
}

The kernel patch and user space code is very basic. But it shows our idea on how to make it work. It's buggy and simple, anyway, it makes us enjoyed in 2 days codefest :-)



0 Comments  Show recent to old
Post a comment



 RSS of this page

Written by:   Version:   Last Edited By:   Modified