leava@server:~/linux $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- menuconfig
-*- Cryptographic API --->
Certificates for signature checking --->
(ca.pem) Additional X.509 keys for default system keyring
// 3393: ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
        ssize_t written = 0;
        ssize_t err;
        ssize_t status;

        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
        err = file_remove_privs(file);
        if (err)
                goto out;

        err = file_update_time(file);
        if (err)
                goto out;

        if (iocb->ki_flags & IOCB_DIRECT) {
                loff_t pos, endbyte;

                written = generic_file_direct_write(iocb, from);
                /*
                 * If the write stopped short of completing, fall back to
                 * buffered writes.  Some filesystems do this for writes to
                 * holes, for example.  For DAX files, a buffered write will
                 * not succeed (even if it did, DAX does not handle dirty
                 * page-cache pages correctly).
                 */
                if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
                        goto out;

                status = generic_perform_write(file, from, pos = iocb->ki_pos);
                /*
                 * If generic_perform_write() returned a synchronous error
                 * then we want to return the number of bytes which were
                 * direct-written, or the error code if that was zero.  Note
                 * that this differs from normal direct-io semantics, which
                 * will return -EFOO even if some bytes were written.
                 */
                if (unlikely(status < 0)) {
                        err = status;
                        goto out;
                }
                /*
                 * We need to ensure that the page cache pages are written to
                 * disk and invalidated to preserve the expected O_DIRECT
                 * semantics.
                 */
                endbyte = pos + status - 1;
                err = filemap_write_and_wait_range(mapping, pos, endbyte);
                if (err == 0) {
                        iocb->ki_pos = endbyte + 1;
                        written += status;
                        invalidate_mapping_pages(mapping,
                                                 pos >> PAGE_SHIFT,
                                                 endbyte >> PAGE_SHIFT);
                } else {
                        /*
                         * We don't know how much we wrote, so just return
                         * the number of bytes which were direct-written
                         */
                }
        } else {
                written = generic_perform_write(file, from, iocb->ki_pos);
                if (likely(written > 0))
                        iocb->ki_pos += written;
        }
out:
        current->backing_dev_info = NULL;
        return written ? written : err;
}
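For reference, __generic_file_write_iter() is normally entered through its locking wrapper generic_file_write_iter(), which many filesystems install directly as their .write_iter handler. In 5.10 the wrapper looks roughly like this (mm/filemap.c, comments trimmed):

ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;

        inode_lock(inode);
        ret = generic_write_checks(iocb, from);
        if (ret > 0)
                ret = __generic_file_write_iter(iocb, from);
        inode_unlock(inode);

        if (ret > 0)
                ret = generic_write_sync(iocb, ret);
        return ret;
}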
// 1923: int file_remove_privs(struct file *file)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = file_inode(file);
        int kill;
        int error = 0;

        /*
         * Fast path for nothing security related.
         * As well for non-regular files, e.g. blkdev inodes.
         * For example, blkdev_write_iter() might get here
         * trying to remove privs which it is not allowed to.
         */
        if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
                return 0;

        kill = dentry_needs_remove_privs(dentry);
        if (kill < 0)
                return kill;
        if (kill)
                error = __remove_privs(dentry, kill);
        if (!error)
                inode_has_no_xattr(inode);

        return error;
}
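When privileges do have to be dropped, __remove_privs() just forwards the kill mask computed by dentry_needs_remove_privs() (a combination of ATTR_KILL_SUID, ATTR_KILL_SGID and ATTR_KILL_PRIV) to notify_change(). In 5.10 the helper looks roughly like this (fs/inode.c):

static int __remove_privs(struct dentry *dentry, int kill)
{
        struct iattr newattrs;

        newattrs.ia_valid = ATTR_FORCE | kill;
        /*
         * Note we call this on write, so notify_change will not
         * encounter any conflicting delegations:
         */
        return notify_change(dentry, &newattrs, NULL);
}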
// 2245: void __mark_inode_dirty(struct inode *inode, int flags)
{
        struct bdi_writeback *wb;
        struct list_head *dirty_list;
        bool wakeup_bdi = false;

        /*
         * Paired with smp_mb() in __writeback_single_inode() for the
         * following lockless i_state test.  See there for details.
         */
        smp_mb();

        spin_lock(&inode->i_lock);
        inode->i_state |= flags;

        /*
         * If the inode was already on b_dirty/b_io/b_more_io, don't
         * reposition it (that would break b_dirty time-ordering).
         */
        wb = locked_inode_to_wb_and_lock_list(inode);

        inode->dirtied_when = jiffies;
        inode->dirtied_time_when = jiffies;

        if (inode->i_state & I_DIRTY)
                dirty_list = &wb->b_dirty;
        else
                dirty_list = &wb->b_dirty_time;

        wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list);

        spin_unlock(&wb->list_lock);

        /*
         * If this is the first dirty inode for this bdi,
         * we have to wake-up the corresponding bdi thread
         * to make sure background write-back happens
         * later.
         */
        if (wakeup_bdi &&
            (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
                wb_wakeup_delayed(wb);
        return;
}
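Most callers reach __mark_inode_dirty() through thin inline wrappers rather than calling it directly; roughly, from include/linux/fs.h in 5.10:

static inline void mark_inode_dirty(struct inode *inode)
{
        __mark_inode_dirty(inode, I_DIRTY);
}

static inline void mark_inode_dirty_sync(struct inode *inode)
{
        __mark_inode_dirty(inode, I_DIRTY_SYNC);
}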
// 3288: ssize_t generic_perform_write(struct file *file,
                                struct iov_iter *i, loff_t pos)
{
        struct address_space *mapping = file->f_mapping;
        const struct address_space_operations *a_ops = mapping->a_ops;
        long status = 0;
        ssize_t written = 0;
        unsigned int flags = 0;

        do {
                struct page *page;
                unsigned long offset;   /* Offset into pagecache page */
                unsigned long bytes;    /* Bytes to write to page */
                size_t copied;          /* Bytes copied from user */
                void *fsdata;

                offset = (pos & (PAGE_SIZE - 1));
                bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_count(i));

again:
                /*
                 * Bring in the user page that we will copy from _first_.
                 * Otherwise there's a nasty deadlock on copying from the
                 * same page as we're writing to, without it being marked
                 * up-to-date.
                 *
                 * Not only is this an optimisation, but it is also required
                 * to check that the address is actually valid, when atomic
                 * usercopies are used, below.
                 */
                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                        status = -EFAULT;
                        break;
                }

                if (fatal_signal_pending(current)) {
                        status = -EINTR;
                        break;
                }

                status = a_ops->write_begin(file, mapping, pos, bytes, flags,
                                                &page, &fsdata);
                if (unlikely(status < 0))
                        break;

                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);

                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
                flush_dcache_page(page);

                status = a_ops->write_end(file, mapping, pos, bytes, copied,
                                                page, fsdata);
                if (unlikely(status < 0))
                        break;
                copied = status;

                cond_resched();

                iov_iter_advance(i, copied);
                if (unlikely(copied == 0)) {
                        /*
                         * If we were unable to copy any data at all, we must
                         * fall back to a single segment length write.
                         *
                         * If we didn't fallback here, we could livelock
                         * because not all segments in the iov can be copied at
                         * once without a pagefault.
                         */
                        bytes = min_t(unsigned long, PAGE_SIZE - offset,
                                                iov_iter_single_seg_count(i));
                        goto again;
                }
                pos += copied;
                written += copied;

                balance_dirty_pages_ratelimited(mapping);
        } while (iov_iter_count(i));

        return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);
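The ->write_begin()/->write_end() pair is the per-page contract that generic_perform_write() drives: write_begin hands back a locked pagecache page ready to receive the user copy, and write_end commits however many bytes were actually copied. As a concrete reference point, the simplest in-tree implementation lives in fs/libfs.c; in 5.10 it looks roughly like this:

int simple_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
                        struct page **pagep, void **fsdata)
{
        struct page *page;
        pgoff_t index;

        index = pos >> PAGE_SHIFT;

        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;

        *pagep = page;

        if (!PageUptodate(page) && (len != PAGE_SIZE)) {
                unsigned from = pos & (PAGE_SIZE - 1);

                zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
        }
        return 0;
}

Note how the parts of a not-yet-uptodate page that the copy will not cover are zeroed, so that write_end can safely mark the whole page up to date afterwards.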
// 534: /*
 * Might pages of this file have been modified in userspace?
 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
 * marks vma as VM_SHARED if it is shared, and the file was opened for
 * writing i.e. vma may be mprotected writable even if now readonly.
 *
 * If i_mmap_writable is negative, no new writable mappings are allowed. You
 * can only deny writable mappings, if none exists right now.
 */
static inline int mapping_writably_mapped(struct address_space *mapping)
{
        return atomic_read(&mapping->i_mmap_writable) > 0;
}
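The i_mmap_writable counter that this helper reads is maintained by a small family of inlines next to it in include/linux/fs.h; in 5.10 they look roughly like this (mmap_region() calls mapping_map_writable() when creating a shared writable mapping):

static inline int mapping_map_writable(struct address_space *mapping)
{
        return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
                0 : -EPERM;
}

static inline void mapping_unmap_writable(struct address_space *mapping)
{
        atomic_dec(&mapping->i_mmap_writable);
}

static inline int mapping_deny_writable(struct address_space *mapping)
{
        return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
                0 : -EBUSY;
}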
// 296: /*
 * Ensure cache coherency between kernel mapping and userspace mapping
 * of this page.
 *
 * We have three cases to consider:
 * - VIPT non-aliasing cache: fully coherent so nothing required.
 * - VIVT: fully aliasing, so we need to handle every alias in our
 *   current VM view.
 * - VIPT aliasing: need to handle one alias in our current VM view.
 *
 * If we need to handle aliasing:
 *  If the page only exists in the page cache and there are no user
 *  space mappings, we can be lazy and remember that we may have dirty
 *  kernel cache lines for later.  Otherwise, we assume we have
 *  aliasing mappings.
 *
 * Note that we disable the lazy flush for SMP configurations where
 * the cache maintenance operations are not automatically broadcasted.
 */
void flush_dcache_page(struct page *page)
{
        struct address_space *mapping;

        /*
         * The zero page is never written to, so never has any dirty
         * cache lines, and therefore never needs to be flushed.
         */
        if (page == ZERO_PAGE(0))
                return;

        if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) {
                if (test_bit(PG_dcache_clean, &page->flags))
                        clear_bit(PG_dcache_clean, &page->flags);
                return;
        }

        mapping = page_mapping_file(page);

        if (!cache_ops_need_broadcast() &&
            mapping && !page_mapcount(page))
                clear_bit(PG_dcache_clean, &page->flags);
        else {
                __flush_dcache_page(mapping, page);
                if (mapping && cache_is_vivt())
                        __flush_dcache_aliases(mapping, page);
                else if (mapping)
                        __flush_icache_all();
                set_bit(PG_dcache_clean, &page->flags);
        }
}
EXPORT_SYMBOL(flush_dcache_page);
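A typical caller-side pattern, shown here as a hypothetical helper purely for illustration: after the kernel writes into a pagecache page through its own mapping, it calls flush_dcache_page() so that user mappings (and D-cache aliases on VIVT or aliasing-VIPT hardware) observe the new data:

/* Hypothetical illustration: fill a pagecache page from a kernel
 * buffer, then flush so user mappings see coherent data. */
static void fill_page(struct page *page, const void *buf, size_t len)
{
        void *kaddr = kmap_atomic(page);

        memcpy(kaddr, buf, len);
        kunmap_atomic(kaddr);
        flush_dcache_page(page);
}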
leava@server:~ $ cd linux
leava@server:~/linux $ KERNEL=kernel7l
leava@server:~/linux $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- bcm2711_defconfig
Modify the configuration (enable fs-verity and dm-verity):
leava@server:~/linux $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- menuconfig
File systems --->
[*] FS Verity (read-only file-based authenticity protection)
[*] FS Verity builtin signature support
Device Drivers --->
[*] Multiple devices driver support (RAID and LVM) --->
<*> Device mapper support
[*] Device mapper debugging support
<*> Verity target support
[*] Verity data device root hash signature verification support
root@server:/# apt-get install libssl-dev fio
root@server:/# wget https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git/snapshot/fsverity-utils-1.2.tar.gz
root@server:/# tar xf fsverity-utils-1.2.tar.gz
root@server:/# cd fsverity-utils-1.2
root@server:/fsverity-utils-1.2# make && make install
Raspbian GNU/Linux 10 raspberrypi ttyS0
raspberrypi login: root
Password:
Last login: Thu Dec 3 16:27:35 GMT 2020 on ttyS0
Linux raspberrypi 5.10.3-v7l+ #1 SMP Mon Dec 28 05:53:15 UTC 2020 armv7l
The programs included with the Debian GNU/Linux system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.
Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
permitted by applicable law.
SSH is enabled and the default password for the 'pi' user has not been changed.
This is a security risk - please login as the 'pi' user and type 'passwd' to set a new password.
Wi-Fi is currently blocked by rfkill.
Use raspi-config to set the country before use.
root@raspberrypi:~#
Conclusion
In this article, we described how to build Linux Kernel 5.10 for the Raspberry Pi ourselves and boot it using network boot.
Next time, we will look at how to use dm-verity and fs-verity in the environment we created.
leava@ubuntu-bionic:~$ wget https://buildroot.org/downloads/buildroot-2020.02.8.tar.gz
leava@ubuntu-bionic:~$ tar zxvf buildroot-2020.02.8.tar.gz && cd buildroot-2020.02.8
leava@ubuntu-bionic:~/busybox$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi-
leava@ubuntu-bionic:~/busybox$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- install
leava@ubuntu-bionic:~/busybox/_install$ cat <<EOF > init
#!/bin/busybox sh
echo "Mounting Proc and Sysfs"
# Mount the /proc and /sys filesystems.
mount -t devtmpfs devtmpfs /dev
mount -t proc none /proc
mount -t sysfs none /sys
# Mount the root filesystem
mount -t ext4 /dev/mmcblk0 /mnt/newroot
# Switch mount point
mount -n -o move /sys /mnt/newroot/sys
mount -n -o move /proc /mnt/newroot/proc
mount -n -o move /dev /mnt/newroot/dev
# Switch to the new root filesystem and exec init
exec switch_root -c /dev/console /mnt/newroot /sbin/init
EOF
=> bootz 0x62000000 0x63008000 0x63000000
Kernel image @ 0x62000000 [ 0x000000 - 0x46a478 ]
## Loading init Ramdisk from Legacy Image at 63008000 ...
Image Name:
Image Type: ARM Linux RAMDisk Image (gzip compressed)
Data Size: 1123310 Bytes = 1.1 MiB
Load Address: 00000000
Entry Point: 00000000
Verifying Checksum ... OK
## Flattened Device Tree blob at 63000000
Booting using the fdt blob at 0x63000000
Loading Ramdisk to 6fd65000, end 6fe773ee ... OK
Loading Device Tree to 6fd5e000, end 6fd6473e ... OK
Starting kernel ...
...
buildroot login: # you can log in as root
#
Created files
#!/bin/busybox sh

echo "Mounting Proc and Sysfs"

# Mount the /dev, /proc and /sys filesystems.
mount -t devtmpfs devtmpfs /dev
mount -t proc none /proc
mount -t sysfs none /sys

# Mount the root filesystem
mount -t ext4 /dev/mmcblk0 /mnt/newroot

# Switch mount point
mount -n -o move /sys /mnt/newroot/sys
mount -n -o move /proc /mnt/newroot/proc
mount -n -o move /dev /mnt/newroot/dev

# Switch to the new root filesystem and exec init
exec switch_root -c /dev/console /mnt/newroot /sbin/init
4. To look up the file structure given an fd, a reader must use either the fcheck() or fcheck_files() APIs. These take care of barrier requirements due to lock-free lookup.
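A minimal sketch of that lookup pattern, assuming a 5.10-era kernel where the helper is still named fcheck() (later kernels renamed it to files_lookup_fd_rcu()):

/* Sketch: resolve an fd to a struct file without taking
 * files->file_lock.  fcheck() must run under rcu_read_lock();
 * get_file_rcu() bumps f_count unless it has already dropped to
 * zero, guarding against a concurrent close(). */
struct file *f;

rcu_read_lock();
f = fcheck(fd);
if (f && !get_file_rcu(f))
        f = NULL;       /* raced with the final fput() */
rcu_read_unlock();

if (f) {
        /* ... use f ... */
        fput(f);
}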