leava@server:~/linux $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- menuconfig
-*- Cryptographic API --->
Certificates for signature checking --->
(ca.pem) Additional X.509 keys for default system keyring
// 3832:ssize_t__generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t written = 0;
ssize_t err;
ssize_t status;
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err)
goto out;
err = file_update_time(file);
if (err)
goto out;
if (iocb->ki_flags & IOCB_DIRECT) {
loff_t pos, endbyte;
written = generic_file_direct_write(iocb, from);
/* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to * holes, for example. For DAX files, a buffered write will * not succeed (even if it did, DAX does not handle dirty * page-cache pages correctly). */if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
goto out;
status = generic_perform_write(file, from, pos = iocb->ki_pos);
/* * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were * direct-written, or the error code if that was zero. Note * that this differs from normal direct-io semantics, which * will return -EFOO even if some bytes were written. */if (unlikely(status < 0)) {
err = status;
goto out;
}
/* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. */
endbyte = pos + status - 1;
err = filemap_write_and_wait_range(mapping, pos, endbyte);
if (err == 0) {
iocb->ki_pos = endbyte + 1;
written += status;
invalidate_mapping_pages(mapping,
pos >> PAGE_SHIFT,
endbyte >> PAGE_SHIFT);
} else {
/* * We don't know how much we wrote, so just return * the number of bytes which were direct-written */
}
} else {
written = generic_perform_write(file, from, iocb->ki_pos);
if (likely(written > 0))
iocb->ki_pos += written;
}
out:
current->backing_dev_info = NULL;
return written ? written : err;
}
// 1936:intfile_remove_privs(struct file *file)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = file_inode(file);
int kill;
int error = 0;
/* * Fast path for nothing security related. * As well for non-regular files, e.g. blkdev inodes. * For example, blkdev_write_iter() might get here * trying to remove privs which it is not allowed to. */if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return0;
kill = dentry_needs_remove_privs(dentry);
if (kill < 0)
return kill;
if (kill)
error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
if (!error)
inode_has_no_xattr(inode);
return error;
}
// 1785:staticintupdate_time(struct inode *inode, struct timespec64 *time, int flags)
{
if (inode->i_op->update_time)
return inode->i_op->update_time(inode, time, flags);
returngeneric_update_time(inode, time, flags);
}
// 2381:void__mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
int dirtytime = 0;
trace_writeback_mark_inode_dirty(inode, flags);
if (flags & I_DIRTY_INODE) {
/* * Notify the filesystem about the inode being dirtied, so that * (if needed) it can update on-disk fields and journal the * inode. This is only needed when the inode itself is being * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not * for just I_DIRTY_PAGES or I_DIRTY_TIME. */trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
trace_writeback_dirty_inode(inode, flags);
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
flags &= ~I_DIRTY_TIME;
} else {
/* * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing. * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME * in one call to __mark_inode_dirty().) */
dirtytime = flags & I_DIRTY_TIME;
WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
}
/* * Paired with smp_mb() in __writeback_single_inode() for the * following lockless i_state test. See there for details. */smp_mb();
if (((inode->i_state & flags) == flags) ||
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
if ((inode->i_state & flags) != flags) {
constint was_dirty = inode->i_state & I_DIRTY;
inode_attach_wb(inode, NULL);
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */if (flags & I_DIRTY_INODE)
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;
/* * If the inode is queued for writeback by flush worker, just * update its dirty state. Once the flush worker is done with * the inode it will place it on the appropriate superblock * list, based upon its state. */if (inode->i_state & I_SYNC_QUEUED)
goto out_unlock_inode;
/* * Only add valid (hashed) inodes to the superblock's * dirty list. Add blockdev inodes as well. */if (!S_ISBLK(inode->i_mode)) {
if (inode_unhashed(inode))
goto out_unlock_inode;
}
if (inode->i_state & I_FREEING)
goto out_unlock_inode;
/* * If the inode was already on b_dirty/b_io/b_more_io, don't * reposition it (that would break b_dirty time-ordering). */if (!was_dirty) {
struct bdi_writeback *wb;
struct list_head *dirty_list;
bool wakeup_bdi = false;
wb = locked_inode_to_wb_and_lock_list(inode);
inode->dirtied_when = jiffies;
if (dirtytime)
inode->dirtied_time_when = jiffies;
if (inode->i_state & I_DIRTY)
dirty_list = &wb->b_dirty;
else
dirty_list = &wb->b_dirty_time;
wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);
spin_unlock(&wb->list_lock);
trace_writeback_dirty_inode_enqueue(inode);
/* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread * to make sure background write-back happens * later. */if (wakeup_bdi &&
(wb->bdi->capabilities & BDI_CAP_WRITEBACK))
wb_wakeup_delayed(wb);
return;
}
}
out_unlock_inode:
spin_unlock(&inode->i_lock);
}
// 2476:
wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);
spin_unlock(&wb->list_lock);
trace_writeback_dirty_inode_enqueue(inode);
/* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread * to make sure background write-back happens * later. */if (wakeup_bdi &&
(wb->bdi->capabilities & BDI_CAP_WRITEBACK))
wb_wakeup_delayed(wb);
return;
// 3733:ssize_tgeneric_perform_write(struct file *file,
struct iov_iter *i, loff_t pos)
{
struct address_space *mapping = file->f_mapping;
conststruct address_space_operations *a_ops = mapping->a_ops;
long status = 0;
ssize_t written = 0;
unsignedint flags = 0;
do {
struct page *page;
unsignedlong offset; /* Offset into pagecache page */unsignedlong bytes; /* Bytes to write to page */size_t copied; /* Bytes copied from user */void *fsdata;
offset = (pos & (PAGE_SIZE - 1));
bytes = min_t(unsignedlong, PAGE_SIZE - offset,
iov_iter_count(i));
again:
/* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. */if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
break;
}
if (fatal_signal_pending(current)) {
status = -EINTR;
break;
}
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
if (unlikely(status < 0))
break;
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
copied = copy_page_from_iter_atomic(page, offset, bytes, i);
flush_dcache_page(page);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
if (unlikely(status != copied)) {
iov_iter_revert(i, copied - max(status, 0L));
if (unlikely(status < 0))
break;
}
cond_resched();
if (unlikely(status == 0)) {
/* * A short copy made ->write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */if (copied)
bytes = copied;
goto again;
}
pos += status;
written += status;
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
return written ? written : status;
}
// 3754:/* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. * * Not only is this an optimisation, but it is also required * to check that the address is actually valid, when atomic * usercopies are used, below. */if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
break;
}
// 260:staticinlineunsignedlong__phys_to_virt(phys_addr_t x)
{
unsignedlong t;
/* * 'unsigned long' cast discard upper word when * phys_addr_t is 64 bit, and makes sure that inline * assembler expression receives 32 bit argument * in place where 'r' 32 bit operand is expected. */__pv_stub((unsignedlong) x, t, "sub");
return t;
}
// 296:/* * Ensure cache coherency between kernel mapping and userspace mapping * of this page. * * We have three cases to consider: * - VIPT non-aliasing cache: fully coherent so nothing required. * - VIVT: fully aliasing, so we need to handle every alias in our * current VM view. * - VIPT aliasing: need to handle one alias in our current VM view. * * If we need to handle aliasing: * If the page only exists in the page cache and there are no user * space mappings, we can be lazy and remember that we may have dirty * kernel cache lines for later. Otherwise, we assume we have * aliasing mappings. * * Note that we disable the lazy flush for SMP configurations where * the cache maintenance operations are not automatically broadcasted. */voidflush_dcache_page(struct page *page)
{
struct address_space *mapping;
/* * The zero page is never written to, so never has any dirty * cache lines, and therefore never needs to be flushed. */if (page == ZERO_PAGE(0))
return;
if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) {
if (test_bit(PG_dcache_clean, &page->flags))
clear_bit(PG_dcache_clean, &page->flags);
return;
}
mapping = page_mapping_file(page);
if (!cache_ops_need_broadcast() &&
mapping && !page_mapcount(page))
clear_bit(PG_dcache_clean, &page->flags);
else {
__flush_dcache_page(mapping, page);
if (mapping && cache_is_vivt())
__flush_dcache_aliases(mapping, page);
elseif (mapping)
__flush_icache_all();
set_bit(PG_dcache_clean, &page->flags);
}
}
EXPORT_SYMBOL(flush_dcache_page);
// 3790:if (unlikely(status == 0)) {
/* * A short copy made ->write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */if (copied)
bytes = copied;
goto again;
}
pos += status;
written += status;
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
return written ? written : status;
leava@server:~ $ cd linux
leava@server:~/linux $ KERNEL=kernel7l
leava@server:~/linux $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- bcm2711_defconfig
コンフィグを修正する (fs-verityとdm-verityの有効化)
leava@server:~/linux $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- menuconfig
File systems --->
[*] FS Verity (read-only file-based authenticity protection)
[*] FS Verity builtin signature support
Device Drivers --->
[*] Multiple devices driver support (RAID and LVM) --->
<*> Device mapper support
[*] Device mapper debugging support
<*> Verity target support
[*] Verity data device root hash signature verification support
root@server:/# apt-get install libssl-dev fio
root@server:/# wget https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git/snapshot/fsverity-utils-1.2.tar.gz
root@server:/# tar xf fsverity-utils-1.2.tar.gz
root@server:/# cd fsverity-utils-1.2
root@server:/fsverity-utils-1.2# make && make install
Raspbian GNU/Linux 10 raspberrypi ttyS0
raspberrypi login: root
Password:
Last login: Thu Dec 3 16:27:35 GMT 2020 on ttyS0
Linux raspberrypi 5.10.3-v7l+ #1 SMP Mon Dec 28 05:53:15 UTC 2020 armv7l
The programs included with the Debian GNU/Linux system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.
Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
permitted by applicable law.
SSH is enabled and the default password for the 'pi' user has not been changed.
This is a security risk - please login as the 'pi' user and type 'passwd' to set a new password.
Wi-Fi is currently blocked by rfkill.
Use raspi-config to set the country before use.
root@raspberrypi:~#
おわりに
本記事ではRaspberry Pi 用カーネルのLinux Kernel 5.10を自前でビルドして、ネットワークブートを利用して起動する方法を記載する。
次回は、作成した環境でdm-verityとfs-verityの使い方について確認する。
// 366:intrw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
if (unlikely((ssize_t) count < 0))
return -EINVAL;
/* * ranged mandatory locking does not apply to streams - it makes sense * only for files where position has a meaning. */if (ppos) {
loff_t pos = *ppos;
if (unlikely(pos < 0)) {
if (!unsigned_offsets(file))
return -EINVAL;
if (count >= -pos) /* both values are in 0..LLONG_MAX */return -EOVERFLOW;
} elseif (unlikely((loff_t) (pos + count) < 0)) {
if (!unsigned_offsets(file))
return -EINVAL;
}
}
returnsecurity_file_permission(file,
read_write == READ ? MAY_READ : MAY_WRITE);
}
leava@ubuntu-bionic:~$ wget https://buildroot.org/downloads/buildroot-2020.02.8.tar.gz
leava@ubuntu-bionic:~$ tar zxvf buildroot-2020.02.8.tar.gz && cd buildroot-2020.02.8
leava@ubuntu-bionic:~/busybox$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi-
leava@ubuntu-bionic:~/busybox$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- install
leava@ubuntu-bionic:~/busybox/_install$ cat <<EOF > init
#!/bin/busybox sh
echo "Mounting Proc and Sysfs"
# Mount the /proc and /sys filesystems.
mount -t devtmpfs devtempfs /dev
mount -t proc none /proc
mount -t sysfs none /sys
# Mount the root filesystem
mount -t ext4 /dev/mmcblk0 /mnt/newroot
# Switch mount point
mount -n -o move /sys /mnt/newroot/sys
mount -n -o move /proc /mnt/newroot/proc
mount -n -o move /dev /mnt/newroot/dev
# Execute new mount rootfilesystem
exec switch_root -c /dev/console /mnt/newroot /sbin/init
EOF
=> bootz 0x62000000 0x63008000 0x63000000
Kernel image @ 0x62000000 [ 0x000000 - 0x46a478 ]
## Loading init Ramdisk from Legacy Image at 63008000 ...
Image Name:
Image Type: ARM Linux RAMDisk Image (gzip compressed)
Data Size: 1123310 Bytes = 1.1 MiB
Load Address: 00000000
Entry Point: 00000000
Verifying Checksum ... OK
## Flattened Device Tree blob at 63000000
Booting using the fdt blob at 0x63000000
Loading Ramdisk to 6fd65000, end 6fe773ee ... OK
Loading Device Tree to 6fd5e000, end 6fd6473e ... OK
Starting kernel ...
...
buildroot login: # rootでログイン可能
#
作成ファイル置き場
#!/bin/busybox shecho"Mounting Proc and Sysfs"# Mount the /proc and /sys filesystems.
mount -t devtmpfs devtempfs /dev
mount -t proc none /proc
mount -t sysfs none /sys
# Mount the root filesystem
mount -t ext4 /dev/mmcblk0 /mnt/newroot
# Switch mount point
mount -n-o move /sys /mnt/newroot/sys
mount -n-o move /proc /mnt/newroot/proc
mount -n-o move /dev /mnt/newroot/dev
# Execute new mount rootfilesystemexec switch_root -c /dev/console /mnt/newroot /sbin/init