Skip to content

Commit

Permalink
[XFS] Concurrent Multi-File Data Streams
Browse files Browse the repository at this point in the history
In media spaces, video is often stored in a frame-per-file format. When
dealing with uncompressed realtime HD video streams in this format, it is
crucial that files do not get fragmented and that multiple files a placed
contiguously on disk.

When multiple streams are being ingested and played out at the same time,
it is critical that the filesystem does not cross the streams and
interleave them together as this creates seek and readahead cache miss
latency and prevents both ingest and playout from meeting frame rate
targets.

This patch set creates a "stream of files" concept into the allocator to
place all the data from a single stream contiguously on disk so that RAID
array readahead can be used effectively. Each additional stream gets
placed in different allocation groups within the filesystem, thereby
ensuring that we don't cross any streams. When an AG fills up, we select a
new AG for the stream that is not in use.

The core of the functionality is the stream tracking - each inode that we
create in a directory needs to be associated with the directories' stream.
Hence every time we create a file, we look up the directories' stream
object and associate the new file with that object.

Once we have a stream object for a file, we use the AG that the stream
object point to for allocations. If we can't allocate in that AG (e.g. it
is full) we move the entire stream to another AG. Other inodes in the same
stream are moved to the new AG on their next allocation (i.e. lazy
update).

Stream objects are kept in a cache and hold a reference on the inode.
Hence the inode cannot be reclaimed while there is an outstanding stream
reference. This means that on unlink we need to remove the stream
association and we also need to flush all the associations on certain
events that want to reclaim all unreferenced inodes (e.g. filesystem
freeze).

SGI-PV: 964469
SGI-Modid: xfs-linux-melb:xfs-kern:29096a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
Signed-off-by: Vlad Apostolov <vapo@sgi.com>
  • Loading branch information
David Chinner authored and Tim Shimmin committed Jul 14, 2007
1 parent 0892ccd commit 2a82b8b
Show file tree
Hide file tree
Showing 21 changed files with 1,730 additions and 12 deletions.
2 changes: 2 additions & 0 deletions fs/xfs/Makefile-linux-2.6
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \
xfs_dir2_sf.o \
xfs_error.o \
xfs_extfree_item.o \
xfs_filestream.o \
xfs_fsops.o \
xfs_ialloc.o \
xfs_ialloc_btree.o \
Expand All @@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \
xfs_log.o \
xfs_log_recover.o \
xfs_mount.o \
xfs_mru_cache.o \
xfs_rename.o \
xfs_trans.o \
xfs_trans_ail.o \
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/linux-2.6/xfs_globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ xfs_param_t xfs_params = {
.inherit_nosym = { 0, 0, 1 },
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 50, 3600*100},
};

/*
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/linux-2.6/xfs_linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
#define xfs_rotorstep xfs_params.rotorstep.val
#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val

#define current_cpu() (raw_smp_processor_id())
#define current_pid() (current->pid)
Expand Down
11 changes: 11 additions & 0 deletions fs/xfs/linux-2.6/xfs_sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,17 @@ static ctl_table xfs_table[] = {
.extra1 = &xfs_params.inherit_nodfrg.min,
.extra2 = &xfs_params.inherit_nodfrg.max
},
{
.ctl_name = XFS_FILESTREAM_TIMER,
.procname = "filestream_centisecs",
.data = &xfs_params.fstrm_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &xfs_params.fstrm_timer.min,
.extra2 = &xfs_params.fstrm_timer.max,
},
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
{
Expand Down
2 changes: 2 additions & 0 deletions fs/xfs/linux-2.6/xfs_sysctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ typedef struct xfs_param {
xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
} xfs_param_t;

/*
Expand Down Expand Up @@ -86,6 +87,7 @@ enum {
XFS_INHERIT_NOSYM = 19,
XFS_ROTORSTEP = 20,
XFS_INHERIT_NODFRG = 21,
XFS_FILESTREAM_TIMER = 22,
};

extern xfs_param_t xfs_params;
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#define XFS_RW_TRACE 1
#define XFS_BUF_TRACE 1
#define XFS_VNODE_TRACE 1
#define XFS_FILESTREAMS_TRACE 1
#endif

#include <linux-2.6/xfs_linux.h>
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs_ag.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ typedef struct xfs_perag
lock_t pagb_lock; /* lock for pagb_list */
#endif
xfs_perag_busy_t *pagb_list; /* unstable blocks */
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
} xfs_perag_t;

#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
Expand Down
69 changes: 62 additions & 7 deletions fs/xfs/xfs_bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_filestream.h"


#ifdef DEBUG
Expand Down Expand Up @@ -2725,9 +2726,15 @@ xfs_bmap_btalloc(
}
nullfb = ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
if (nullfb)
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
else
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->rval = ap->firstblock;

xfs_bmap_adjacent(ap);
Expand All @@ -2751,13 +2758,22 @@ xfs_bmap_btalloc(
args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
args.type = XFS_ALLOCTYPE_START_BNO;
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_NEAR_BNO;
else
args.type = XFS_ALLOCTYPE_START_BNO;
args.total = ap->total;

/*
* Find the longest available space.
* We're going to try for the whole allocation at once.
* Search for an allocation group with a single extent
* large enough for the request.
*
* If one isn't found, then adjust the minimum allocation
* size to the largest space found.
*/
startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
if (startag == NULLAGNUMBER)
startag = ag = 0;
notinit = 0;
down_read(&mp->m_peraglock);
while (blen < ap->alen) {
Expand All @@ -2783,6 +2799,35 @@ xfs_bmap_btalloc(
blen = longest;
} else
notinit = 1;

if (xfs_inode_is_filestream(ap->ip)) {
if (blen >= ap->alen)
break;

if (ap->userdata) {
/*
* If startag is an invalid AG, we've
* come here once before and
* xfs_filestream_new_ag picked the
* best currently available.
*
* Don't continue looping, since we
* could loop forever.
*/
if (startag == NULLAGNUMBER)
break;

error = xfs_filestream_new_ag(ap, &ag);
if (error) {
up_read(&mp->m_peraglock);
return error;
}

/* loop again to set 'blen'*/
startag = NULLAGNUMBER;
continue;
}
}
if (++ag == mp->m_sb.sb_agcount)
ag = 0;
if (ag == startag)
Expand All @@ -2807,8 +2852,18 @@ xfs_bmap_btalloc(
*/
else
args.minlen = ap->alen;

/*
* set the failure fallback case to look in the selected
* AG as the stream may have moved.
*/
if (xfs_inode_is_filestream(ap->ip))
ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
} else if (ap->low) {
args.type = XFS_ALLOCTYPE_START_BNO;
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
else
args.type = XFS_ALLOCTYPE_START_BNO;
args.total = args.minlen = ap->minlen;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
Expand Down
2 changes: 2 additions & 0 deletions fs/xfs/xfs_clnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,7 @@ struct xfs_mount_args {
*/
#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
* I/O size in stat(2) */
#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
* allocator */

#endif /* __XFS_CLNT_H__ */
4 changes: 3 additions & 1 deletion fs/xfs/xfs_dinode.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_EXTSIZE_BIT 11 /* inode extent size allocator hint */
#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
Expand All @@ -271,12 +272,13 @@ typedef enum xfs_dinode_fmt
#define XFS_DIFLAG_EXTSIZE (1 << XFS_DIFLAG_EXTSIZE_BIT)
#define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
#define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
#define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT)

#define XFS_DIFLAG_ANY \
(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG)
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)

#endif /* __XFS_DINODE_H__ */
Loading

0 comments on commit 2a82b8b

Please sign in to comment.