Skip to content

Commit

Permalink
get rid of NR_OPEN and introduce a sysctl_nr_open
Browse files Browse the repository at this point in the history
NR_OPEN (historically set to 1024*1024) actually prevents a process from
opening more than 1024*1024 handles.

Unfortunately, some production servers hit the not-so-"ridiculously high
value" of 1024*1024 file descriptors per process.

Changing NR_OPEN is not considered safe because it could exhaust vmalloc
space.

This patch introduces a new sysctl (/proc/sys/fs/nr_open) which defaults to
1024*1024, so that admins can decide to change this limit if their workload
needs it.

[akpm@linux-foundation.org: export it for sparc64]
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Eric Dumazet authored and Linus Torvalds committed Feb 6, 2008
1 parent 774ed22 commit 9cfe015
Show file tree
Hide file tree
Showing 11 changed files with 41 additions and 10 deletions.
8 changes: 8 additions & 0 deletions Documentation/filesystems/proc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,14 @@ nr_inodes
Denotes the number of inodes the system has allocated. This number will
grow and shrink dynamically.

nr_open
-------

Denotes the maximum number of file-handles a process can
allocate. The default value is 1024*1024 (1048576), which should be
enough for most machines. The actual limit also depends on the
RLIMIT_NOFILE resource limit.

nr_free_inodes
--------------

Expand Down
10 changes: 10 additions & 0 deletions Documentation/sysctl/fs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
- inode-max
- inode-nr
- inode-state
- nr_open
- overflowuid
- overflowgid
- suid_dumpable
Expand Down Expand Up @@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.

==============================================================

nr_open:

This denotes the maximum number of file-handles a process can
allocate. The default value is 1024*1024 (1048576), which should be
enough for most machines. The actual limit also depends on the
RLIMIT_NOFILE resource limit.

==============================================================

inode-max, inode-nr & inode-state:

As with file handles, the kernel allocates the inode structures
Expand Down
2 changes: 1 addition & 1 deletion arch/alpha/kernel/osf_sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ sys_getpagesize(void)
asmlinkage unsigned long
sys_getdtablesize(void)
{
return NR_OPEN;
return sysctl_nr_open;
}

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/kernel/sysirix.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs)
retval = NGROUPS_MAX;
goto out;
case 5:
retval = NR_OPEN;
retval = sysctl_nr_open;
goto out;
case 6:
retval = 1;
Expand Down
1 change: 1 addition & 0 deletions arch/sparc64/kernel/sparc64_ksyms.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid);
EXPORT_SYMBOL(sys_geteuid);
EXPORT_SYMBOL(sys_getuid);
EXPORT_SYMBOL(sys_getegid);
EXPORT_SYMBOL(sysctl_nr_open);
EXPORT_SYMBOL(sys_getgid);
EXPORT_SYMBOL(svr4_getcontext);
EXPORT_SYMBOL(svr4_setcontext);
Expand Down
2 changes: 1 addition & 1 deletion arch/sparc64/solaris/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val)
case 3: /* UL_GMEMLIM */
return current->signal->rlim[RLIMIT_DATA].rlim_cur;
case 4: /* UL_GDESLIM */
return NR_OPEN;
return sysctl_nr_open;
}
return -EINVAL;
}
Expand Down
6 changes: 4 additions & 2 deletions arch/sparc64/solaris/timod.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)

SOLD("entry");
lock_kernel();
if(fd >= NR_OPEN) goto out;
if (fd >= sysctl_nr_open)
goto out;

fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
Expand Down Expand Up @@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, u32 arg2, u32 arg3)

SOLD("entry");
lock_kernel();
if(fd >= NR_OPEN) goto out;
if (fd >= sysctl_nr_open)
goto out;

fdt = files_fdtable(current->files);
filp = fdt->fd[fd];
Expand Down
8 changes: 5 additions & 3 deletions fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ struct fdtable_defer {
struct fdtable *next;
};

int sysctl_nr_open __read_mostly = 1024*1024;

/*
* We use this list to defer free fdtables that have vmalloced
* sets/arrays. By keeping a per-cpu list, we avoid having to embed
Expand Down Expand Up @@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
if (nr > NR_OPEN)
nr = NR_OPEN;
if (nr > sysctl_nr_open)
nr = sysctl_nr_open;

fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
if (!fdt)
Expand Down Expand Up @@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr)
if (nr < fdt->max_fds)
return 0;
/* Can we expand? */
if (nr >= NR_OPEN)
if (nr >= sysctl_nr_open)
return -EMFILE;

/* All good, so we try */
Expand Down
2 changes: 1 addition & 1 deletion include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

/* Fixed constants first: */
#undef NR_OPEN
#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */
extern int sysctl_nr_open;
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */

#define BLOCK_SIZE_BITS 10
Expand Down
2 changes: 1 addition & 1 deletion kernel/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
return -EPERM;

retval = security_task_setrlimit(resource, &new_rlim);
Expand Down
8 changes: 8 additions & 0 deletions kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1202,6 +1202,14 @@ static struct ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nr_open",
.data = &sysctl_nr_open,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = FS_DENTRY,
.procname = "dentry-state",
Expand Down

0 comments on commit 9cfe015

Please sign in to comment.