aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2010-02-22 14:47:51 +0000
committerDavid Howells <dhowells@redhat.com>2010-02-22 14:47:51 +0000
commitbf145f9d25647b699fb06981e1011272c29f2c61 (patch)
tree38270de29c399903bf9b6181e8d2e92c80625a4b
downloadcachefilesd-bf145f9d25647b699fb06981e1011272c29f2c61.tar.gz
cachefilesd historical version 0.4v0.4
-rw-r--r--Makefile51
-rw-r--r--README274
-rw-r--r--cachefilesd.840
-rw-r--r--cachefilesd.c1365
-rw-r--r--cachefilesd.conf17
-rw-r--r--cachefilesd.conf.5122
-rwxr-xr-xcachefilesd.initd85
-rw-r--r--redhat/cachefilesd.spec83
8 files changed, 2037 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6ceb019
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,51 @@
+# Build, install and clean rules for the cachefilesd daemon.
+#
+# Knobs that may be overridden on the make command line:
+#   CFLAGS / LDFLAGS  compiler and linker flags
+#   INSTALL           install(1) program to use
+#   DESTDIR           staging root prepended to every install path
+#   BUILDFOR          "32-bit" or "64-bit" to add -m32 / -m64
+CFLAGS		:= -g -O2 -Wall
+INSTALL		:= install
+DESTDIR		:=
+MAJOR		:= 0
+MINOR		:= 4
+VERSION		:= $(MAJOR).$(MINOR)
+BUILDFOR	:=
+ETCDIR		:= /etc
+BINDIR		:= /bin
+SBINDIR		:= /sbin
+LIBDIR		:= /lib
+USRLIBDIR	:= /usr/lib
+
+# NOTE: the library-related variables below (and LNS) are not referenced by
+# any rule in this Makefile; they are kept so existing overrides keep working.
+SHAREDIR	:= /usr/share/keyutils
+INCLUDEDIR	:= /usr/include
+ARLIB		:= libkeyutils.a
+DEVELLIB	:= libkeyutils.so
+SONAME		:= libkeyutils.so.$(MAJOR)
+LIBNAME		:= libkeyutils-$(VERSION).so
+
+LNS		:= ln -sf
+
+# Select the word size and matching library directories when asked to.
+ifeq ($(BUILDFOR),32-bit)
+CFLAGS		+= -m32
+LIBDIR		:= /lib
+USRLIBDIR	:= /usr/lib
+else
+ifeq ($(BUILDFOR),64-bit)
+CFLAGS		+= -m64
+LIBDIR		:= /lib64
+USRLIBDIR	:= /usr/lib64
+endif
+endif
+
+# These targets are commands, not files; without this declaration a stray
+# file called "all", "install" or "clean" would silently disable them.
+.PHONY: all install clean
+
+all: cachefilesd
+
+# The Makefile is a prerequisite so that flag changes force a rebuild; only
+# the first prerequisite ($<, the C source) is handed to the compiler.
+cachefilesd: cachefilesd.c Makefile
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+MAN5	:= $(DESTDIR)/usr/share/man/man5
+MAN8	:= $(DESTDIR)/usr/share/man/man8
+
+# "install -D" creates any missing leading directories under $(DESTDIR).
+install: all
+	$(INSTALL) -D cachefilesd $(DESTDIR)$(SBINDIR)/cachefilesd
+	$(INSTALL) -D -m 0644 cachefilesd.conf $(DESTDIR)$(ETCDIR)/cachefilesd.conf
+	$(INSTALL) -D -m 0644 cachefilesd.conf.5 $(MAN5)/cachefilesd.conf.5
+	$(INSTALL) -D -m 0644 cachefilesd.8 $(MAN8)/cachefilesd.8
+
+clean:
+	$(RM) cachefilesd
+	$(RM) *.o *~
+	$(RM) debugfiles.list debugsources.list
diff --git a/README b/README
new file mode 100644
index 0000000..c9875f2
--- /dev/null
+++ b/README
@@ -0,0 +1,274 @@
+ ===============================================
+ CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
+ ===============================================
+
+Contents:
+
+ (*) Overview.
+
+ (*) Requirements.
+
+ (*) Configuration.
+
+ (*) Starting the cache.
+
+ (*) Things to avoid.
+
+ (*) Cache culling.
+
+ (*) Cache structure.
+
+
+========
+OVERVIEW
+========
+
+CacheFiles is a caching backend that uses a directory on an already mounted
+filesystem of a local type (such as Ext3) as its cache.
+
+CacheFiles uses a userspace daemon to do some of the cache management - such as
+reaping stale nodes and culling. This is called cachefilesd and lives in
+/sbin.
+
+The filesystem and data integrity of the cache are only as good as those of the
+filesystem providing the backing services. Note that CacheFiles does not
+attempt to journal anything since the journalling interfaces of the various
+filesystems are very specific in nature.
+
+CacheFiles creates a proc-file - "/proc/fs/cachefiles" - that is used for
+communication with the daemon. Only one thing may have this open at once, and
+whilst it is open, a cache is at least partially in existence. The daemon
+opens this and sends commands down it to control the cache.
+
+CacheFiles is currently limited to a single cache.
+
+CacheFiles attempts to maintain at least a certain percentage of free space on
+the filesystem, shrinking the cache by culling the objects it contains to make
+space if necessary - see the "Cache Culling" section. This means it can be
+placed on the same medium as a live set of data, and will expand to make use of
+spare space and automatically contract when the set of data requires more
+space.
+
+
+============
+REQUIREMENTS
+============
+
+The use of CacheFiles and its daemon requires the following features to be
+available in the system and in the cache filesystem:
+
+ - dnotify.
+
+ - extended attributes (xattrs).
+
+ - openat() and friends.
+
+ - bmap() support on files in the filesystem (FIBMAP ioctl).
+
+ - The use of bmap() to detect a partial page at the end of the file.
+
+It is strongly recommended that the "dir_index" option is enabled on Ext3
+filesystems being used as a cache.
+
+
+=============
+CONFIGURATION
+=============
+
+The cache is configured by a script in /etc/cachefilesd.conf. The commands in
+it set up the cache ready for use. The following script commands are available:
+
+ (*) brun <N>%
+ (*) bcull <N>%
+ (*) bstop <N>%
+
+ Configure the culling limits. Optional. See the section on culling.
+ The defaults are 7%, 5% and 1% respectively.
+
+ (*) dir <path>
+
+ Specify the directory containing the root of the cache. Mandatory.
+
+ (*) tag <name>
+
+ Specify a tag to FS-Cache to use in distinguishing multiple caches.
+ Optional. The default is "CacheFiles".
+
+ (*) debug <mask>
+
+ Specify a numeric bitmask to control debugging in the kernel module.
+ Optional. The default is zero (all off).
+
+
+==================
+STARTING THE CACHE
+==================
+
+The cache is started by running the daemon. The daemon opens the cache proc
+file, configures the cache and tells it to begin caching. At that point the
+cache binds to fscache and the cache becomes live.
+
+The daemon is run as follows:
+
+ /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
+
+The flags are:
+
+ (*) -d
+
+ Increase the debugging level. This can be specified multiple times and
+ is cumulative with itself.
+
+ (*) -s
+
+ Send messages to stderr instead of syslog.
+
+ (*) -n
+
+ Don't daemonise and go into background.
+
+ (*) -f <configfile>
+
+ Use an alternative configuration file rather than the default one.
+
+
+===============
+THINGS TO AVOID
+===============
+
+Do not mount other things within the cache as this will cause problems. The
+kernel module contains its own very cut-down path walking facility that ignores
+mountpoints, but the daemon can't avoid them.
+
+Do not create, rename or unlink files and directories in the cache whilst the
+cache is active, as this may cause the state to become uncertain.
+
+Renaming files in the cache might make objects appear to be other objects (the
+filename is part of the lookup key).
+
+Do not change or remove the extended attributes attached to cache files by the
+cache as this will cause the cache state management to get confused.
+
+Do not create files or directories in the cache, lest the cache get confused or
+serve incorrect data.
+
+Do not chmod files in the cache. The module creates things with minimal
+permissions to prevent random users being able to access them directly.
+
+
+=============
+CACHE CULLING
+=============
+
+The cache may need culling occasionally to make space. This involves
+discarding objects from the cache that have been used less recently than
+anything else. Culling is based on the access time of data objects. Empty
+directories are culled if not in use.
+
+Cache culling is done on the basis of the percentage of blocks available in the
+underlying filesystem. There are three "limits":
+
+ (*) brun
+
+ If the amount of available space in the cache rises above this limit, then
+ culling is turned off.
+
+ (*) bcull
+
+ If the amount of available space in the cache falls below this limit, then
+ culling is started.
+
+ (*) bstop
+
+ If the amount of available space in the cache falls below this limit, then
+ no further allocation of disk space is permitted until culling has raised
+ the amount above this limit again.
+
+These must be configured thusly:
+
+ 0 <= bstop < bcull < brun < 100
+
+Note that these are percentages of available space, and do _not_ appear as 100
+minus the percentage displayed by the "df" program.
+
+The userspace daemon scans the cache to build up a table of cullable objects.
+These are then culled in least recently used order. A new scan of the cache is
+started as soon as space is made in the table. Objects will be skipped if
+their atimes have changed or if the kernel module says it is still using them.
+
+
+===============
+CACHE STRUCTURE
+===============
+
+The CacheFiles module will create two directories in the directory it was
+given:
+
+ (*) cache/
+
+ (*) graveyard/
+
+The active cache objects all reside in the first directory. The CacheFiles
+kernel module moves any retired or culled objects that it can't simply unlink
+to the graveyard from which the daemon will actually delete them.
+
+The daemon uses dnotify to monitor the graveyard directory, and will delete
+anything that appears therein.
+
+
+The module represents index objects as directories with the filename "I..." or
+"J...". Note that the "cache/" directory is itself a special index.
+
+Data objects are represented as files if they have no children, or directories
+if they do. Their filenames all begin "D..." or "E...". If represented as a
+directory, data objects will have a file in the directory called "data" that
+actually holds the data.
+
+Special objects are similar to data objects, except their filenames begin
+"S..." or "T...".
+
+
+If an object has children, then it will be represented as a directory.
+Immediately in the representative directory is a collection of directories
+named for hash values of the child object keys with an '@' prepended. Into
+these directories, if possible, will be placed the representations of the
+child objects:
+
+ INDEX INDEX INDEX DATA FILES
+ ========= ========== ================================= ================
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry
+
+
+If the key is so long that it exceeds NAME_MAX with the decorations added on to
+it, then it will be cut into pieces, the first few of which will be used to
+make a nest of directories, and the last one of which will be the objects
+inside the last directory. The names of the intermediate directories will have
+'+' prepended:
+
+ J1223/@23/+xy...z/+kl...m/Epqr
+
+
+Note that keys are raw data, and not only may they exceed NAME_MAX in size,
+they may also contain things like '/' and NUL characters, and so they may not
+be suitable for turning directly into a filename.
+
+To handle this, CacheFiles will use a suitably printable filename directly and
+"base-64" encode ones that aren't directly suitable. The two versions of
+object filenames indicate the encoding:
+
+ OBJECT TYPE PRINTABLE ENCODED
+ =============== =============== ===============
+ Index "I..." "J..."
+ Data "D..." "E..."
+ Special "S..." "T..."
+
+Intermediate directories are always "@" or "+" as appropriate.
+
+
+Each object in the cache has an extended attribute label that holds the object
+type ID (required to distinguish special objects) and the auxiliary data from
+the netfs. The latter is used to detect stale objects in the cache and update
+or retire them.
+
+
+Note that CacheFiles will erase from the cache any file it doesn't recognise or
+any file of an incorrect type (such as a FIFO file or a device file).
diff --git a/cachefilesd.8 b/cachefilesd.8
new file mode 100644
index 0000000..07335e0
--- /dev/null
+++ b/cachefilesd.8
@@ -0,0 +1,40 @@
+.\" -*- nroff -*-
+.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version
+.\" 2 of the License, or (at your option) any later version.
+.\"
+.TH cachefilesd 8 "11 July 2006"
+.SH NAME
+cachefilesd \- CacheFiles userspace management daemon
+.SH SYNOPSIS
+.B "cachefilesd [-d]* [-s] [-n] [-f <configfile>]"
+.SH DESCRIPTION
+The \fBcachefilesd\fP daemon manages the cache data store that is used by
+network filesystems such as AFS and NFS to cache data locally on disk.
+.P
+The README file should be read before attempting to configure this facility:
+.IP
+/usr/share/docs/cachefilesd-*/README
+.SH OPTIONS
+.TP
+.B -d
+Increase the debugging level. This option may be given multiple times.
+.TP
+.B -s
+Send messages to stderr instead of syslog.
+.TP
+.B -n
+Don't daemonise.
+.TP
+.BI "-f <configfile>"
+Read the specified configuration file instead of the default one.
+.SH FILES
+.BR /etc/cachefilesd.conf
+.SH SEE ALSO
+\fBcachefilesd.conf\fR(5), /usr/share/docs/cachefilesd-*/README
+.SH AUTHORS
+.br
+David Howells <dhowells@redhat.com>
diff --git a/cachefilesd.c b/cachefilesd.c
new file mode 100644
index 0000000..9d9c9ef
--- /dev/null
+++ b/cachefilesd.c
@@ -0,0 +1,1365 @@
+/* cachefilesd.c: CacheFiles userspace management daemon
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Configuration file goes in /etc/cachefilesd.conf and is of the form:
+ *
+ * dir /var/fscache
+ * tag mycache
+ * brun 10%
+ * bcull 7%
+ * bstop 3%
+ *
+ * Only "dir" is mandatory
+ * Blank lines and lines beginning with a hash are comments
+ * Trailing spaces are significant
+ * There is no character escaping mechanism
+ * NUL characters are cause for error
+ */
+
+#define _GNU_SOURCE
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <time.h>
+#include <sys/inotify.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+
+/*
+ * Kind of cache object that a directory entry represents.
+ * - create_object() chooses the value from the first character of the
+ *   entry's filename
+ */
+typedef enum objtype {
+ OBJTYPE_INDEX, /* name begins with 'I' or 'J' */
+ OBJTYPE_DATA, /* name begins with 'D' or 'E' */
+ OBJTYPE_SPECIAL, /* name begins with 'S' or 'T' */
+ OBJTYPE_INTERMEDIATE, /* name begins with '+' or '@' */
+} objtype_t;
+
+/*
+ * In-memory representation of one file or directory in the cache.
+ * - objects form a tree: each one points at its parent and sits on the
+ *   parent's doubly-linked list of children
+ * - "usage" is a reference count; put_object() frees the object and drops a
+ *   reference on its parent when the count reaches zero
+ * - "name" is declared with a single byte; create_object() allocates
+ *   sizeof(struct object) + strlen(name) so that the full name and its NUL
+ *   terminator fit
+ */
+struct object {
+ struct object *parent; /* parent dir of this object (or NULL) */
+ struct object *children; /* children of this object */
+ struct object *next; /* next child of parent */
+ struct object *prev; /* previous child of parent */
+ DIR *dir; /* this object's directory (or NULL for data obj) */
+ ino_t ino; /* inode number of this object */
+ int usage; /* number of users of this object */
+ char empty; /* T if directory empty */
+ char new; /* T if object new */
+ char cullable; /* T if object now cullable */
+ objtype_t type; /* type of object */
+ time_t atime; /* last access time on this object */
+ char name[1]; /* name of this object */
+};
+
+/* cache root representation
+ * - statically allocated; put_object() treats an attempt to destroy it as an
+ *   internal error
+ * - NOTE(review): the initial usage count of 2 presumably keeps the root
+ *   pinned for the lifetime of the daemon - confirm against the scanner
+ */
+static struct object root = {
+ .parent = NULL,
+ .usage = 2,
+ .type = OBJTYPE_INDEX,
+};
+
+/* number of objects currently allocated (the static root counts as one) and
+ * number of object directories currently held open with opendir() */
+static int nobjects = 1;
+static int nopendir = 0;
+
+/* current scan point */
+static struct object *scan = &root;
+
+/* ranked order of cullable objects
+ * - we have two tables: one we're building and one that's full of ready to be
+ * culled objects
+ */
+#define CULLTABLE_SIZE 4096
+static struct object *cullbuild[CULLTABLE_SIZE];
+static struct object *cullready[CULLTABLE_SIZE];
+
+/* index of the last (oldest) entry in cullbuild, or -1 if that table is
+ * empty; oldest_ready is presumably the equivalent for cullready - TODO
+ * confirm against the code that fills cullready */
+static int oldest_build = -1;
+static int oldest_ready = -1;
+static int ncullable = 0;
+
+/* default configuration file (overridden by -f) and the kernel module's
+ * control file */
+static const char *configfile = "/etc/cachefilesd.conf";
+static const char *procfile = "/proc/fs/cachefiles";
+
+/* cache root directory taken from the "dir" config command, and the path of
+ * the graveyard directory beneath it */
+static char *cacheroot, *graveyardpath;
+
+/* -d count, -s flag, and whether openlog() has been called yet */
+static int xdebug, xnolog, xopenedlog;
+
+/* flags set asynchronously by the signal handlers (sigterm, sigio, sigurg)
+ * - they must be volatile sig_atomic_t so that the main loop is guaranteed to
+ *   observe a store made from a handler
+ */
+static volatile sig_atomic_t stop, reap, statecheck;
+
+/* culling requested by the kernel module (parsed in read_cache_state()) */
+static int cull;
+
+/* open fd on the graveyard directory */
+static int graveyardfd;
+
+/* culling limits as reported by the kernel module */
+static unsigned long long brun, bcull, bstop;
+
+/* the control file is always moved to this fd number by main() */
+#define cachefd 3
+
+/*
+ * print the command line summary on stderr and exit with status 2
+ * - the default configuration file named here must match "configfile"
+ */
+static void help(void) __attribute__((noreturn));
+static void help(void)
+{
+	fprintf(stderr,
+		"Format:\n"
+		"  /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]\n"
+		"\n"
+		"Options:\n"
+		"  -d\tIncrease debugging level (cumulative)\n"
+		"  -n\tDon't daemonise the process\n"
+		"  -s\tMessage output to stderr instead of syslog\n"
+		"  -f <configfile>\n"
+		"\tRead the specified configuration file instead of"
+		" /etc/cachefilesd.conf\n");
+
+	exit(2);
+}
+
+/*
+ * report a fatal error and terminate the process
+ * - the printf-style message goes to stderr if -s was given (xnolog),
+ *   otherwise to syslog at LOG_ERR, opening the log first if need be
+ * - never returns: exits with status "excode"
+ */
+static void __error(int excode, const char *fmt, ...) __attribute__((noreturn));
+static void __error(int excode, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (!xnolog) {
+		if (xopenedlog == 0) {
+			openlog("cachefilesd", LOG_PID, LOG_DAEMON);
+			xopenedlog = 1;
+		}
+
+		vsyslog(LOG_ERR, fmt, ap);
+		closelog();
+	}
+	else {
+		vfprintf(stderr, fmt, ap);
+	}
+
+	va_end(ap);
+
+	exit(excode);
+}
+
+/*
+ * fatal error reporting wrappers around __error(); none of them return
+ * - error():    internal inconsistency, exit status 3
+ * - oserror():  system call failure, exit status 1; appends the errno value
+ *               and its description.  errno must be passed AFTER the caller's
+ *               arguments because its "%d" conversion comes after all of the
+ *               conversions in FMT
+ * - cfgerror(): configuration file error, exit status 2; expects variables
+ *               "configfile" and "lineno" to be in scope at the call site
+ * - opterror(): command line error, exit status 2
+ */
+#define error(FMT,...)		__error(3, "Internal error: "FMT"\n" ,##__VA_ARGS__)
+#define oserror(FMT,...)	__error(1, FMT": errno %d (%m)\n" ,##__VA_ARGS__ ,errno)
+#define cfgerror(FMT,...)	__error(2, "%s:%d:"FMT"\n", configfile, lineno ,##__VA_ARGS__)
+#define opterror(FMT,...)	__error(2, FMT"\n" ,##__VA_ARGS__)
+
+/*
+ * emit a non-fatal message if its debug level is enabled
+ * - dlevel: message is dropped unless dlevel <= xdebug (the -d count)
+ * - level:  syslog priority to use when logging to syslog
+ * - output goes to stderr if -s was given (xnolog), otherwise to syslog
+ * - the syslog connection is deliberately left open: main() opens it before
+ *   issuing the bind command because /dev/log may not be reachable
+ *   afterwards, so closing it after each message would lose later messages
+ */
+static void __message(int dlevel, int level, const char *fmt, ...)
+{
+	va_list va;
+
+	if (dlevel > xdebug)
+		return;
+
+	va_start(va, fmt);
+
+	if (xnolog) {
+		vfprintf(stderr, fmt, va);
+	}
+	else {
+		if (!xopenedlog) {
+			openlog("cachefilesd", LOG_PID, LOG_DAEMON);
+			xopenedlog = 1;
+		}
+
+		vsyslog(level, fmt, va);
+	}
+
+	va_end(va);
+}
+
+/*
+ * message wrappers around __message(); each appends a newline to FMT
+ * - info() and notice() pass debug level 0 with LOG_INFO / LOG_NOTICE
+ * - debug() takes an explicit debug level DL, which __message() compares
+ *   against xdebug, and logs at LOG_DEBUG
+ */
+#define info(FMT,...) __message(0, LOG_INFO, FMT"\n" ,##__VA_ARGS__)
+#define debug(DL, FMT,...) __message(DL, LOG_DEBUG, FMT"\n" ,##__VA_ARGS__)
+#define notice(FMT,...) __message(0, LOG_NOTICE, FMT"\n" ,##__VA_ARGS__)
+
+static void open_cache(void);
+static void cachefilesd(void) __attribute__((noreturn));
+static void reap_graveyard(void);
+static void reap_graveyard_aux(const char *dirname);
+static void read_cache_state(void);
+static void cull_file(int dirfd, const char *filename);
+static void build_cull_table(void);
+static void decant_cull_table(void);
+static void insert_into_cull_table(struct object *object);
+static void put_object(struct object *object);
+static struct object *create_object(struct object *parent, const char *name, struct stat *st);
+static void destroy_unexpected_object(struct object *parent, struct dirent *de);
+static int get_dir_fd(struct object *dir);
+static void cull_object(struct object *object);
+static void cull_objects(void);
+
+/*****************************************************************************/
+/*
+ * termination request
+ * - only records the request in "stop"; the signal number is not examined
+ */
+static void sigterm(int sig)
+{
+	(void) sig;
+
+	stop = 1;
+}
+
+/*****************************************************************************/
+/*
+ * the graveyard was populated
+ * - only records the event in "reap"; the signal number is not examined
+ */
+static void sigio(int sig)
+{
+	(void) sig;
+
+	reap = 1;
+}
+
+/*****************************************************************************/
+/*
+ * the CacheFiles module signalled a significant change of state
+ * - only records the event in "statecheck"; the signal number is not examined
+ */
+static void sigurg(int sig)
+{
+	(void) sig;
+
+	statecheck = 1;
+}
+
+/*****************************************************************************/
+/*
+ * start up the cache and go
+ * - parses the command line
+ * - opens the kernel module's control file on fd 3 (cachefd)
+ * - feeds the configuration file to the module one line at a time
+ * - issues the "bind" command to make the cache live
+ * - runs cachefilesd(), in a forked child unless -n was given
+ */
+int main(int argc, char *argv[])
+{
+	struct stat st;
+	unsigned lineno;
+	ssize_t n;
+	size_t m;
+	FILE *config;
+	char *line, *cp;
+	long page_size;
+	int _cachefd, nullfd, opt, loop, open_max, nodaemon = 0;
+
+	/* handle help request */
+	if (argc == 2 && strcmp(argv[1], "--help") == 0)
+		help();
+
+	/* parse the arguments */
+	while ((opt = getopt(argc, argv, "dsnf:")) != -1) {
+		switch (opt) {
+		case 'd':
+			/* turn on debugging */
+			xdebug++;
+			break;
+
+		case 's':
+			/* disable syslog writing */
+			xnolog = 1;
+			break;
+
+		case 'n':
+			/* don't daemonise */
+			nodaemon = 1;
+			break;
+
+		case 'f':
+			/* use a specific config file */
+			configfile = optarg;
+			break;
+
+		default:
+			opterror("Unknown commandline option '%c'", optopt);
+		}
+	}
+
+	/* read various parameters */
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size < 0)
+		oserror("Unable to get page size");
+
+	open_max = sysconf(_SC_OPEN_MAX);
+	if (open_max < 0)
+		oserror("Unable to get max open files");
+
+	/* become owned by root */
+	if (setresuid(0, 0, 0) < 0)
+		oserror("Unable to set UID to 0");
+
+	if (setresgid(0, 0, 0) < 0)
+		oserror("Unable to set GID to 0");
+
+	/* just in case... */
+	sync();
+
+	/* open the procfile on fd 3 */
+	_cachefd = open(procfile, O_RDWR);
+	if (_cachefd < 0)
+		oserror("Unable to open %s", procfile);
+	if (_cachefd != cachefd) {
+		if (dup2(_cachefd, cachefd) < 0)
+			oserror("Unable to transfer cache fd to 3");
+		if (close(_cachefd) < 0)
+			oserror("Close of original cache fd failed");
+	}
+
+	/* open /dev/null */
+	nullfd = open("/dev/null", O_RDWR);
+	if (nullfd < 0)
+		oserror("Unable to open /dev/null");
+
+	/* open the config file */
+	config = fopen(configfile, "r");
+	if (!config)
+		oserror("Unable to open %s", configfile);
+
+	/* read the configuration */
+	m = 0;
+	line = NULL;
+	lineno = 0;
+	while ((n = getline(&line, &m, config)) != -1) {
+		lineno++;
+
+		if (n >= page_size)
+			cfgerror("Line too long");
+
+		if (memchr(line, 0, n) != 0)
+			cfgerror("Line contains a NUL character");
+
+		/* eat blank lines, leading white space and trailing NL */
+		cp = strchr(line, '\n');
+		if (!cp)
+			cfgerror("Unterminated line");
+
+		if (cp == line)
+			continue;
+		*cp = '\0';
+
+		/* the <ctype.h> functions need an unsigned char value */
+		for (cp = line; isspace((unsigned char) *cp); cp++) {;}
+
+		if (!*cp)
+			continue;
+
+		/* eat full line comments */
+		if (*cp == '#')
+			continue;
+
+		/* note the dir command
+		 * - strncmp() stops at the NUL so a short line is never
+		 *   read beyond its end
+		 */
+		if (strncmp(cp, "dir", 3) == 0 &&
+		    isspace((unsigned char) cp[3])) {
+			char *sp;
+
+			for (sp = cp + 4; isspace((unsigned char) *sp); sp++) {;}
+
+			if (stat(sp, &st) < 0)
+				oserror("Can't confirm cache location");
+
+			if (strlen(sp) > PATH_MAX - 10)
+				cfgerror("Cache pathname is too long");
+
+			cacheroot = strdup(sp);
+			if (!cacheroot)
+				oserror("Can't copy cache name");
+		}
+
+		/* object to the bind command */
+		if (strncmp(cp, "bind", 4) == 0 &&
+		    (!cp[4] || isspace((unsigned char) cp[4])))
+			cfgerror("'bind' command not permitted");
+
+		/* pass the config options over to the kernel module
+		 * - errno holds positive error numbers, so compare against
+		 *   ENOMEM/EIO directly to tell system failures apart from
+		 *   configuration errors
+		 */
+		if (write(cachefd, line, strlen(line)) < 0) {
+			if (errno == ENOMEM || errno == EIO)
+				oserror("CacheFiles");
+			cfgerror("CacheFiles gave config error: %m");
+		}
+	}
+
+	if (line)
+		free(line);
+
+	if (!feof(config))
+		oserror("Unable to read %s", configfile);
+
+	if (fclose(config) == EOF)
+		oserror("Unable to close %s", configfile);
+
+	/* leave stdin, stdout, stderr and cachefd open only */
+	if (nullfd != 0)
+		dup2(nullfd, 0);
+	if (nullfd != 1)
+		dup2(nullfd, 1);
+
+	for (loop = 4; loop < open_max; loop++)
+		close(loop);
+
+	/* set up a connection to syslog whilst we still can (the bind command
+	 * will give us our own namespace with no /dev/log */
+	openlog("cachefilesd", LOG_PID, LOG_DAEMON);
+	xopenedlog = 1;
+	info("About to bind cache");
+
+	/* now issue the bind command */
+	if (write(cachefd, "bind", 4) < 0)
+		oserror("CacheFiles bind failed");
+
+	info("Bound cache");
+
+	/* we now have a live cache - daemonise the process */
+	if (!nodaemon) {
+		if (!xdebug)
+			dup2(1, 2);
+
+		switch (fork()) {
+		case -1:
+			oserror("fork");
+
+		case 0:
+			if (xdebug)
+				fprintf(stderr, "Daemon PID %d\n", getpid());
+
+			signal(SIGTTIN, SIG_IGN);
+			signal(SIGTTOU, SIG_IGN);
+			signal(SIGTSTP, SIG_IGN);
+			setsid();
+			cachefilesd();
+
+		default:
+			/* parent: fall out and exit successfully */
+			break;
+		}
+	}
+	else {
+		cachefilesd();
+	}
+
+	exit(0);
+
+} /* end main() */
+
+/*****************************************************************************/
+/*
+ * open the cache directories
+ * - opens "<cacheroot>/cache" as the root object's directory stream
+ * - opens "<cacheroot>/graveyard" and keeps its path and fd for later use
+ * - checks that the backing filesystem reports usable statistics
+ */
+static void open_cache(void)
+{
+	char cachepath[PATH_MAX + 1];
+	struct statfs fsinfo;
+	int unusable;
+
+	/* the cache proper is kept open so that it can be scanned */
+	snprintf(cachepath, PATH_MAX, "%s/cache", cacheroot);
+
+	root.dir = opendir(cachepath);
+	if (root.dir == NULL)
+		oserror("Unable to open cache directory");
+	nopendir += 1;
+
+	/* the graveyard is kept open so that a notification can be set on it */
+	if (asprintf(&graveyardpath, "%s/graveyard", cacheroot) < 0)
+		oserror("Unable to copy graveyard name");
+
+	graveyardfd = open(graveyardpath, O_DIRECTORY);
+	if (graveyardfd < 0)
+		oserror("Unable to open graveyard directory");
+
+	if (fstatfs(graveyardfd, &fsinfo) < 0)
+		oserror("Unable to stat cache filesystem");
+
+	/* a value of -1 in any of these fields is treated as unusable */
+	unusable = (fsinfo.f_bsize == -1 ||
+		    fsinfo.f_blocks == -1 ||
+		    fsinfo.f_bfree == -1 ||
+		    fsinfo.f_bavail == -1);
+	if (unusable)
+		error("Backing filesystem returns unusable statistics through fstatfs()");
+}
+
+/*****************************************************************************/
+/*
+ * manage the cache
+ * - arranges for the cache handle to raise SIGURG, opens the cache
+ *   directories, then loops reading the cache state, culling, scanning and
+ *   reaping until a termination signal sets "stop"
+ * - never returns: exits with status 0 on termination
+ */
+static void cachefilesd(void)
+{
+	sigset_t sigs, osigs;
+
+	notice("Daemon Started");
+
+	/* the cache handle should generate SIGURG */
+	if (fcntl(cachefd, F_SETSIG, SIGURG) < 0)
+		oserror("Unable to set cache handle to generate SIGURG");
+
+	if (fcntl(cachefd, F_SETOWN, getpid()) < 0)
+		oserror("Unable to set cache handle to deliver SIGURG here");
+
+	/* open the cache directories */
+	open_cache();
+
+	/* we need to disable I/O and termination signals so they're only
+	 * caught at appropriate times
+	 */
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGIO);
+	sigaddset(&sigs, SIGURG);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGTERM);
+
+	signal(SIGTERM, sigterm);
+	signal(SIGINT, sigterm);
+	signal(SIGURG, sigurg);
+
+	/* check the graveyard for graves */
+	reap_graveyard();
+
+	while (!stop) {
+		read_cache_state();
+
+		/* sleep without racing on reap and cull with the signal
+		 * handlers */
+		if (!scan && !reap && !cull) {
+			if (sigprocmask(SIG_BLOCK, &sigs, &osigs) < 0)
+				oserror("Unable to block signals");
+
+			/* recheck with the signals blocked
+			 * - "stop" must be rechecked too: a termination
+			 *   signal delivered just before the block would
+			 *   otherwise leave us asleep until some other
+			 *   signal happened to arrive
+			 */
+			if (!stop && !reap && !cull) {
+				sigsuspend(&osigs);
+				if (errno != EINTR)
+					oserror("Unable to suspend process");
+			}
+
+			if (sigprocmask(SIG_UNBLOCK, &sigs, NULL) < 0)
+				oserror("Unable to unblock signals");
+
+			read_cache_state();
+		}
+
+		if (cull && oldest_ready >= 0)
+			cull_objects();
+
+		if (scan)
+			build_cull_table();
+
+		if (!scan && oldest_ready < 0 && oldest_build >= 0)
+			decant_cull_table();
+
+		if (reap)
+			reap_graveyard();
+	}
+
+	notice("Daemon Terminated");
+	exit(0);
+
+} /* end cachefilesd() */
+
+/*****************************************************************************/
+/*
+ * check the graveyard directory for graves to delete
+ * - clears the "reap" flag, installs sigio() as the SIGIO handler and requests
+ *   a DN_CREATE notification on the graveyard fd, then empties the graveyard
+ *   by way of reap_graveyard_aux()
+ * - any failure is fatal (oserror() does not return)
+ */
+static void reap_graveyard(void)
+{
+ /* set a one-shot notification to catch more graves appearing */
+ /* NOTE(review): the flag is cleared before the notification is set,
+  * presumably so that a grave arriving during the scan below sets it again
+  * and triggers another pass - confirm */
+ reap = 0;
+ signal(SIGIO, sigio);
+ if (fcntl(graveyardfd, F_NOTIFY, DN_CREATE) < 0)
+ oserror("unable to set notification on graveyard");
+
+ reap_graveyard_aux(graveyardpath);
+
+} /* end reap_graveyard() */
+
+/*****************************************************************************/
+/*
+ * recursively remove dead stuff from the graveyard
+ * - changes directory into "dirname", deletes everything found there
+ *   (recursing into subdirectories) and then changes back to ".."
+ * - the directory is rescanned from the start until a pass finds nothing
+ *   left to delete, since removing entries may cause readdir() to skip some
+ * - any failure is fatal (oserror() does not return)
+ */
+static void reap_graveyard_aux(const char *dirname)
+{
+	struct dirent *de;
+	DIR *dir;
+	int deleted;
+
+	if (chdir(dirname) < 0)
+		oserror("chdir failed");
+
+	dir = opendir(".");
+	if (!dir)
+		oserror("Unable to open grave dir %s", dirname);
+
+	do {
+		/* removing directory entries may cause us to skip when reading
+		 * them */
+		rewinddir(dir);
+		deleted = 0;
+
+		for (;;) {
+			/* readdir() returns NULL both at the end of the
+			 * directory and on error; only errno tells the two
+			 * apart, so it must be cleared before each call */
+			errno = 0;
+			de = readdir(dir);
+			if (!de)
+				break;
+
+			/* ignore exactly "." and ".." - other names that
+			 * merely begin with a dot must still be deleted */
+			if (de->d_name[0] == '.' &&
+			    (de->d_name[1] == '\0' ||
+			     (de->d_name[1] == '.' && de->d_name[2] == '\0')))
+				continue;
+
+			deleted = 1;
+
+			/* attempt to unlink non-directory files */
+			if (de->d_type != DT_DIR) {
+				debug(1, "unlink %s", de->d_name);
+				if (unlink(de->d_name) == 0)
+					continue;
+				if (errno != EISDIR)
+					oserror("Unable to unlink file %s",
+						de->d_name);
+			}
+
+			/* recurse into directories
+			 * - "de" remains valid across the call because the
+			 *   recursion reads from its own directory stream
+			 */
+			reap_graveyard_aux(de->d_name);
+
+			/* which we then attempt to remove */
+			debug(1, "rmdir %s", de->d_name);
+			if (rmdir(de->d_name) < 0)
+				oserror("Unable to remove dir %s", de->d_name);
+		}
+
+		if (errno != 0)
+			oserror("Unable to read dir %s", dirname);
+	} while (deleted);
+
+	closedir(dir);
+
+	if (chdir("..") < 0)
+		oserror("Unable to chdir to ..");
+
+} /* end reap_graveyard_aux() */
+
+/*****************************************************************************/
+/*
+ * read the cache state
+ * - reads one buffer of text from the cache handle and parses it as
+ *   "key=value" tokens separated by spaces or tabs
+ * - "cull" is parsed with automatic base detection; "brun", "bcull" and
+ *   "bstop" are parsed as hexadecimal
+ * - unrecognised tokens, and tokens without an '=', are ignored
+ */
+static void read_cache_state(void)
+{
+	char buffer[4096 + 1], *tok, *next, *arg;
+	int n;
+
+	n = read(cachefd, buffer, sizeof(buffer) - 1);
+	if (n < 0)
+		oserror("Unable to read cache state");
+	buffer[n] = '\0';
+
+	tok = buffer;
+	do {
+		/* split off this token from the rest of the buffer */
+		next = strpbrk(tok, " \t");
+		if (next)
+			*next++ = '\0';
+
+		/* split the token into key and value */
+		arg = strchr(tok, '=');
+		if (arg)
+			*arg++ = '\0';
+
+		/* a key without a value must not be handed to strtoul() */
+		if (!arg)
+			continue;
+
+		if (strcmp(tok, "cull") == 0)
+			cull = strtoul(arg, NULL, 0);
+		else if (strcmp(tok, "brun") == 0)
+			brun = strtoull(arg, NULL, 16);
+		else if (strcmp(tok, "bcull") == 0)
+			bcull = strtoull(arg, NULL, 16);
+		else if (strcmp(tok, "bstop") == 0)
+			bstop = strtoull(arg, NULL, 16);
+
+	} while ((tok = next));
+
+} /* end read_cache_state() */
+
+/*****************************************************************************/
+/*
+ * cull a file representing an object
+ * - requests CacheFiles rename the object "<dirfd>/filename" to the graveyard
+ * - ESTALE, ENOENT and EBUSY from the module are tolerated; any other write
+ *   failure is fatal
+ */
+static void cull_file(int dirfd, const char *filename)
+{
+	char buffer[NAME_MAX + 30];
+	int ret, n;
+
+	/* bound the formatting so that an over-long name is reported rather
+	 * than overrunning the buffer */
+	n = snprintf(buffer, sizeof(buffer), "cull %d %s", dirfd, filename);
+	if (n < 0 || n >= (int) sizeof(buffer))
+		error("Cull command too long for object '%s'", filename);
+
+	/* command the module */
+	ret = write(cachefd, buffer, n);
+	if (ret < 0 && errno != ESTALE && errno != ENOENT && errno != EBUSY)
+		oserror("Failed to cull object");
+
+} /* end cull_file() */
+
+/*****************************************************************************/
+/*
+ * find or create the representation of a named child of "parent"
+ * - children are identified by inode number; if the parent already holds one
+ *   with st->st_ino, its usage count is bumped and it is returned
+ * - otherwise a new object is allocated, typed from the first character of
+ *   its name and linked into the parent's list, which is kept in descending
+ *   inode order; the parent gains a reference
+ * - allocation failure or an unrecognised name prefix is fatal
+ */
+static struct object *create_object(struct object *parent,
+				    const char *name,
+				    struct stat *st)
+{
+	struct object *fresh, *before, *after;
+	char prefix;
+	int namelen;
+
+	/* walk to the first child whose inode number is not greater than the
+	 * one wanted; "before" trails one step behind */
+	before = NULL;
+	after = parent->children;
+	while (after && after->ino > st->st_ino) {
+		before = after;
+		after = after->next;
+	}
+
+	/* the parent already has a representation of this one */
+	if (after && after->ino == st->st_ino) {
+		after->usage++;
+		return after;
+	}
+
+	/* allocate the object
+	 * - struct object already contains one byte of name[], which pays for
+	 *   the NUL terminator
+	 */
+	namelen = strlen(name);
+
+	fresh = calloc(1, sizeof(struct object) + namelen);
+	if (!fresh)
+		oserror("Unable to alloc object");
+
+	fresh->usage = 1;
+	fresh->new = 1;
+
+	fresh->ino = st->st_ino;
+	fresh->atime = st->st_atime;
+	memcpy(fresh->name, name, namelen + 1);
+
+	/* the leading character of the name gives the object type */
+	prefix = fresh->name[0];
+	if (prefix == 'I' || prefix == 'J')
+		fresh->type = OBJTYPE_INDEX;
+	else if (prefix == 'D' || prefix == 'E')
+		fresh->type = OBJTYPE_DATA;
+	else if (prefix == 'S' || prefix == 'T')
+		fresh->type = OBJTYPE_SPECIAL;
+	else if (prefix == '+' || prefix == '@')
+		fresh->type = OBJTYPE_INTERMEDIATE;
+	else
+		error("Unexpected file type '%c'", prefix);
+
+	/* splice in between "before" and "after" */
+	parent->usage++;
+	fresh->parent = parent;
+	fresh->prev = before;
+	fresh->next = after;
+
+	if (before)
+		before->next = fresh;
+	else
+		parent->children = fresh;
+
+	if (after)
+		after->prev = fresh;
+
+	nobjects++;
+	return fresh;
+}
+
+/*****************************************************************************/
+/*
+ * drop a reference on an object
+ * - when the last reference goes, the object is unlinked from its parent's
+ *   child list, its directory stream (if any) is closed and it is freed;
+ *   the reference it held on its parent is then dropped in turn
+ * - destroying the root, or an object that still has children, is an
+ *   internal error
+ */
+static void put_object(struct object *object)
+{
+	struct object *doomed, *up;
+
+	/* climb towards the root for as long as references hit zero */
+	for (doomed = object; doomed; doomed = up) {
+		doomed->usage--;
+		if (doomed->usage > 0)
+			return;
+
+		nobjects--;
+
+		if (doomed->cullable)
+			ncullable--;
+
+		/* destroy the object */
+		if (doomed == &root)
+			error("Can't destroy root object representation");
+
+		if (doomed->children)
+			error("Destroying object with children: '%s'", doomed->name);
+
+		if (doomed->dir) {
+			closedir(doomed->dir);
+			nopendir--;
+		}
+
+		/* take it off the parent's list of children */
+		if (doomed->prev)
+			doomed->prev->next = doomed->next;
+		else
+			doomed->parent->children = doomed->next;
+
+		if (doomed->next)
+			doomed->next->prev = doomed->prev;
+
+		up = doomed->parent;
+
+		/* poison the header before freeing to expose stale pointers */
+		memset(doomed, 0x6d, sizeof(struct object));
+		free(doomed);
+	}
+}
+
+/*****************************************************************************/
+/*
+ * destroy an unexpected object
+ * - non-directories are unlinked directly from the parent's directory
+ * - directories are renamed into the graveyard under a generated name built
+ *   from the current time and a per-process counter
+ * - ENOENT is tolerated in both cases; any other failure is fatal
+ */
+static void destroy_unexpected_object(struct object *parent, struct dirent *de)
+{
+	static unsigned uniquifier;
+	struct timeval tv;
+	char namebuf[40];
+	int fd;
+
+	fd = dirfd(parent->dir);
+
+	if (de->d_type != DT_DIR) {
+		if (unlinkat(fd, de->d_name, 0) < 0 &&
+		    errno != ENOENT)
+			oserror("Unable to unlink unexpectedly named file: %s",
+				de->d_name);
+	}
+	else {
+		gettimeofday(&tv, NULL);
+
+		/* "%lx" requires an unsigned long, which time_t is not
+		 * guaranteed to be, so convert explicitly; the write is also
+		 * bounded by the buffer size */
+		snprintf(namebuf, sizeof(namebuf), "x%lxx%xx",
+			 (unsigned long) tv.tv_sec, uniquifier++);
+
+		if (renameat(fd, de->d_name, graveyardfd, namebuf) < 0 &&
+		    errno != ENOENT)
+			oserror("Unable to rename unexpectedly named file: %s",
+				de->d_name);
+	}
+
+} /* end destroy_unexpected_object() */
+
+/*****************************************************************************/
+/*
+ * insert an object into the build cull table if it's old enough
+ * - cullbuild[] is kept sorted by atime: slot 0 holds the newest entry and
+ *   slot oldest_build holds the oldest
+ * - oldest_build is -1 when the table is empty and CULLTABLE_SIZE - 1 when it
+ *   is full
+ * - a reference is taken on the object (usage++) whenever it is stored
+ * - once the table is full, the object is only admitted if it is older than
+ *   the newest entry, which is then dropped to make room
+ */
+static void insert_into_cull_table(struct object *object)
+{
+ int y, o, m;
+
+ if (!object)
+ error("NULL object pointer");
+
+ /* just insert if table is empty */
+ if (oldest_build == -1) {
+ object->usage++;
+ oldest_build = 0;
+ cullbuild[0] = object;
+ return;
+ }
+
+ /* insert somewhere if table is not full */
+ if (oldest_build < CULLTABLE_SIZE - 1) {
+ object->usage++;
+ /* from here on oldest_build is both the index of the new last slot
+ * and the number of objects that were in the table beforehand */
+ oldest_build++;
+
+ /* just insert at end if new oldest object */
+ if (object->atime <= cullbuild[oldest_build - 1]->atime) {
+ cullbuild[oldest_build] = object;
+ return;
+ }
+
+ /* insert at front if new newest object */
+ if (object->atime > cullbuild[0]->atime) {
+ memmove(&cullbuild[1],
+ &cullbuild[0],
+ oldest_build * sizeof(cullbuild[0]));
+
+ cullbuild[0] = object;
+ return;
+ }
+
+ /* if only two objects in list then insert between them */
+ if (oldest_build == 2) {
+ cullbuild[2] = cullbuild[1];
+ cullbuild[1] = object;
+ return;
+ }
+
+ /* insert somewhere in between front and back elements
+ * of a list holding three or more objects
+ * - oldest_build == #objects_currently_in_list
+ * - binary search over slots 1 .. oldest_build - 1 for the first
+ *   slot holding something older than the object
+ */
+ y = 1;
+ o = oldest_build - 1;
+
+ do {
+ m = (y + o) / 2;
+
+ if (object->atime > cullbuild[m]->atime)
+ o = m;
+ else
+ y = m + 1;
+
+ } while (y < o);
+
+ /* y is the insertion slot; move slots y .. oldest_build - 1 along
+ * by one to open it up */
+ memmove(&cullbuild[y + 1],
+ &cullbuild[y],
+ (oldest_build - y) * sizeof(cullbuild[0]));
+
+ cullbuild[y] = object;
+ return;
+ }
+
+ /* if table is full then insert only if older than newest */
+ if (oldest_build > CULLTABLE_SIZE - 1)
+ error("Cull table overfull");
+
+ if (object->atime >= cullbuild[0]->atime)
+ return;
+
+ /* newest object in table will be displaced by this one */
+ put_object(cullbuild[0]);
+ /* leave a recognisable marker value (tagged with this line number) in
+ * the vacated slot until it is refilled below */
+ cullbuild[0] = (void *)(0x6b000000 | __LINE__);
+ object->usage++;
+
+ /* place directly in first slot if second is older */
+ if (object->atime >= cullbuild[1]->atime) {
+ cullbuild[0] = object;
+ return;
+ }
+
+ /* shift everything up one if older than oldest */
+ if (object->atime <= cullbuild[CULLTABLE_SIZE - 1]->atime) {
+ memmove(&cullbuild[0],
+ &cullbuild[1],
+ (CULLTABLE_SIZE - 1) * sizeof(cullbuild[0]));
+
+ cullbuild[CULLTABLE_SIZE - 1] = object;
+ return;
+ }
+
+ /* search the table to find the insertion point
+ * - it will be between the first and last slots
+ * - we know second is younger
+ */
+ /* the second entry is newer than the object, so it can move into the
+ * vacated first slot straight away */
+ cullbuild[0] = cullbuild[1];
+
+ y = 2;
+ o = CULLTABLE_SIZE - 1;
+
+ do {
+ m = (y + o) / 2;
+
+ if (object->atime >= cullbuild[m]->atime)
+ o = m;
+ else
+ y = m + 1;
+
+ } while (y < o);
+
+ /* y is now the first slot holding something no newer than the object;
+ * the object goes into slot y - 1 */
+ if (y == 2) {
+ cullbuild[1] = object;
+ return;
+ }
+
+ /* move slots 2 .. y - 1 down by one to free slot y - 1 */
+ memmove(&cullbuild[1],
+ &cullbuild[2],
+ (y - 2) * sizeof(cullbuild[0]));
+
+ cullbuild[y - 1] = object;
+
+} /* end insert_into_cull_table() */
+
+/*****************************************************************************/
+/*
+ * do the next step in building up the cull table
+ * - the directory to work on is taken from "scan" and is opened on first visit
+ * - entries are read until either a subdirectory to descend into is found (in
+ *   which case "scan" is pointed at it and we return) or the directory is
+ *   exhausted (in which case "scan" is pointed back at the parent)
+ * - data and special objects are offered to the build cull table
+ * - entries with unexpected names or types are destroyed
+ * - when the root directory has been exhausted, the build table is decanted
+ *   into the ready table
+ */
+static void build_cull_table(void)
+{
+	struct dirent dirent, *de;
+	struct object *curr, *child;
+	struct stat st;
+	int loop, fd, err;
+
+	curr = scan;
+
+	if (!curr->dir) {
+		/* assume the directory is empty until an entry turns up */
+		curr->empty = 1;
+
+		fd = openat(dirfd(curr->parent->dir), curr->name, O_DIRECTORY);
+		if (fd < 0) {
+			if (errno != ENOENT)
+				oserror("Failed to open directory");
+			goto dir_read_complete;
+		}
+
+		curr->dir = fdopendir(fd);
+		if (!curr->dir)
+			oserror("Failed to open directory");
+
+		nopendir++;
+	}
+
+	debug(2, "--> build_cull_table({%s})", curr->name);
+
+next:
+	/* read the next directory entry
+	 * - readdir_r() reports failure by returning a positive error number
+	 *   rather than by returning -1 and setting errno
+	 */
+	err = readdir_r(curr->dir, &dirent, &de);
+	if (err != 0) {
+		if (err == ENOENT)
+			goto dir_read_complete;
+		errno = err;
+		oserror("Unable to read directory");
+	}
+
+	/* a NULL result pointer means the end of the directory */
+	if (de == NULL)
+		goto dir_read_complete;
+
+	/* skip "." and ".." */
+	if (dirent.d_name[0] == '.') {
+		if (!dirent.d_name[1] ||
+		    (dirent.d_name[1] == '.' && !dirent.d_name[2]))
+			goto next;
+	}
+
+	debug(2, "readdir '%s'", dirent.d_name);
+
+	if (dirent.d_type == DT_UNKNOWN)
+		oserror("readdir returned unknown type");
+
+	/* delete any funny looking files */
+	if (memchr("IDSJET+@", dirent.d_name[0], 8) == NULL)
+		goto found_unexpected_object;
+
+	/* anything that isn't a directory must be a regular file, and index
+	 * and intermediate names may only belong to directories */
+	if (dirent.d_type != DT_DIR &&
+	    (dirent.d_type != DT_REG ||
+	     dirent.d_name[0] == 'I' ||
+	     dirent.d_name[0] == 'J' ||
+	     dirent.d_name[0] == '@' ||
+	     dirent.d_name[0] == '+'))
+		goto found_unexpected_object;
+
+	/* see if this object is already known to us */
+	if (fstatat(dirfd(curr->dir), dirent.d_name, &st, 0) < 0) {
+		if (errno == ENOENT)
+			goto next;
+		oserror("Failed to stat directory");
+	}
+
+	/* create a representation for this object */
+	child = create_object(curr, dirent.d_name, &st);
+	if (!child && errno == ENOENT)
+		goto next;
+
+	curr->empty = 0;
+
+	if (!child)
+		oserror("Unable to create object");
+
+	/* we consider culling objects at the transition from index object to
+	 * non-index object */
+	switch (child->type) {
+	case OBJTYPE_DATA:
+	case OBJTYPE_SPECIAL:
+		if (!child->new) {
+			/* the child appears to have been retained in the
+			 * culling table already, so we see if it should be
+			 * removed therefrom
+			 */
+			debug(2, "- old child");
+
+			if (st.st_atime <= child->atime) {
+				/* file on disk hasn't been touched */
+				put_object(child);
+				goto next;
+			}
+
+			/* look for it in the ready table first; each removal
+			 * below also drops the reference the table held */
+			for (loop = 0; loop <= oldest_ready; loop++)
+				if (cullready[loop] == child)
+					break;
+
+			if (loop == oldest_ready) {
+				/* child was oldest object */
+				cullready[oldest_ready] = (void *)(0x6b000000 | __LINE__);
+				oldest_ready--;
+				put_object(child);
+				goto removed;
+			}
+			else if (loop < oldest_ready) {
+				/* child was somewhere in between */
+				memmove(&cullready[loop],
+					&cullready[loop + 1],
+					(oldest_ready - loop) * sizeof(cullready[0]));
+				cullready[oldest_ready] = (void *)(0x6b000000 | __LINE__);
+				oldest_ready--;
+				put_object(child);
+				goto removed;
+			}
+
+			/* not in the ready table, so try the build table */
+			for (loop = 0; loop <= oldest_build; loop++)
+				if (cullbuild[loop] == child)
+					break;
+
+			if (loop == oldest_build) {
+				/* child was oldest object */
+				cullbuild[oldest_build] = (void *)(0x6b000000 | __LINE__);
+				oldest_build--;
+				put_object(child);
+			}
+			else if (loop < oldest_build) {
+				/* child was somewhere in between */
+				memmove(&cullbuild[loop],
+					&cullbuild[loop + 1],
+					(oldest_build - loop) * sizeof(cullbuild[0]));
+				cullbuild[oldest_build] = (void *)(0x6b000000 | __LINE__);
+				oldest_build--;
+				put_object(child);
+			}
+
+		removed:
+			;
+		}
+
+		debug(2, "- insert");
+		child->new = 0;
+		insert_into_cull_table(child);
+		/* drop our own reference; the table took one if it kept the
+		 * object */
+		put_object(child);
+		goto next;
+
+		/* investigate all index and index-intermediate directories */
+	case OBJTYPE_INDEX:
+	case OBJTYPE_INTERMEDIATE:
+		debug(2, "- descend");
+
+		child->new = 0;
+		scan = child;
+
+		debug(2, "<-- build_cull_table({%s})", curr->name);
+		return;
+
+	default:
+		error("Unexpected type");
+	}
+
+	/* we've finished reading a directory - see if we can cull it */
+dir_read_complete:
+	debug(2, "dir_read_complete: u=%d e=%d %s",
+	      curr->usage, curr->empty, curr->name);
+
+	if (curr->dir) {
+		if (curr != &root) {
+			closedir(curr->dir);
+			curr->dir = NULL;
+			nopendir--;
+		}
+		else {
+			/* the root's handle is kept open and just rewound for
+			 * the next scan */
+			rewinddir(curr->dir);
+		}
+	}
+
+	if (curr->usage == 1 && curr->empty) {
+		/* attempt to cull unpinned empty intermediate and index
+		 * objects */
+		switch (curr->type) {
+		case OBJTYPE_INDEX:
+			cull_file(dirfd(curr->parent->dir),
+				  curr->name);
+			break;
+
+		case OBJTYPE_INTERMEDIATE:
+			unlinkat(dirfd(curr->parent->dir), curr->name,
+				 AT_REMOVEDIR);
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	/* resume the scan in the parent; running off the top means the whole
+	 * tree has been walked */
+	scan = curr->parent;
+	if (!scan) {
+		info("Scan complete");
+		decant_cull_table();
+	}
+
+	debug(2, "<-- build_cull_table({%s})", curr->name);
+	put_object(curr);
+	return;
+
+	/* delete unexpected objects that we've found */
+found_unexpected_object:
+	debug(2, "found_unexpected_object");
+
+	destroy_unexpected_object(curr, &dirent);
+	goto next;
+
+} /* end build_cull_table() */
+
+/*****************************************************************************/
+/*
+ * decant cull entries from the build table to the ready table and enable them
+ * - must not be called whilst a scan is in progress
+ * - every entry in the build table is marked cullable
+ * - if the ready table is empty, the whole build table is moved across
+ * - otherwise as many of the oldest build entries as will fit are moved to
+ *   the front of the ready table and the rest stay in the build table
+ */
+static void decant_cull_table(void)
+{
+	int loop, space, avail, copy, leave, n;
+
+	if (scan)
+		error("Can't decant cull table whilst scanning");
+
+	/* nothing to do if the build table is empty */
+	if (oldest_build < 0)
+		return;
+
+	/* mark the new entries cullable */
+	for (loop = 0; loop <= oldest_build; loop++) {
+		if (!cullbuild[loop]->cullable) {
+			cullbuild[loop]->cullable = 1;
+			ncullable++;
+		}
+	}
+
+	/* if the ready table is empty, copy the whole lot across */
+	if (oldest_ready == -1) {
+		copy = oldest_build + 1;
+
+		info("Decant (all %d)", copy);
+
+		n = copy * sizeof(cullready[0]);
+		memcpy(cullready, cullbuild, n);
+		memset(cullbuild, 0x6e, n);
+		oldest_ready = oldest_build;
+		oldest_build = -1;
+		goto check;
+	}
+
+	/* decant some of the build table if there's space */
+	space = CULLTABLE_SIZE - (oldest_ready + 1);
+	if (space <= 0) {
+		if (space < 0)
+			error("Less than zero space in ready table");
+		goto check;
+	}
+
+	/* work out how much of the build table we can copy */
+	copy = avail = oldest_build + 1;
+	if (copy > space)
+		copy = space;
+	leave = avail - copy;
+
+	info("Decant (%d/%d to %d)", copy, avail, space);
+
+	/* make a hole in the ready table and fill it
+	 * - the hole must be exactly "copy" slots wide, not "space" slots,
+	 *   otherwise unfilled slots would be left in the middle of the table
+	 *   and live entries would end up beyond oldest_ready
+	 */
+	n = oldest_ready + 1;
+	memmove(&cullready[copy], &cullready[0], n * sizeof(cullready[0]));
+	oldest_ready += copy;
+
+	/* move the oldest "copy" build entries across and poison their old
+	 * slots */
+	memcpy(&cullready[0], &cullbuild[leave], copy * sizeof(cullready[0]));
+	memset(&cullbuild[leave], 0x6b, copy * sizeof(cullbuild[0]));
+	oldest_build = leave - 1;
+
+	if (copy + leave > CULLTABLE_SIZE)
+		error("Scan table exceeded (%d+%d)", copy, leave);
+
+check:
+	/* make sure no poison value has found its way into a live slot,
+	 * including the last one */
+	for (loop = 0; loop <= oldest_ready; loop++)
+		if (((long)cullready[loop] & 0xf0000000) == 0x60000000)
+			abort();
+
+} /* end decant_cull_table() */
+
+/*****************************************************************************/
+/*
+ * get the directory handle for the given directory
+ * - returns a new file descriptor that the caller must close
+ * - if the object has an open directory stream, its fd is duplicated
+ * - otherwise the parent's directory is obtained recursively and the
+ *   directory is opened by name relative to that
+ * - returns -1 if the directory, or any ancestor that had to be opened on the
+ *   way, no longer exists
+ */
+static int get_dir_fd(struct object *dir)
+{
+	int parentfd, fd;
+
+	debug(1, "get_dir_fd(%s)", dir->name);
+
+	if (dir->dir) {
+		fd = dup(dirfd(dir->dir));
+		if (fd < 0)
+			oserror("Failed to dup fd");
+		debug(1, "cache fd to %d", fd);
+		return fd;
+	}
+
+	parentfd = get_dir_fd(dir->parent);
+	if (parentfd < 0) {
+		/* an ancestor has gone away, so this directory has gone with
+		 * it; don't hand an invalid fd to openat() or close() */
+		debug(1, "<%d>/%s to %d", parentfd, dir->name, -1);
+		return -1;
+	}
+
+	fd = openat(parentfd, dir->name, O_DIRECTORY);
+	if (fd < 0 && errno != ENOENT)
+		oserror("Failed to open directory");
+
+	/* return the directory's fd or -1 if ENOENT */
+	debug(1, "<%d>/%s to %d", parentfd, dir->name, fd);
+	close(parentfd);
+	return fd;
+
+} /* end get_dir_fd() */
+
+/*****************************************************************************/
+/*
+ * cull an object
+ * - the object is re-statted relative to its parent directory and is only
+ *   passed to cull_file() if its atime on disk is no later than the recorded
+ *   one
+ * - the caller's reference on the object is dropped in all cases
+ */
+static void cull_object(struct object *object)
+{
+	struct stat st;
+	int parentfd;
+
+	debug(1, "CULL %s", object->name);
+
+	parentfd = get_dir_fd(object->parent);
+	if (parentfd < 0) {
+		/* couldn't get at the parent directory */
+		put_object(object);
+		return;
+	}
+
+	if (fstatat(parentfd, object->name, &st, 0) < 0) {
+		/* a missing object is fine; anything else is an error */
+		if (errno != ENOENT)
+			oserror("Failed to re-stat object");
+	}
+	else if (st.st_atime <= object->atime) {
+		cull_file(parentfd, object->name);
+	}
+
+	close(parentfd);
+	put_object(object);
+
+} /* end cull_object() */
+
+/*****************************************************************************/
+/*
+ * consider starting a cull
+ * - culls the oldest entry in the ready table, if there is one and it has
+ *   been marked cullable
+ * - if no scan is in progress and the build table is no more than a little
+ *   over half full, decants the build table and starts a new scan from the
+ *   root
+ */
+static void cull_objects(void)
+{
+	if (ncullable <= 0)
+		error("Cullable object count is inconsistent");
+
+	/* the ready table may be empty (oldest_ready == -1) even though
+	 * cullable objects are still waiting in the build table, so make sure
+	 * there is an entry before looking at it */
+	if (oldest_ready >= 0 && cullready[oldest_ready]->cullable) {
+		cull_object(cullready[oldest_ready]);
+		cullready[oldest_ready] = (void *)(0x6b000000 | __LINE__);
+		oldest_ready--;
+	}
+
+	/* must start refilling the cull table */
+	if (!scan && oldest_build <= CULLTABLE_SIZE / 2 + 2) {
+		decant_cull_table();
+
+		notice("Refilling cull table");
+		/* the scan holds a reference on the root for its duration */
+		root.usage++;
+		scan = &root;
+	}
+
+} /* end cull_objects() */
diff --git a/cachefilesd.conf b/cachefilesd.conf
new file mode 100644
index 0000000..868630d
--- /dev/null
+++ b/cachefilesd.conf
@@ -0,0 +1,17 @@
+###############################################################################
+#
+# Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+# Written by David Howells (dhowells@redhat.com)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+#
+###############################################################################
+
+# Directory containing the root of the cache (the only mandatory command)
+dir /var/fscache
+# Tag that FS-Cache uses to tell multiple caches apart
+tag mycache
+# Culling limits, as percentages of available space in the underlying
+# filesystem; they must satisfy 0 <= bstop < bcull < brun < 100
+# - culling is turned off when available space rises above brun
+brun 10%
+# - culling is started when available space falls below bcull
+bcull 7%
+# - no further disk space is allocated whilst available space is below bstop
+bstop 3%
diff --git a/cachefilesd.conf.5 b/cachefilesd.conf.5
new file mode 100644
index 0000000..692fc17
--- /dev/null
+++ b/cachefilesd.conf.5
@@ -0,0 +1,122 @@
+.\" -*- nroff -*-
+.\" Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version
+.\" 2 of the License, or (at your option) any later version.
+.\"
+.TH CACHEFILESD.CONF 5 "11 July 2005" Linux "Cache Files Utilities"
+.SH NAME
+/etc/cachefilesd.conf \- Local file caching configuration file
+.SH SYNOPSIS
+.P
+The configuration file for cachefilesd which can manage a persistent cache for
+a variety of network filesystems using a set of files on an already mounted
+filesystem as the data store.
+.SH DESCRIPTION
+.P
+This configuration file can contain a number of commands. Each one should be
+on a separate line. Blank lines and lines beginning with a '#' character are
+considered to be comments and are discarded.
+.P
+The only mandatory command is:
+.TP
+.B dir <path>
+This command specifies the directory containing the root of the cache.
+.P
+All the other commands are optional:
+.TP
+.B brun <N>%
+.TP
+.B bcull <N>%
+.TP
+.B bstop <N>%
+These commands configure the culling limits. The defaults are 7%, 5% and 1%
+respectively. See the section on cache culling for more information.
+.TP
+.B tag <name>
+This command specifies a tag for FS-Cache to use in distinguishing multiple
+caches. This is only required if more than one cache is going to be used. The
+default is "CacheFiles".
+.TP
+.B debug <mask>
+This command specifies a numeric bitmask to control debugging in the kernel
+module. The default is zero (all off). The following values can be OR'd into
+the mask to collect various information:
+.RS
+.TP
+.B 1
+Turn on trace of function entry (_enter() macros)
+.TP
+.B 2
+Turn on trace of function exit (_leave() macros)
+.TP
+.B 4
+Turn on trace of internal debug points (_debug())
+.RE
+.IP
+This mask can also be set through /proc/sys/fs/cachefiles/debug.
+.SH EXAMPLES
+.P
+As an example, consider the following:
+.P
+.RS
+dir /var/fscache
+.br
+tag mycache
+.br
+brun 10%
+.br
+bcull 7%
+.br
+bstop 3%
+.RE
+.P
+This places the cache storage objects in a directory called "/var/fscache", names
+the cache "mycache", permits the cache to run freely as long as there's at
+least 10% free space on /var/fscache/, starts culling the cache when the free
+space drops below 7% and stops writing new stuff into the cache if the amount
+of free space drops below 3%. If the cache is suspended, it won't reactivate
+until the amount of free space rises again to 10% or better.
+.SH CACHE CULLING
+.P
+The cache may need culling occasionally to make space. This involves
+discarding objects from the cache that have been used less recently than
+anything else. Culling is based on the access time of data objects. Empty
+directories are culled if not in use.
+.P
+Cache culling is done on the basis of the percentage of blocks available in the
+underlying filesystem. There are three "limits":
+.TP
+.B brun
+If the amount of available space in the cache rises above this limit, then
+culling is turned off.
+.TP
+.B bcull
+If the amount of available space in the cache falls below this limit, then
+culling is started.
+.TP
+.B bstop
+If the amount of available space in the cache falls below this limit, then no
+further allocation of disk space is permitted until culling has raised the
+amount above this limit again.
+.P
+These must be configured thusly:
+.IP
+0 <= bstop < bcull < brun < 100
+.P
+Note that these are percentages of available space, and do \fInot\fP appear as
+100 minus the percentage displayed by the \fBdf\fP program.
+.P
+The userspace daemon scans the cache to build up a table of cullable objects.
+These are then culled in least recently used order. A new scan of the cache is
+started as soon as space is made in the table. Objects will be skipped if
+their atimes have changed or if the kernel module says it is still using them.
+.SH SEE ALSO
+\fBcachefilesd\fR(8), \fBdf\fR(1), /usr/share/docs/cachefilesd-*/README
+.SH AUTHORS
+The cachefilesd software has been developed by David Howells
+<dhowells@redhat.com>.
diff --git a/cachefilesd.initd b/cachefilesd.initd
new file mode 100755
index 0000000..7219410
--- /dev/null
+++ b/cachefilesd.initd
@@ -0,0 +1,85 @@
+#!/bin/bash
+#
+# cachefilesd Start up and shut down the cachefilesd daemon
+#
+# chkconfig: - 13 87
+# description: Starts user-level daemon that manages the caching files \
+#              used by Network Filesystems
+
+# Source function library.
+. /etc/init.d/functions
+
+# Source networking configuration.
+if [ ! -f /etc/sysconfig/network ]; then
+	exit 0
+fi
+. /etc/sysconfig/network
+
+# Check that networking is up.
+[ "${NETWORKING}" = "no" ] && exit 0
+
+
+OPTIONS=""
+RETVAL=0
+LOCKFILE=/var/lock/subsys/cachefilesd
+MODPROBE=/sbin/modprobe
+MODPROBE_ARGS=""
+PROG="cachefilesd"
+
+# The daemon is installed in /sbin (SBINDIR in the Makefile, /sbin/* in the
+# spec file), so that is where its presence must be checked.
+[ ! -x /sbin/$PROG ] && exit 0
+
+# Check for and source configuration file otherwise set defaults
+[ -f /etc/sysconfig/$PROG ] && . /etc/sysconfig/$PROG
+
+case "$1" in
+	start|condstart)
+		# Make sure the daemon is not already running.
+		if status $PROG > /dev/null ; then
+			exit 0
+		fi
+		# The daemon isn't running, so any lock file is stale.
+		rm -f $LOCKFILE
+
+		echo -n $"Starting $PROG: "
+
+		# Load the cachefiles module if needed
+		[ -x "$MODPROBE" ] && {
+			if ! /sbin/lsmod | grep cachefiles > /dev/null ; then
+				$MODPROBE cachefiles $MODPROBE_ARGS || exit 1
+			fi
+		}
+
+		# Start daemon.
+		daemon $PROG ${OPTIONS}
+		RETVAL=$?
+		echo
+		[ $RETVAL -eq 0 ] && touch $LOCKFILE
+		;;
+	stop)
+		# Stop daemon.
+		echo -n $"Shutting down RPC $PROG: "
+		killproc $PROG
+		RETVAL=$?
+		echo
+		[ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+		;;
+	status)
+		status $PROG
+		RETVAL=$?
+		;;
+	restart|reload)
+		# There is no separate reload action; it is a full restart.
+		$0 stop
+		$0 start
+		RETVAL=$?
+		;;
+	condrestart)
+		# Only restart if the lock file says we were started.
+		if [ -f $LOCKFILE ]; then
+			$0 restart
+			RETVAL=$?
+		fi
+		;;
+	*)
+		echo $"Usage: $0 {start|stop|restart|condstart|condrestart|status}"
+		exit 1
+esac
+
+exit $RETVAL
diff --git a/redhat/cachefilesd.spec b/redhat/cachefilesd.spec
new file mode 100644
index 0000000..c158aad
--- /dev/null
+++ b/redhat/cachefilesd.spec
@@ -0,0 +1,83 @@
+Name: cachefilesd
+Version: 0.4
+Release: 1%{?dist}
+Summary: CacheFiles userspace management daemon
+Group: System Environment/Daemons
+License: GPL
+BuildRoot: %{_tmppath}/%{name}-%{version}-root-%(%{__id_u} -n)
+Url: http://people.redhat.com/~dhowells/fscache/
+Source0: http://people.redhat.com/~dhowells/fscache/cachefilesd-0.4.tar.bz2
+# Both the post and preun scriptlets run /sbin/chkconfig
+Requires(post): /sbin/chkconfig
+Requires(preun): /sbin/chkconfig
+
+%description
+The cachefilesd daemon manages the caching files and directories that are
+used by network filesystems such as AFS and NFS to do persistent caching to
+the local disk.
+
+%prep
+%setup -q
+
+
+%build
+%ifarch s390 s390x
+PIE="-fPIE"
+%else
+PIE="-fpie"
+%endif
+export PIE
+CFLAGS="`echo $RPM_OPT_FLAGS $ARCH_OPT_FLAGS $PIE`"
+
+# The Makefile assigns CFLAGS itself, so the flags worked out above only take
+# effect if they are passed on the make command line.
+make all CFLAGS="$CFLAGS"
+
+
+%install
+rm -rf %{buildroot}
+mkdir -p %{buildroot}/sbin
+mkdir -p %{buildroot}%{_sysconfdir}/rc.d/init.d
+mkdir -p %{buildroot}%{_mandir}/{man5,man8}
+make DESTDIR=%{buildroot} install
+
+install -m 755 cachefilesd.initd %{buildroot}%{_sysconfdir}/rc.d/init.d/cachefilesd
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post
+/sbin/chkconfig --add %{name}
+
+%preun
+# $1 is 0 on final removal of the package, as opposed to an upgrade
+if [ $1 -eq 0 ]; then
+	/sbin/chkconfig --del %{name}
+fi
+
+
+%files
+%defattr(-,root,root)
+%doc README
+%config(noreplace) %{_sysconfdir}/cachefilesd.conf
+%attr(0755,root,root) %{_sysconfdir}/rc.d/init.d/cachefilesd
+/sbin/*
+%{_mandir}/*/*
+
+%changelog
+* Tue Aug 1 2006 David Howells <dhowells@redhat.com> 0.4-1
+- Discard use of autotools
+
+* Tue Aug 1 2006 Steve Dickson <steved@redhat.com> 0.3-3
+- Added URL to source file
+
+* Fri Jul 28 2006 Steve Dickson <steved@redhat.com> 0.3-2
+- Added post and preun rules
+- Changed init.d script to up right before portmapper.
+
+* Fri Jun 9 2006 Steve Dickson <steved@redhat.com> 0.3-1
+- Incorporated David Howells manual page updates
+
+* Thu Jun 8 2006 Steve Dickson <steved@redhat.com> 0.2-1
+- Made the daemon 64-bit application.
+- Changed the syslog logging to log the daemon's PID
+- Changed OS error logging to log errno number as well the string
+
+* Sat Apr 22 2006 Steve Dickson <steved@redhat.com> 0.1-1
+- Initial commit