/* gdbmsync.c - Sync the disk with the in memory state. */

/* This file is part of GDBM, the GNU data base manager.
   Copyright (C) 1990-2021 Free Software Foundation, Inc.

   GDBM is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GDBM is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GDBM. If not, see <http://www.gnu.org/licenses/>.   */

/* Include system configuration before all else. */
#include "autoconf.h"

#include "gdbmdefs.h"

#ifdef GDBM_FAILURE_ATOMIC
/* NOTE(review): the header names of the system #include directives were
   lost in this copy of the file.  The list below is reconstructed from
   the identifiers the code uses (open, fsync, fchmod, stat, ioctl,
   FICLONE, realpath, PATH_MAX, UINT_MAX, errno, strlen, strcmp) --
   confirm against the upstream source. */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Sometimes, to ensure durability, a new file *and* all directories
   on its full path must be fsync()'d up to the root directory.

   Resolve F with realpath(), then fsync() the file itself and every
   directory prefix of the resolved path, ending with "/".

   Returns GDBM_NO_ERROR on success.  On failure returns
   GDBM_ERR_REALPATH, GDBM_FILE_OPEN_ERROR, GDBM_FILE_SYNC_ERROR or
   GDBM_FILE_CLOSE_ERROR, depending on which call failed; errno is left
   as set by that call. */
static int
fsync_to_root (const char *f)
{
  char path[PATH_MAX];
  char *end;
  /* The file itself is opened write-only; every directory above it is
     opened read-only (see the reassignment inside the loop). */
  int flags = O_WRONLY;

  if (realpath (f, path) == NULL)
    return GDBM_ERR_REALPATH;

  for (end = path + strlen (path); path < end; )
    {
      int fd;

      /* Truncate the path at END: the first pass leaves it intact,
	 later passes cut it down to a directory prefix that still
	 carries its trailing slash. */
      *end = 0;
      fd = open (path, flags);
      flags = O_RDONLY;
      if (fd == -1)
	return GDBM_FILE_OPEN_ERROR;

      if (fsync (fd))
	{
	  /* Do not let close() clobber the errno set by fsync(). */
	  int ec = errno;
	  close (fd);
	  errno = ec;
	  return GDBM_FILE_SYNC_ERROR;
	}

      if (close (fd))
	return GDBM_FILE_CLOSE_ERROR;

      /* Step back to just after the previous '/', or to the start of
	 the buffer, which terminates the loop. */
      do
	--end;
      while (path < end && end[-1] != '/');
    }

  return GDBM_NO_ERROR;
}

/* Note: Valgrind complains about ioctl() call below, but it appears that
   Valgrind is simply confused; it issues similar complaints about
   very simple and correct uses of ioctl(FICLONE).
*/ int _gdbm_snapshot (GDBM_FILE dbf) { int s; /* snapshot file descriptor */ int oldsnap; /* previous snapshot file descriptor */ if (dbf->snapfd[0] < 0) /* crash consistency hasn't been requested on this database */ return 0; if (!(dbf->eo == 0 || dbf->eo == 1)) { /* Shouldn't happen, but still... */ _gdbmsync_done (dbf); _gdbmsync_init (dbf); GDBM_SET_ERRNO (dbf, GDBM_ERR_USAGE, TRUE); return -1; } s = dbf->snapfd[dbf->eo]; dbf->eo = !dbf->eo; oldsnap = dbf->snapfd[dbf->eo]; /* says "DON'T recover from this snapshot, writing in progress " */ if (fchmod (s, S_IWUSR)) { GDBM_SET_ERRNO (dbf, GDBM_ERR_FILE_MODE, FALSE); return -1; } /* commit permission bits */ if (fsync (s)) { GDBM_SET_ERRNO (dbf, GDBM_FILE_SYNC_ERROR, FALSE); return -1; } /* make efficient reflink copy into snapshot file, overwrite previous contents */ if (ioctl (s, FICLONE, dbf->desc) == -1) { if (errno == EINVAL || errno == ENOSYS) { _gdbmsync_done (dbf); _gdbmsync_init (dbf); } GDBM_SET_ERRNO (dbf, GDBM_ERR_SNAPSHOT_CLONE, FALSE); return -1; } /* commit snapshot data */ if (fsync (s)) { GDBM_SET_ERRNO (dbf, GDBM_FILE_SYNC_ERROR, FALSE); return -1; } /* says "DO recover from this snapshot, writing completed successfully" */ if (fchmod (s, S_IRUSR)) { GDBM_SET_ERRNO (dbf, GDBM_ERR_FILE_MODE, FALSE); return -1; } /* commit permission bits again */ if (fsync (s)) { GDBM_SET_ERRNO (dbf, GDBM_FILE_SYNC_ERROR, FALSE); return -1; } /* * Mark the previous snapshot file write-only, indicating thereby * that it contains obsolete data. The point of this additional * operation is to reduce the time window during which a crash would * leave two readable snapshot files. */ if (fchmod (oldsnap, S_IWUSR)) { GDBM_SET_ERRNO (dbf, GDBM_ERR_FILE_MODE, FALSE); return -1; } /* commit permission bits */ if (fsync (oldsnap)) { GDBM_SET_ERRNO (dbf, GDBM_FILE_SYNC_ERROR, FALSE); return -1; } return 0; } /* Snapshot files even & odd must not exist already. 
*/ int gdbm_failure_atomic (GDBM_FILE dbf, const char *even, const char *odd) { int r; /* Return immediately if the database needs recovery */ GDBM_ASSERT_CONSISTENCY (dbf, -1); if (!even || !odd || strcmp (even, odd) == 0) { errno = EINVAL; GDBM_SET_ERRNO (dbf, GDBM_ERR_USAGE, FALSE); return -1; } if (dbf->snapfd[0] != -1) { /* * This function has been called before for this dbf: reinitialize * the snapshot system. */ _gdbmsync_done (dbf); _gdbmsync_init (dbf); } dbf->snapfd[0] = open (even, O_WRONLY | O_CREAT | O_EXCL, S_IWUSR); if (dbf->snapfd[0] == -1) GDBM_SET_ERRNO (dbf, GDBM_FILE_OPEN_ERROR, FALSE); else { dbf->snapfd[1] = open (odd, O_WRONLY | O_CREAT | O_EXCL, S_IWUSR); if (dbf->snapfd[1] == -1) GDBM_SET_ERRNO (dbf, GDBM_FILE_OPEN_ERROR, FALSE); else if ((r = fsync_to_root (even)) != 0 || (r = fsync_to_root (odd)) != 0) { GDBM_SET_ERRNO (dbf, r, FALSE); } else { dbf->eo = 0; if (_gdbm_snapshot (dbf) == 0) return 0; } } _gdbmsync_done (dbf); _gdbmsync_init (dbf); return -1; } static inline int timespec_cmp (struct timespec const *a, struct timespec const *b) { if (a->tv_sec < b->tv_sec) return -1; if (a->tv_sec > b->tv_sec) return 1; if (a->tv_nsec < b->tv_nsec) return -1; if (a->tv_nsec > b->tv_nsec) return 1; return 0; } static int check_snapshot_mode (int mode) { if (!S_ISREG (mode)) /* file is not a regular file */ return -1; if (S_IXUSR & mode) /* file is executable */ return -1; if (S_IRUSR & mode) { if (S_IWUSR & mode) return -1; /* file is both readable and writable */ } else if (!(S_IWUSR & mode)) return -1; /* file is neither readable nor writable */ /* All OK */ return 0; } static int stat_snapshot (const char *f, struct stat *st) { if (stat (f, st)) return -1; if (check_snapshot_mode (st->st_mode)) { errno = EACCES; return -1; } return 0; } static int gdbm_numsync (const char *dbname, unsigned *numsync) { GDBM_FILE dbf; int rc = -1; dbf = gdbm_open (dbname, 0, GDBM_READER, S_IRUSR, NULL); if (dbf) { if (dbf->xheader) { *numsync = 
dbf->xheader->numsync; rc = 0; } gdbm_close (dbf); } return rc; } /* * Return: * 0 both numsyncs equal or result undefined * -1 a's numsync is one less than b's * -2 a's numsync is less than b's * +1 a's numsync is one greater than b's * +2 a's numsync is greater than b's * * Takes into account integer overflow. */ static int gdbm_numsync_cmp (const char *a, const char *b) { unsigned na, nb; if (gdbm_numsync (a, &na) == 0 && gdbm_numsync (b, &nb) == 0) { if (na == UINT_MAX && nb == 0) return -1; else if (na == 0 && nb == UINT_MAX) return 1; else if (na < nb) return na + 1 == nb ? -1 : -2; else if (na > nb) return na == nb + 1 ? 1 : 2; } return 0; } /* * Selects among the two given snapshot files the one to be used for * post-crash recovery. * Returns one of the GDBM_SNAPSHOT_* constants (see gdbm.h). * If GDBM_SNAPSHOT_OK is returned a pointer to the most recent snapshot * name is stored in *ret. Otherwise, *ret is untouched. */ int gdbm_latest_snapshot (const char *even, const char *odd, const char **ret) { struct stat st_even, st_odd; if (!ret || !even || !odd || strcmp (even, odd) == 0) { errno = EINVAL; return GDBM_SNAPSHOT_ERR; } if (stat_snapshot (even, &st_even)) return GDBM_SNAPSHOT_ERR; if (stat_snapshot (odd, &st_odd)) return GDBM_SNAPSHOT_ERR; if (st_even.st_mode & S_IRUSR) { int rc = GDBM_SNAPSHOT_OK; if (!(st_odd.st_mode & S_IRUSR)) { *ret = even; return GDBM_SNAPSHOT_OK; } /* Both readable: compare numsync value in the extended header. * Select the snapshot with greater numsync value. */ switch (gdbm_numsync_cmp (even, odd)) { case -1: *ret = odd; break; case -2: rc = GDBM_SNAPSHOT_SUSPICIOUS; break; case 1: *ret = even; break; case 2: rc = GDBM_SNAPSHOT_SUSPICIOUS; break; default: /* * Both readable: check mtime. * Select the newer snapshot, i.e. 
the one whose mtime * is greater than the other's */ switch (timespec_cmp (&st_even.st_mtim, &st_odd.st_mtim)) { case -1: *ret = odd; break; case 1: *ret = even; break; case 0: /* Shouldn't happen */ rc = GDBM_SNAPSHOT_SAME; } } return rc; } else if (st_odd.st_mode & S_IRUSR) { *ret = odd; return GDBM_SNAPSHOT_OK; } else { /* neither readable: this means the crash occurred during gdbm_failure_atomic() */ } return GDBM_SNAPSHOT_BAD; } #else int gdbm_failure_atomic (GDBM_FILE dbf, const char *even, const char *odd) { errno = ENOSYS; GDBM_SET_ERRNO (dbf, GDBM_ERR_USAGE, FALSE); return -1; } int gdbm_latest_snapshot (const char *even, const char *odd, const char **ret) { errno = ENOSYS; return GDBM_SNAPSHOT_ERR; } #endif /* GDBM_FAILURE_ATOMIC */ int gdbm_file_sync (GDBM_FILE dbf) { int r = 0; /* return value */ #if HAVE_MMAP r = _gdbm_mapped_sync (dbf); #elif HAVE_FSYNC if (fsync (dbf->desc)) { GDBM_SET_ERRNO (dbf, GDBM_FILE_SYNC_ERROR, TRUE); r = 1; } #else sync (); sync (); #endif #ifdef GDBM_FAILURE_ATOMIC /* If and only if the conventional fsync/msync/sync succeeds, attempt to clone the data file. */ if (r == 0) r = _gdbm_snapshot (dbf); #endif /* GDBM_FAILURE_ATOMIC */ return r; } /* Make sure the database is all on disk. */ int gdbm_sync (GDBM_FILE dbf) { /* Return immediately if the database needs recovery */ GDBM_ASSERT_CONSISTENCY (dbf, -1); /* Initialize the gdbm_errno variable. */ gdbm_set_errno (dbf, GDBM_NO_ERROR, FALSE); if (dbf->xheader) { dbf->xheader->numsync++; dbf->header_changed = TRUE; } _gdbm_end_update (dbf); /* Do the sync on the file. */ return gdbm_file_sync (dbf); }