Changeset 895


Ignore:
Timestamp:
Mar 18, 2016, 7:53:30 PM (9 years ago)
Author:
Silvan Scherrer
Message:

tdb: Use DosSetFileLocks directly for db locks.

This is to overcome some fcntl() API incompleteness in kLIBC
(like inability to upgrade locks or join adjacent lock regions
into one). It made the torture test run a bit better but there
are still two major problems both coming from DosSetFileLocks
impl (which fcntl() is currently based upon too): a) inability to
detect deadlocks and b) missing atomic unlock/lock support if
unlock/lock regions don't match.

With the current implementation, tdbtorture works fine for 1 or 2
worker processes but hangs when there are 3 or more. [Before that,
it would only work with 1 process and would likely corrupt
the database and terminate if there were 2 or more processes].

Author: Dmitriy Kuminov (@dmik).

Location:
trunk/server/lib/tdb/common
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified trunk/server/lib/tdb/common/io.c

    r878 r895  
    9191                return -1;
    9292
    93 #ifdef __OS2__
    94         // YD we must upgrade read locks to write locks (exclusive), as otherwise
    95         // the owner (us) is not allowed to write to the file (different from unix)
    96         // if a wider previous lock is in effect, we cannot write lock our segment
    97         // (e.g. a lock_upgrade locks all the file), so we hope the previous lock
    98         // is a write lock: do not wait for lock.
    99         // so this is what we try here:
    100         // 1. add a write lock and see it it works
    101         // 2. if the write lock wasn't set, we try to unlock the segment
    102         //    first and add the write lock afterwards
    103         // 3. we remove the write lock further down
    104         // 4. we add the removed lock from step #2 again
    105         int upgradeLockRC = 0;
    106         int unlockRC = -1;
    107         upgradeLockRC = tdb_brlock(tdb, F_WRLCK, off, len, TDB_LOCK_NOWAIT);
    108         if (upgradeLockRC != 0) {
    109                 unlockRC = tdb_brunlock(tdb, F_RDLCK, off, 1);
    110                 upgradeLockRC = tdb_brlock(tdb, F_WRLCK, off, len, TDB_LOCK_NOWAIT);
    111         }
    112         // no need to log a successful upgrade
    113         if (upgradeLockRC != 0)
    114         TDB_LOG((tdb, TDB_DEBUG_TRACE,"upgrading lock at %d len=%d "
    115                 "before writing %s (rc=%d).\n", off, len,
    116                  upgradeLockRC ? "failed":"was successful", upgradeLockRC));
    117 #endif
    11893        if (tdb->map_ptr) {
    11994                memcpy(off + (char *)tdb->map_ptr, buf, len);
     
    141116                                 "write %d bytes at %d in two attempts\n",
    142117                                 len, off));
    143 #ifdef __OS2__ // remove our lock, if upgrade succeded
    144                         if (upgradeLockRC == 0)
    145                                 tdb_brunlock( tdb, F_WRLCK, off, len);
    146                         if (unlockRC == 0)
    147                                 tdb_brlock( tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT);
    148 #endif
    149                         return -1;
    150                 }
    151         }
    152 #ifdef __OS2__ // remove our lock, if upgrade succeded
    153         if (upgradeLockRC == 0)
    154                 tdb_brunlock( tdb, F_WRLCK, off, len);
    155         if (unlockRC == 0)
    156                 tdb_brlock( tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT);
    157 #endif
     118                        return -1;
     119                }
     120        }
    158121        return 0;
    159122}
  • TabularUnified trunk/server/lib/tdb/common/lock.c

    r878 r895  
    3333}
    3434
     35#ifdef __OS2__
     36enum os2_fl {
     37    OS2_FL_LOCK = 0x1,
     38    OS2_FL_RW = 0x2,
     39    OS2_FL_WAIT = 0x4,
     40    OS2_FL_UPGRADE = 0x8,
     41};
     42
     43static int os2_set_file_locks(struct tdb_context *tdb, enum os2_fl flags,
     44                              off_t start, off_t offset, off_t len)
     45{
     46    FILELOCKL locks[2] = {0};
     47    ULONG fl;
     48    BOOL dowait;
     49    APIRET arc;
     50
     51    if ((!(flags & OS2_FL_UPGRADE) && (start > offset || start < 0)) ||
     52        offset < 0 || offset + len < 0) {
     53        errno = EINVAL;
     54        return -1;
     55    }
     56
     57    fl = (flags & OS2_FL_RW) ? 0 : 1; /* excluive when rw, shared otherwise */
     58
     59    if (flags & OS2_FL_UPGRADE) {
     60        locks[0].lOffset = offset;
     61        locks[0].lRange = len == 0 ? OFF_MAX : len;
     62        locks[1] = locks[0];
     63        fl |= 2; /* atomic */
     64    } else {
     65        if (start != offset) {
     66            locks[0].lOffset = start;
     67            locks[0].lRange = offset - start;
     68            fl |= 2; /* atomic */
     69        }
     70        locks[1].lOffset = start;
     71        locks[1].lRange = len == 0 ? OFF_MAX : (locks[0].lRange + len);
     72    }
     73
     74    dowait = (flags & (OS2_FL_LOCK | OS2_FL_UPGRADE)) && (flags & OS2_FL_WAIT);
     75
     76    TDB_LOG((tdb, TDB_DEBUG_TRACE, "os2_file_locks: fd=%d lock_fd=%d flags=%x start=%lld offset=%lld len=%lld pid=%d\n",
     77             tdb->fd, tdb->lock_fd, flags, start, offset, len, getpid()));
     78
     79    arc = DosSetFileLocksL(tdb->lock_fd,
     80                           &locks[(flags & OS2_FL_LOCK) ? 0 : 1],
     81                           &locks[(flags & OS2_FL_LOCK) ? 1 : 0],
     82                           dowait ? SEM_INDEFINITE_WAIT : SEM_IMMEDIATE_RETURN,
     83                           fl);
     84
     85    TDB_LOG((tdb, TDB_DEBUG_TRACE, "os2_file_locks: arc=%d pid=%d\n", arc, getpid()));
     86
     87    if (arc) {
     88        switch (arc) {
     89            case ERROR_LOCK_VIOLATION:
     90                errno = EACCES;
     91                break;
     92            case ERROR_INTERRUPT:
     93                errno = EINTR;
     94                break;
     95            case ERROR_TIMEOUT:
     96                errno = EDEADLK;
     97                break;
     98            default:
     99                TDB_LOG((tdb, TDB_DEBUG_ERROR, "os2_file_locks failed, lock_fd=%d flags=%x start=%d off=%d len=%d (arc=%d)\n",
     100                         tdb->lock_fd, flags, start, offset, len, arc));
     101        }
     102        return -1;
     103    }
     104
     105    return 0;
     106}
     107#else /* __OS2__ */
    35108static int fcntl_lock(struct tdb_context *tdb,
    36109                      int rw, off_t off, off_t len, bool waitflag)
     
    44117        fl.l_pid = 0;
    45118
    46 #ifdef __OS2__
    47         int rc = 0;
    48         int lockFile = 0;
    49 
    50         if (off == ACTIVE_LOCK || off == OPEN_LOCK || off == TRANSACTION_LOCK)
    51                 lockFile = tdb->hActiveLock;
    52         else
    53                 lockFile = tdb->fd;
    54 
    55         int cmd = 0;
    56         if (waitflag)
    57                 cmd = F_SETLKW;
    58         else
    59                 cmd = F_SETLK;
    60 
    61         rc = fcntl(lockFile, cmd, &fl);
    62         // if the first lock doesn't work and it's a complete lock,
    63         // we split it in 2 parts. first hash size*4 and then the rest
    64         if (rc != 0 && off == FREELIST_TOP && len == 0) {
    65                 fl.l_len = tdb->header.hash_size * 4;
    66                 rc = fcntl(lockFile, cmd, &fl);
    67                 if (rc == 0) {
    68                         fl.l_start = off + tdb->header.hash_size * 4;
    69                         fl.l_len = 0;
    70                         rc = fcntl(lockFile, cmd, &fl);
    71                 }
    72         }
    73 
    74         TDB_LOG((tdb, TDB_DEBUG_TRACE,"fcntl_lock: (fd=%d) offset=%lld rw_type=%d len=%lld waitflag=%d (rc=%d) pid=%d\n",
    75                 lockFile, off, rw, len, waitflag, rc, getpid()));
    76 
    77         return rc;
    78 #else
    79119        if (waitflag)
    80120                return fcntl(tdb->fd, F_SETLKW, &fl);
    81121        else
    82122                return fcntl(tdb->fd, F_SETLK, &fl);
    83 #endif
    84123}
    85124
     
    151190        fl.l_pid = 0;
    152191
    153 #ifdef __OS2__
    154         int rc = 0;
    155         int lockFile = 0;
    156         if (off == ACTIVE_LOCK || off == OPEN_LOCK || off == TRANSACTION_LOCK)
    157                 lockFile = tdb->hActiveLock;
    158         else
    159                 lockFile = tdb->fd;
    160 
    161         rc = fcntl(lockFile, F_SETLKW, &fl);
    162         // if the first unlock doesn't work and it's a complete unlock,
    163         // we split it in 2 parts. first hash size*4 and then the rest
    164         // as it was locked that way as well. and it seems fcntl() doesn't care
    165         if (rc != 0 && off == FREELIST_TOP && len == 0) {
    166                 fl.l_len = tdb->header.hash_size * 4;
    167                 rc = fcntl(lockFile, F_SETLKW, &fl);
    168                 if (rc == 0) {
    169                         fl.l_start = off + tdb->header.hash_size * 4;
    170                         fl.l_len = 0;
    171                         rc = fcntl(lockFile, F_SETLKW, &fl);
    172                 }
    173         }
    174 
    175         TDB_LOG((tdb, TDB_DEBUG_TRACE,"fcntl_unlock: (fd=%d) offset=%lld rw_type=%d len=%lld (rc=%d) pid=%d\n",
    176                  lockFile, off, rw, len, rc, getpid()));
    177 
    178         return rc;
    179 #else
    180192        return fcntl(tdb->fd, F_SETLKW, &fl);
    181 #endif
    182 }
     193}
     194#endif /* __OS2__ */
    183195
    184196/* list -1 is the alloc list, otherwise a hash chain. */
     
    196208   note that a len of zero means lock to end of file
    197209*/
     210#ifdef __OS2__
     211static int tdb_brlock_ex(struct tdb_context *tdb,
     212                         int rw_type, tdb_off_t start,
     213                         tdb_off_t offset, size_t len,
     214                         enum tdb_lock_flags flags)
     215#else
    198216int tdb_brlock(struct tdb_context *tdb,
    199217               int rw_type, tdb_off_t offset, size_t len,
    200218               enum tdb_lock_flags flags)
     219#endif
    201220{
    202221        int ret;
     
    215234        }
    216235
     236#ifdef __OS2__
     237        int os2_flags = OS2_FL_LOCK;
     238        if (rw_type == F_WRLCK) {
     239            os2_flags |= OS2_FL_RW;
     240        }
     241        if (flags & TDB_LOCK_WAIT) {
     242            os2_flags |= OS2_FL_WAIT;
     243        }
     244        if (flags & TDB_LOCK_UPGRADE) {
     245            os2_flags |= OS2_FL_UPGRADE;
     246        }
     247#endif
     248
    217249        do {
     250#ifdef __OS2__
     251                ret = os2_set_file_locks(tdb, os2_flags, start, offset, len);
     252#else
    218253                ret = fcntl_lock(tdb, rw_type, offset, len,
    219254                                 flags & TDB_LOCK_WAIT);
     255#endif
    220256                /* Check for a sigalarm break. */
    221257                if (ret == -1 && errno == EINTR &&
     
    232268                 * locks. */
    233269                if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
    234                         TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d flags=%d len=%d\n",
    235                                  tdb->fd, offset, rw_type, flags, (int)len));
     270                        TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d flags=%d len=%d (errno=%d)\n",
     271                                 tdb->fd, offset, rw_type, flags, (int)len, errno));
    236272                }
    237273                return -1;
     
    240276}
    241277
     278#ifdef __OS2__
     279int tdb_brlock(struct tdb_context *tdb,
     280               int rw_type, tdb_off_t offset, size_t len,
     281               enum tdb_lock_flags flags)
     282{
     283    return tdb_brlock_ex(tdb, rw_type, offset, offset, len, flags);
     284}
     285#endif
     286
     287#ifdef __OS2__
     288static int tdb_brunlock_ex(struct tdb_context *tdb,
     289                           int rw_type, tdb_off_t start,
     290                           tdb_off_t offset, size_t len)
     291#else
    242292int tdb_brunlock(struct tdb_context *tdb,
    243293                 int rw_type, tdb_off_t offset, size_t len)
     294#endif
    244295{
    245296        int ret;
     
    250301
    251302        do {
     303#ifdef __OS2__
     304                ret = os2_set_file_locks(tdb, rw_type == F_WRLCK ? OS2_FL_RW : 0,
     305                                         start, offset, len);
     306#else
    252307                ret = fcntl_unlock(tdb, rw_type, offset, len);
     308#endif
    253309        } while (ret == -1 && errno == EINTR);
    254310
    255311        if (ret == -1) {
    256                 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %d rw_type=%d len=%d\n",
    257                          tdb->fd, offset, rw_type, (int)len));
     312                TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %d rw_type=%d len=%d (errno=%d)\n",
     313                         tdb->fd, offset, rw_type, (int)len, errno));
    258314        }
    259315        return ret;
    260316}
     317
     318#ifdef __OS2__
     319int tdb_brunlock(struct tdb_context *tdb,
     320                 int rw_type, tdb_off_t offset, size_t len)
     321{
     322        return tdb_brunlock_ex(tdb, rw_type, offset, offset, len);
     323}
     324#endif
    261325
    262326/*
     
    285349        while (count--) {
    286350                struct timeval tv;
    287 #ifdef __OS2__
    288                 // we need to remove locks, as upgrade doesn't work
    289                 tdb_brunlock(tdb, F_RDLCK, FREELIST_TOP, 0);
    290 #endif
    291351                if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
    292                                TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) {
     352                               TDB_LOCK_WAIT|TDB_LOCK_PROBE|TDB_LOCK_UPGRADE) == 0) {
    293353                        tdb->allrecord_lock.ltype = F_WRLCK;
    294354                        tdb->allrecord_lock.off = 0;
     
    580640}
    581641
     642#ifdef __OS2__
     643#define TDB_ADJLOCK_START_DECL size_t start,
     644#define TDB_ADJLOCK_START start,
     645#define TDB_ADJLOCK(tdb, rw_type, start, offset, len, flags) \
     646        tdb_brlock_ex(tdb, rw_type, start, offset, len, flags)
     647#define TDB_ADJUNLOCK(tdb, rw_type, start, offset, len) \
     648        tdb_brunlock_ex(tdb, rw_type, start, offset, len)
     649#define TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start, offset, len) \
     650        tdb_chainlock_gradual(tdb, ltype, flags, start, offset, len)
     651#else
     652#define TDB_ADJLOCK_START_DECL
     653#define TDB_ADJLOCK_START_REF
     654#define TDB_ADJLOCK(tdb, rw_type, start, offset, len, flags) \
     655        tdb_brlock(tdb, rw_type, offset, len, flags)
     656#define TDB_ADJUNLOCK(tdb, rw_type, start, offset, len) \
     657        tdb_brunlock(tdb, rw_type, offset, len)
     658#define TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start, offset, len) \
     659        tdb_chainlock_gradual(tdb, ltype, flags, offset, len)
     660#endif
     661
    582662/* We only need to lock individual bytes, but Linux merges consecutive locks
    583663 * so we lock in contiguous ranges. */
    584664static int tdb_chainlock_gradual(struct tdb_context *tdb,
    585665                                 int ltype, enum tdb_lock_flags flags,
    586                                  size_t off, size_t len)
     666                                 TDB_ADJLOCK_START_DECL size_t off, size_t len)
    587667{
    588668        int ret;
     
    591671        if (len <= 4) {
    592672                /* Single record.  Just do blocking lock. */
    593                 return tdb_brlock(tdb, ltype, off, len, flags);
     673                return TDB_ADJLOCK(tdb, ltype, start, off, len, flags);
    594674        }
    595675
    596676        /* First we try non-blocking. */
    597         ret = tdb_brlock(tdb, ltype, off, len, nb_flags);
     677        ret = TDB_ADJLOCK(tdb, ltype, start, off, len, nb_flags);
    598678        if (ret == 0) {
    599679                return 0;
     
    601681
    602682        /* Try locking first half, then second. */
    603         ret = tdb_chainlock_gradual(tdb, ltype, flags, off, len / 2);
     683        ret = TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start, off, len / 2);
    604684        if (ret == -1)
    605685                return -1;
    606686
    607         ret = tdb_chainlock_gradual(tdb, ltype, flags,
     687        ret = TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start,
    608688                                    off + len / 2, len - len / 2);
    609689        if (ret == -1) {
    610                 tdb_brunlock(tdb, ltype, off, len / 2);
     690                TDB_ADJUNLOCK(tdb, ltype, start, off, len / 2);
    611691                return -1;
    612692        }
     
    634714         * It is (1) which cause the starvation problem, so we're only
    635715         * gradual for that. */
    636         if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP,
     716        if (TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, FREELIST_TOP, FREELIST_TOP,
    637717                                  tdb->header.hash_size * 4) == -1) {
    638718                return -1;
     
    640720
    641721        /* Grab individual record locks. */
    642         if (tdb_brlock(tdb, ltype, lock_offset(tdb->header.hash_size), 0,
    643                        flags) == -1) {
     722        if (TDB_ADJLOCK(tdb, ltype, FREELIST_TOP, lock_offset(tdb->header.hash_size), 0,
     723                        flags) == -1) {
    644724                tdb_brunlock(tdb, ltype, FREELIST_TOP,
    645725                             tdb->header.hash_size * 4);
  • TabularUnified trunk/server/lib/tdb/common/open.c

    r866 r895  
    162162        return check_header_hash(tdb, false, m1, m2);
    163163}
     164
     165#ifdef __OS2__
     166static int os2_create_lockfile(struct tdb_context *tdb, const char *name, const char *origin)
     167{
     168        /* name could be null, so handle it */
     169        if (name == NULL)
     170                return 0;
     171
     172        char lock_name[_MAX_PATH + 5];
     173        snprintf(lock_name, sizeof(lock_name), "%s.lock", name);
     174        tdb->lock_fd = open(lock_name, tdb->open_flags | O_CREAT | O_TRUNC, 0777);
     175        if (tdb->lock_fd == -1) {
     176                TDB_LOG((tdb, TDB_DEBUG_ERROR, "os2_create_lockfile: cannot create lock file %s, errno=%d\n",
     177                         lock_name, errno));
     178                return -1;
     179        }
     180
     181        return 0;
     182}
     183#endif
    164184
    165185_PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
     
    304324
    305325#ifdef __OS2__
    306         if (os2_crtActiveLock(tdb, name, "tdb_open_ex") != 0)
     326        if (os2_create_lockfile(tdb, name, "tdb_open_ex") != 0)
    307327                goto fail;
    308328#endif
     
    476496
    477497#ifdef __OS2__
    478         close(tdb->hActiveLock);
    479         tdb->hActiveLock = -1;
     498        close(tdb->lock_fd);
     499        tdb->lock_fd = -1;
    480500#endif
    481501        if (!tdb)
     
    548568
    549569#ifdef __OS2__
    550         close(tdb->hActiveLock);
    551         tdb->hActiveLock = -1;
     570        close(tdb->lock_fd);
     571        tdb->lock_fd = -1;
    552572#endif
    553573
     
    620640
    621641#ifdef __OS2__
    622         close(tdb->hActiveLock);
    623         tdb->hActiveLock = -1;
    624 
    625         if (os2_crtActiveLock(tdb, tdb->name, "tdb_reopen") != 0)
     642        close(tdb->lock_fd);
     643        tdb->lock_fd = -1;
     644
     645        if (os2_create_lockfile(tdb, tdb->name, "tdb_reopen") != 0)
    626646                goto fail;
    627647#endif
     
    679699        return 0;
    680700}
    681 #ifdef __OS2__
    682 int os2_crtActiveLock(struct tdb_context *tdb, const char *name, const char *origin)
    683 {
    684         // name could be null, so handle it
    685         if (name == NULL)
    686                 return 0;
    687 
    688         struct stat st;
    689         bool emptytdb = false;
    690         char activeLockName[_MAX_PATH];
    691         char *emptyString = "used for active lock\n\0";
    692         sprintf(activeLockName, "%s_AL", name);
    693 
    694         if ((stat(activeLockName, &st) == -1) || (st.st_size < strlen(emptyString)))
    695                 emptytdb = true;
    696 
    697         tdb->hActiveLock = open(activeLockName, tdb->open_flags | O_CREAT, 0777);
    698         if (tdb->hActiveLock == -1) {
    699                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "os2_crtActiveLock: cannot create activeLock %s called from %s %s\n",
    700                         activeLockName, origin, strerror(errno)));
    701                 errno = EINVAL;
    702                 return -1;
    703         }
    704 
    705         // we try to truncate the db when called from tdb_open_ex
    706         // but if it's not working it's ok as well
    707         if (emptytdb)
    708                 tdb_write_all(tdb->hActiveLock, emptyString, strlen(emptyString));
    709 
    710         return 0;
    711 }
    712 #endif
  • TabularUnified trunk/server/lib/tdb/common/tdb_private.h

    r857 r895  
    180180        /* If set, don't actually lock at all. */
    181181        TDB_LOCK_MARK_ONLY = 4,
     182        /* If set, upgrade the existiong lock (used only on OS/2). */
     183        TDB_LOCK_UPGRADE = 8,
    182184};
    183185
     
    220222        volatile sig_atomic_t *interrupt_sig_ptr;
    221223#ifdef __OS2__
    222         int hActiveLock;
     224        int lock_fd;
    223225#endif
    224226};
Note: See TracChangeset for help on using the changeset viewer.