summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
ce01b73)
When a client of the IDL tries to commit a read-modify-write transaction
but the database has changed in the meantime, the IDL tells its client to
wait for the IDL to change and then try the transaction again by returning
TXN_TRY_AGAIN. The "wait for the IDL to change" part is important because
there's no point in retrying the transaction before the IDL has received
the database updates (the transaction would fail in the same way all over
again).
However, the logic was incomplete: the database update can be received
*before* the reply to the transaction RPC (I think that in the current
ovsdb-server implementation this will always happen, in fact). When this
happens, the right thing to do is to retry the transaction immediately;
if we wait, then we're waiting for an additional change to the database
that may never come, causing an indefinite hang.
This commit therefore splits the "try again" IDL commit status code
into two: one that means "try again immediately" and another that means
"wait for a change then try again". When an update is processed after a
transaction is committed but before the reply is received, the "try again
now" status tells the IDL client not to wait for another database change
before retrying its transaction.
Bug #5980.
Reported-by: Ram Jothikumar <rjothikumar@nicira.com>
Reproduced-by: Alex Yip <alex@nicira.com>
char *error;
bool dry_run;
struct ds comment;
char *error;
bool dry_run;
struct ds comment;
+ unsigned int commit_seqno;
/* Increments. */
char *inc_table;
/* Increments. */
char *inc_table;
return "aborted";
case TXN_SUCCESS:
return "success";
return "aborted";
case TXN_SUCCESS:
return "success";
- case TXN_TRY_AGAIN:
- return "try again";
+ case TXN_AGAIN_WAIT:
+ return "wait then try again";
+ case TXN_AGAIN_NOW:
+ return "try again now";
case TXN_NOT_LOCKED:
return "not locked";
case TXN_ERROR:
case TXN_NOT_LOCKED:
return "not locked";
case TXN_ERROR:
txn->error = NULL;
txn->dry_run = false;
ds_init(&txn->comment);
txn->error = NULL;
txn->dry_run = false;
ds_init(&txn->comment);
+ txn->commit_seqno = txn->idl->change_seqno;
txn->inc_table = NULL;
txn->inc_column = NULL;
txn->inc_table = NULL;
txn->inc_column = NULL;
json_hash(txn->request_id, 0));
txn->status = TXN_INCOMPLETE;
} else {
json_hash(txn->request_id, 0));
txn->status = TXN_INCOMPLETE;
} else {
- txn->status = TXN_TRY_AGAIN;
+ txn->status = TXN_AGAIN_WAIT;
}
ovsdb_idl_txn_disassemble(txn);
}
ovsdb_idl_txn_disassemble(txn);
* prerequisite to completing the transaction. That is, if 'column' in 'row_'
* changed (or if 'row_' was deleted) between the time that the IDL originally
* read its contents and the time that the transaction commits, then the
* prerequisite to completing the transaction. That is, if 'column' in 'row_'
* changed (or if 'row_' was deleted) between the time that the IDL originally
* read its contents and the time that the transaction commits, then the
- * transaction aborts and ovsdb_idl_txn_commit() returns TXN_TRY_AGAIN.
+ * transaction aborts and ovsdb_idl_txn_commit() returns TXN_AGAIN_WAIT or
+ * TXN_AGAIN_NOW (depending on whether the database change has already been
+ * received).
*
* The intention is that, to ensure that no transaction commits based on dirty
* reads, an application should call ovsdb_idl_txn_verify() on each data item
*
* The intention is that, to ensure that no transaction commits based on dirty
* reads, an application should call ovsdb_idl_txn_verify() on each data item
struct ovsdb_idl_txn *txn;
HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
struct ovsdb_idl_txn *txn;
HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
- ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN);
+ ovsdb_idl_txn_complete(txn, TXN_AGAIN_WAIT);
status = (hard_errors ? TXN_ERROR
: lock_errors ? TXN_NOT_LOCKED
status = (hard_errors ? TXN_ERROR
: lock_errors ? TXN_NOT_LOCKED
- : soft_errors ? TXN_TRY_AGAIN
+ : soft_errors ? (txn->commit_seqno == idl->change_seqno
+ ? TXN_AGAIN_WAIT
+ : TXN_AGAIN_NOW)
TXN_INCOMPLETE, /* Commit in progress, please wait. */
TXN_ABORTED, /* ovsdb_idl_txn_abort() called. */
TXN_SUCCESS, /* Commit successful. */
TXN_INCOMPLETE, /* Commit in progress, please wait. */
TXN_ABORTED, /* ovsdb_idl_txn_abort() called. */
TXN_SUCCESS, /* Commit successful. */
- TXN_TRY_AGAIN, /* Commit failed because a "verify" operation
+ TXN_AGAIN_WAIT, /* Commit failed because a "verify" operation
* reported an inconsistency, due to a network
* reported an inconsistency, due to a network
- * problem, or other transient failure. */
+ * problem, or other transient failure. Wait
+ * for a change, then try again. */
+ TXN_AGAIN_NOW, /* Same as above but try again immediately. */
TXN_NOT_LOCKED, /* Server hasn't given us the lock yet. */
TXN_ERROR /* Commit failed due to a hard error. */
};
TXN_NOT_LOCKED, /* Server hasn't given us the lock yet. */
TXN_ERROR /* Commit failed due to a hard error. */
};
def __txn_abort_all(self):
while self._outstanding_txns:
txn = self._outstanding_txns.popitem()[1]
def __txn_abort_all(self):
while self._outstanding_txns:
txn = self._outstanding_txns.popitem()[1]
- txn._status = Transaction.TRY_AGAIN
+ txn._status = Transaction.AGAIN_WAIT
def __txn_process_reply(self, msg):
txn = self._outstanding_txns.pop(msg.id, None)
def __txn_process_reply(self, msg):
txn = self._outstanding_txns.pop(msg.id, None)
if 'column_name' changed in this row (or if this row was deleted)
between the time that the IDL originally read its contents and the time
that the transaction commits, then the transaction aborts and
if 'column_name' changed in this row (or if this row was deleted)
between the time that the IDL originally read its contents and the time
that the transaction commits, then the transaction aborts and
- Transaction.commit() returns Transaction.TRY_AGAIN.
+ Transaction.commit() returns Transaction.AGAIN_WAIT or
+ Transaction.AGAIN_NOW (depending on whether the database change has
+ already been received).
The intention is that, to ensure that no transaction commits based on
dirty reads, an application should call Row.verify() on each data item
The intention is that, to ensure that no transaction commits based on
dirty reads, an application should call Row.verify() on each data item
INCOMPLETE = "incomplete" # Commit in progress, please wait.
ABORTED = "aborted" # ovsdb_idl_txn_abort() called.
SUCCESS = "success" # Commit successful.
INCOMPLETE = "incomplete" # Commit in progress, please wait.
ABORTED = "aborted" # ovsdb_idl_txn_abort() called.
SUCCESS = "success" # Commit successful.
- TRY_AGAIN = "try again" # Commit failed because a "verify" operation
+ AGAIN_WAIT = "wait then try again"
+ # Commit failed because a "verify" operation
# reported an inconsistency, due to a network
# reported an inconsistency, due to a network
- # problem, or other transient failure.
+ # problem, or other transient failure. Wait
+ # for a change, then try again.
+ AGAIN_NOW = "try again now" # Same as AGAIN_WAIT but try again right away.
NOT_LOCKED = "not locked" # Server hasn't given us the lock yet.
ERROR = "error" # Commit failed due to a hard error.
NOT_LOCKED = "not locked" # Server hasn't given us the lock yet.
ERROR = "error" # Commit failed due to a hard error.
self._status = Transaction.UNCOMMITTED
self._error = None
self._comments = []
self._status = Transaction.UNCOMMITTED
self._error = None
self._comments = []
+ self._commit_seqno = self.idl.change_seqno
self._inc_table = None
self._inc_column = None
self._inc_table = None
self._inc_column = None
self.idl._outstanding_txns[self._request_id] = self
self._status = Transaction.INCOMPLETE
else:
self.idl._outstanding_txns[self._request_id] = self
self._status = Transaction.INCOMPLETE
else:
- self._status = Transaction.TRY_AGAIN
+ self._status = Transaction.AGAIN_WAIT
self.__disassemble()
return self._status
self.__disassemble()
return self._status
elif lock_errors:
self._status = Transaction.NOT_LOCKED
elif soft_errors:
elif lock_errors:
self._status = Transaction.NOT_LOCKED
elif soft_errors:
- self._status = Transaction.TRY_AGAIN
+ if self._commit_seqno == self.idl.change_seqno:
+ self._status = Transaction.AGAIN_WAIT
+ else:
+ self._status = Transaction.AGAIN_NOW
else:
self._status = Transaction.SUCCESS
else:
self._status = Transaction.SUCCESS
000: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
001: commit, status=success
002: {"error":null,"result":[{"count":1}]}
000: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
001: commit, status=success
002: {"error":null,"result":[{"count":1}]}
-003: commit, status=try again
+003: commit, status=try again now
004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
004: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
005: commit, status=success
004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
004: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
005: commit, status=success
static const struct vsctl_command_syntax *find_command(const char *name);
static void run_prerequisites(struct vsctl_command[], size_t n_commands,
struct ovsdb_idl *);
static const struct vsctl_command_syntax *find_command(const char *name);
static void run_prerequisites(struct vsctl_command[], size_t n_commands,
struct ovsdb_idl *);
-static void do_vsctl(const char *args,
- struct vsctl_command *, size_t n_commands,
- struct ovsdb_idl *);
+static enum ovsdb_idl_txn_status do_vsctl(const char *args,
+ struct vsctl_command *, size_t n,
+ struct ovsdb_idl *);
static const struct vsctl_table_class *get_table(const char *table_name);
static void set_column(const struct vsctl_table_class *,
static const struct vsctl_table_class *get_table(const char *table_name);
static void set_column(const struct vsctl_table_class *,
main(int argc, char *argv[])
{
extern struct vlog_module VLM_reconnect;
main(int argc, char *argv[])
{
extern struct vlog_module VLM_reconnect;
+ enum ovsdb_idl_txn_status status;
struct ovsdb_idl *idl;
struct vsctl_command *commands;
size_t n_commands;
struct ovsdb_idl *idl;
struct vsctl_command *commands;
size_t n_commands;
run_prerequisites(commands, n_commands, idl);
/* Now execute the commands. */
run_prerequisites(commands, n_commands, idl);
/* Now execute the commands. */
+ status = TXN_AGAIN_WAIT;
- if (ovsdb_idl_run(idl)) {
- do_vsctl(args, commands, n_commands, idl);
+ if (ovsdb_idl_run(idl) || status == TXN_AGAIN_NOW) {
+ status = do_vsctl(args, commands, n_commands, idl);
- ovsdb_idl_wait(idl);
- poll_block();
+ if (status != TXN_AGAIN_NOW) {
+ ovsdb_idl_wait(idl);
+ poll_block();
+ }
+static enum ovsdb_idl_txn_status
do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
struct ovsdb_idl *idl)
{
do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
struct ovsdb_idl *idl)
{
vsctl_context_done(&ctx, c);
if (ctx.try_again) {
vsctl_context_done(&ctx, c);
if (ctx.try_again) {
+ status = TXN_AGAIN_WAIT;
+ case TXN_AGAIN_WAIT:
+ case TXN_AGAIN_NOW:
goto try_again;
case TXN_ERROR:
goto try_again;
case TXN_ERROR:
free(c->table);
}
free(error);
free(c->table);
}
free(error);
}
static const struct vsctl_command_syntax all_commands[] = {
}
static const struct vsctl_command_syntax all_commands[] = {