From c1bbe79748bb1615ab116fe287b8d5d28a83b330 Mon Sep 17 00:00:00 2001 From: Richard Mudgett Date: Mon, 4 Jun 2012 19:46:33 +0000 Subject: Fix potential deadlock between masquerade and chan_local. * Restructure ast_do_masquerade() to not hold channel locks while it calls ast_indicate(). * Simplify many calls to ast_do_masquerade() since it will never return a failure now. If it does fail internally because a channel driver callback operation failed, the only thing ast_do_masquerade() can do is generate a warning message about strange things may happen and press on. * Fixed the call to ast_bridged_channel() in ast_do_masquerade(). This change fixes half of the deadlock reported in ASTERISK-19801 between masquerades and chan_iax. (closes issue ASTERISK-19537) Reported by: rmudgett Tested by: rmudgett Review: https://reviewboard.asterisk.org/r/1915/ ........ Merged revisions 368405 from http://svn.asterisk.org/svn/asterisk/branches/1.8 ........ Merged revisions 368407 from http://svn.asterisk.org/svn/asterisk/branches/10 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@368421 65c4cc65-6c06-0410-ace0-fbb531ad65f3 --- main/channel.c | 328 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 171 insertions(+), 157 deletions(-) (limited to 'main/channel.c') diff --git a/main/channel.c b/main/channel.c index 98b2f4403..8bd973501 100644 --- a/main/channel.c +++ b/main/channel.c @@ -2609,13 +2609,7 @@ int ast_hangup(struct ast_channel *chan) */ while (ast_channel_masq(chan)) { ast_channel_unlock(chan); - if (ast_do_masquerade(chan)) { - ast_log(LOG_WARNING, "Failed to perform masquerade\n"); - - /* Abort the loop or we might never leave. */ - ast_channel_lock(chan); - break; - } + ast_do_masquerade(chan); ast_channel_lock(chan); } @@ -2631,6 +2625,7 @@ int ast_hangup(struct ast_channel *chan) return 0; } + /* Mark as a zombie so a masquerade cannot be setup on this channel. */ if (!(was_zombie = ast_test_flag(ast_channel_flags(chan), AST_FLAG_ZOMBIE))) { ast_set_flag(ast_channel_flags(chan), AST_FLAG_ZOMBIE); } @@ -3008,10 +3003,8 @@ struct ast_channel *ast_waitfor_nandfds(struct ast_channel **c, int n, int *fds, /* Perform any pending masquerades */ for (x = 0; x < n; x++) { - if (ast_channel_masq(c[x]) && ast_do_masquerade(c[x])) { - ast_log(LOG_WARNING, "Masquerade failed\n"); - *ms = -1; - return NULL; + while (ast_channel_masq(c[x])) { + ast_do_masquerade(c[x]); } ast_channel_lock(c[x]); @@ -3153,10 +3146,8 @@ static struct ast_channel *ast_waitfor_nandfds_simple(struct ast_channel *chan, /* See if this channel needs to be masqueraded */ - if (ast_channel_masq(chan) && ast_do_masquerade(chan)) { - ast_log(LOG_WARNING, "Failed to perform masquerade on %s\n", ast_channel_name(chan)); - *ms = -1; - return NULL; + while (ast_channel_masq(chan)) { + ast_do_masquerade(chan); } ast_channel_lock(chan); @@ -3240,10 +3231,8 @@ static struct ast_channel *ast_waitfor_nandfds_complex(struct ast_channel **c, i struct ast_channel *winner = NULL; for (i = 0; i < n; i++) { - if (ast_channel_masq(c[i]) && ast_do_masquerade(c[i])) { - ast_log(LOG_WARNING, "Masquerade failed\n"); - *ms = -1; - return NULL; + while (ast_channel_masq(c[i])) { + ast_do_masquerade(c[i]); } ast_channel_lock(c[i]); @@ -3636,11 +3625,8 @@ static struct ast_frame *__ast_read(struct ast_channel *chan, int dropaudio) */ if (ast_channel_masq(chan)) { - if (ast_do_masquerade(chan)) - ast_log(LOG_WARNING, "Failed to perform masquerade\n"); - else - f = &ast_null_frame; - return f; + ast_do_masquerade(chan); + return &ast_null_frame; } /* if here, no masq has happened, lock the channel and proceed */ @@ -4727,12 +4713,9 @@ int ast_write(struct ast_channel *chan, struct ast_frame *fr) goto done; /* Handle any pending masquerades */ - if (ast_channel_masq(chan)) { + while (ast_channel_masq(chan)) { ast_channel_unlock(chan); - if (ast_do_masquerade(chan)) { - ast_log(LOG_WARNING, "Failed to perform masquerade\n"); - return res; /* no need to goto done: chan is already unlocked for masq */ - } + ast_do_masquerade(chan); ast_channel_lock(chan); } if (ast_channel_masqr(chan)) { @@ -6542,9 +6525,9 @@ static void masquerade_colp_transfer(struct ast_channel *transferee, struct xfer int ast_do_masquerade(struct ast_channel *original) { int x; - int res=0; int origstate; int visible_indication; + int clone_was_zombie = 0;/*!< TRUE if the clonechan was a zombie before the masquerade. */ struct ast_frame *current; const struct ast_channel_tech *t; void *t_pvt; @@ -6567,61 +6550,65 @@ int ast_do_masquerade(struct ast_channel *original) char masqn[AST_CHANNEL_NAME]; char zombn[AST_CHANNEL_NAME]; - ast_format_copy(&rformat, ast_channel_readformat(original)); - ast_format_copy(&wformat, ast_channel_writeformat(original)); - /* XXX This operation is a bit odd. We're essentially putting the guts of * the clone channel into the original channel. Start by killing off the * original channel's backend. While the features are nice, which is the * reason we're keeping it, it's still awesomely weird. XXX */ - /* The reasoning for the channels ao2_container lock here is complex. + /* + * The reasoning for the channels ao2_container lock here is + * complex. + * + * There is a race condition that exists for this function. + * Since all pvt and channel locks must be let go before calling + * ast_do_masquerade, it is possible that it could be called + * multiple times for the same channel. In order to prevent the + * race condition with competing threads to do the masquerade + * and new masquerade attempts, the channels container must be + * locked for the entire masquerade. The original and clonechan + * need to be unlocked earlier to avoid potential deadlocks with + * the chan_local deadlock avoidance method. + * + * The container lock blocks competing masquerade attempts from + * starting as well as being necessary for proper locking order + * because the channels must to be unlinked to change their + * names. * - * In order to check for a race condition, the original channel must - * be locked. If it is determined that the masquerade should proceed - * the original channel can absolutely not be unlocked until the end - * of the function. Since after determining the masquerade should - * continue requires the channels to be unlinked from the ao2_container, - * the container lock must be held first to achieve proper locking order. + * The original and clonechan locks must be held while the + * channel contents are shuffled around for the masquerade. + * + * The masq and masqr pointers need to be left alone until the + * masquerade has restabilized the channels to prevent another + * masquerade request until the AST_FLAG_ZOMBIE can be set on + * the clonechan. */ ao2_lock(channels); - /* lock the original channel to determine if the masquerade is required or not */ - ast_channel_lock(original); - /* - * This checks to see if the masquerade has already happened or - * not. There is a race condition that exists for this - * function. Since all pvt and channel locks must be let go - * before calling do_masquerade, it is possible that it could be - * called multiple times for the same channel. This check - * verifies whether or not the masquerade has already been - * completed by another thread. + * Lock the original channel to determine if the masquerade is + * still required. */ - while ((clonechan = ast_channel_masq(original)) && ast_channel_trylock(clonechan)) { - /* - * A masq is needed but we could not get the clonechan lock - * immediately. Since this function already holds the global - * container lock, unlocking original for deadlock avoidance - * will not result in any sort of masquerade race condition. If - * masq is called by a different thread while this happens, it - * will be stuck waiting until we unlock the container. - */ - CHANNEL_DEADLOCK_AVOIDANCE(original); - } + ast_channel_lock(original); - /* - * A final masq check must be done after deadlock avoidance for - * clonechan above or we could get a double masq. This is - * posible with ast_hangup at least. - */ + clonechan = ast_channel_masq(original); if (!clonechan) { - /* masq already completed by another thread, or never needed to be done to begin with */ + /* + * The masq is already completed by another thread or never + * needed to be done to begin with. + */ ast_channel_unlock(original); ao2_unlock(channels); return 0; } + /* Bump the refs to ensure that they won't dissapear on us. */ + ast_channel_ref(original); + ast_channel_ref(clonechan); + + /* unlink from channels container as name (which is the hash value) will change */ + ao2_unlink(channels, original); + ao2_unlink(channels, clonechan); + /* Get any transfer masquerade connected line exchange data. */ xfer_ds = ast_channel_datastore_find(original, &xfer_ds_info, NULL); if (xfer_ds) { @@ -6632,31 +6619,27 @@ int ast_do_masquerade(struct ast_channel *original) } /* - * Release any hold on the transferee channel before proceeding - * with the masquerade. + * Stop any visible indication on the original channel so we can + * transfer it to the clonechan taking the original's place. + */ + visible_indication = ast_channel_visible_indication(original); + ast_channel_unlock(original); + ast_indicate(original, -1); + + /* + * Release any hold on the transferee channel before going any + * further with the masquerade. */ if (xfer_colp && xfer_colp->transferee_held) { ast_indicate(clonechan, AST_CONTROL_UNHOLD); } - /* clear the masquerade channels */ - ast_channel_masq_set(original, NULL); - ast_channel_masqr_set(clonechan, NULL); - - /* unlink from channels container as name (which is the hash value) will change */ - ao2_unlink(channels, original); - ao2_unlink(channels, clonechan); + /* Start the masquerade channel contents rearangement. */ + ast_channel_lock_both(original, clonechan); ast_debug(4, "Actually Masquerading %s(%d) into the structure of %s(%d)\n", ast_channel_name(clonechan), ast_channel_state(clonechan), ast_channel_name(original), ast_channel_state(original)); - /* - * Stop any visible indiction on the original channel so we can - * transfer it to the clonechan taking the original's place. - */ - visible_indication = ast_channel_visible_indication(original); - ast_indicate(original, -1); - chans[0] = clonechan; chans[1] = original; ast_manager_event_multichan(EVENT_FLAG_CALL, "Masquerade", 2, chans, @@ -6666,8 +6649,12 @@ int ast_do_masquerade(struct ast_channel *original) "OriginalState: %s\r\n", ast_channel_name(clonechan), ast_state2str(ast_channel_state(clonechan)), ast_channel_name(original), ast_state2str(ast_channel_state(original))); - /* Having remembered the original read/write formats, we turn off any translation on either - one */ + /* + * Remember the original read/write formats. We turn off any + * translation on either one + */ + ast_format_copy(&rformat, ast_channel_readformat(original)); + ast_format_copy(&wformat, ast_channel_writeformat(original)); free_translation(clonechan); free_translation(original); @@ -6692,15 +6679,15 @@ int ast_do_masquerade(struct ast_channel *original) ast_channel_tech_set(original, ast_channel_tech(clonechan)); ast_channel_tech_set(clonechan, t); + t_pvt = ast_channel_tech_pvt(original); + ast_channel_tech_pvt_set(original, ast_channel_tech_pvt(clonechan)); + ast_channel_tech_pvt_set(clonechan, t_pvt); + /* Swap the cdrs */ cdr = ast_channel_cdr(original); ast_channel_cdr_set(original, ast_channel_cdr(clonechan)); ast_channel_cdr_set(clonechan, cdr); - t_pvt = ast_channel_tech_pvt(original); - ast_channel_tech_pvt_set(original, ast_channel_tech_pvt(clonechan)); - ast_channel_tech_pvt_set(clonechan, t_pvt); - /* Swap the alertpipes */ ast_channel_internal_alertpipe_swap(original, clonechan); @@ -6716,9 +6703,9 @@ int ast_do_masquerade(struct ast_channel *original) * old (clone) channel. */ { - AST_LIST_HEAD_NOLOCK(,ast_frame) tmp_readq; - AST_LIST_HEAD_SET_NOLOCK(&tmp_readq, NULL); + AST_LIST_HEAD_NOLOCK(, ast_frame) tmp_readq; + AST_LIST_HEAD_INIT_NOLOCK(&tmp_readq); AST_LIST_APPEND_LIST(&tmp_readq, ast_channel_readq(original), frame_list); AST_LIST_APPEND_LIST(ast_channel_readq(original), ast_channel_readq(clonechan), frame_list); @@ -6749,23 +6736,6 @@ int ast_do_masquerade(struct ast_channel *original) ast_channel_state_set(original, ast_channel_state(clonechan)); ast_channel_state_set(clonechan, origstate); - if (ast_channel_tech(clonechan)->fixup && ast_channel_tech(clonechan)->fixup(original, clonechan)) { - ast_log(LOG_WARNING, "Fixup failed on channel %s, strange things may happen.\n", ast_channel_name(clonechan)); - } - - /* Start by disconnecting the original's physical side */ - if (ast_channel_tech(clonechan)->hangup && ast_channel_tech(clonechan)->hangup(clonechan)) { - ast_log(LOG_WARNING, "Hangup failed! Strange things may happen!\n"); - res = -1; - goto done; - } - - /* - * We just hung up the physical side of the channel. Set the - * new zombie to use the kill channel driver for safety. - */ - ast_channel_tech_set(clonechan, &ast_kill_tech); - /* Mangle the name of the clone channel */ snprintf(zombn, sizeof(zombn), "%s", orig); /* quick, hide the brains! */ __ast_change_name_nolink(clonechan, zombn); @@ -6866,63 +6836,89 @@ int ast_do_masquerade(struct ast_channel *original) ast_debug(1, "Putting channel %s in %s/%s formats\n", ast_channel_name(original), ast_getformatname(&wformat), ast_getformatname(&rformat)); - /* Okay. Last thing is to let the channel driver know about all this mess, so he - can fix up everything as best as possible */ - if (ast_channel_tech(original)->fixup) { - if (ast_channel_tech(original)->fixup(clonechan, original)) { - ast_log(LOG_WARNING, "Channel for type '%s' could not fixup channel %s\n", - ast_channel_tech(original)->type, ast_channel_name(original)); - res = -1; - goto done; - } - } else - ast_log(LOG_WARNING, "Channel type '%s' does not have a fixup routine (for %s)! Bad things may happen.\n", + /* Fixup the original clonechan's physical side */ + if (ast_channel_tech(original)->fixup && ast_channel_tech(original)->fixup(clonechan, original)) { + ast_log(LOG_WARNING, "Channel type '%s' could not fixup channel %s, strange things may happen. (clonechan)\n", ast_channel_tech(original)->type, ast_channel_name(original)); + } + + /* Fixup the original original's physical side */ + if (ast_channel_tech(clonechan)->fixup && ast_channel_tech(clonechan)->fixup(original, clonechan)) { + ast_log(LOG_WARNING, "Channel type '%s' could not fixup channel %s, strange things may happen. (original)\n", + ast_channel_tech(clonechan)->type, ast_channel_name(clonechan)); + } /* - * If an indication is currently playing, maintain it on the channel - * that is taking the place of original + * Now, at this point, the "clone" channel is totally F'd up. + * We mark it as a zombie so nothing tries to touch it. If it's + * already been marked as a zombie, then we must free it (since + * it already is considered invalid). * - * This is needed because the masquerade is swapping out in the internals - * of this channel, and the new channel private data needs to be made - * aware of the current visible indication (RINGING, CONGESTION, etc.) + * This must be done before we unlock clonechan to prevent + * setting up another masquerade on the clonechan. */ - if (visible_indication) { - ast_indicate(original, visible_indication); - } - - /* Now, at this point, the "clone" channel is totally F'd up. We mark it as - a zombie so nothing tries to touch it. If it's already been marked as a - zombie, then free it now (since it already is considered invalid). */ if (ast_test_flag(ast_channel_flags(clonechan), AST_FLAG_ZOMBIE)) { - ast_debug(1, "Destroying channel clone '%s'\n", ast_channel_name(clonechan)); - ast_channel_unlock(clonechan); - ast_manager_event(clonechan, EVENT_FLAG_CALL, "Hangup", - "Channel: %s\r\n" - "Uniqueid: %s\r\n" - "Cause: %d\r\n" - "Cause-txt: %s\r\n", - ast_channel_name(clonechan), - ast_channel_uniqueid(clonechan), - ast_channel_hangupcause(clonechan), - ast_cause2str(ast_channel_hangupcause(clonechan)) - ); - clonechan = ast_channel_release(clonechan); + clone_was_zombie = 1; } else { - ast_debug(1, "Released clone lock on '%s'\n", ast_channel_name(clonechan)); ast_set_flag(ast_channel_flags(clonechan), AST_FLAG_ZOMBIE); ast_queue_frame(clonechan, &ast_null_frame); } + /* clear the masquerade channels */ + ast_channel_masq_set(original, NULL); + ast_channel_masqr_set(clonechan, NULL); + + /* + * When we unlock original here, it can be immediately setup to + * masquerade again or hungup. The new masquerade or hangup + * will not actually happen until we release the channels + * container lock. + */ + ast_channel_unlock(original); + + /* Disconnect the original original's physical side */ + if (ast_channel_tech(clonechan)->hangup && ast_channel_tech(clonechan)->hangup(clonechan)) { + ast_log(LOG_WARNING, "Hangup failed! Strange things may happen!\n"); + } else { + /* + * We just hung up the original original's physical side of the + * channel. Set the new zombie to use the kill channel driver + * for safety. + */ + ast_channel_tech_set(clonechan, &ast_kill_tech); + } + + ast_channel_unlock(clonechan); + + /* + * If an indication is currently playing, maintain it on the + * channel that is taking the place of original. + * + * This is needed because the masquerade is swapping out the + * internals of the channel, and the new channel private data + * needs to be made aware of the current visible indication + * (RINGING, CONGESTION, etc.) + */ + if (visible_indication) { + ast_indicate(original, visible_indication); + } + + ast_channel_lock(original); + /* Signal any blocker */ - if (ast_test_flag(ast_channel_flags(original), AST_FLAG_BLOCKING)) + if (ast_test_flag(ast_channel_flags(original), AST_FLAG_BLOCKING)) { pthread_kill(ast_channel_blocker(original), SIGURG); + } + ast_debug(1, "Done Masquerading %s (%d)\n", ast_channel_name(original), ast_channel_state(original)); if ((bridged = ast_bridged_channel(original))) { - ast_channel_lock(bridged); + ast_channel_ref(bridged); + ast_channel_unlock(original); ast_indicate(bridged, AST_CONTROL_SRCCHANGE); - ast_channel_unlock(bridged); + ast_channel_unref(bridged); + } else { + ast_channel_unlock(original); } ast_indicate(original, AST_CONTROL_SRCCHANGE); @@ -6935,24 +6931,42 @@ int ast_do_masquerade(struct ast_channel *original) masquerade_colp_transfer(original, xfer_colp); } -done: if (xfer_ds) { ast_datastore_free(xfer_ds); } - /* it is possible for the clone channel to disappear during this */ - if (clonechan) { - ast_channel_unlock(original); + + if (clone_was_zombie) { + ast_channel_lock(clonechan); + ast_debug(1, "Destroying channel clone '%s'\n", ast_channel_name(clonechan)); + ast_manager_event(clonechan, EVENT_FLAG_CALL, "Hangup", + "Channel: %s\r\n" + "Uniqueid: %s\r\n" + "Cause: %d\r\n" + "Cause-txt: %s\r\n", + ast_channel_name(clonechan), + ast_channel_uniqueid(clonechan), + ast_channel_hangupcause(clonechan), + ast_cause2str(ast_channel_hangupcause(clonechan)) + ); ast_channel_unlock(clonechan); - ao2_link(channels, clonechan); - ao2_link(channels, original); + + /* + * Drop the system reference to destroy the channel since it is + * already unlinked. + */ + ast_channel_unref(clonechan); } else { - ast_channel_unlock(original); - ao2_link(channels, original); + ao2_link(channels, clonechan); } + ao2_link(channels, original); ao2_unlock(channels); - return res; + /* Release our held safety references. */ + ast_channel_unref(original); + ast_channel_unref(clonechan); + + return 0; } void ast_set_callerid(struct ast_channel *chan, const char *cid_num, const char *cid_name, const char *cid_ani) -- cgit v1.2.3