diff --git a/main/channel.c b/main/channel.c
index ecebc70625aa2881f882cb9e94e374c3d310210c..a7e158d32924342f783c904e905597f76fdb029f 100644
--- a/main/channel.c
+++ b/main/channel.c
@@ -2537,9 +2537,16 @@ static struct ast_frame *__ast_read(struct ast_channel *chan, int dropaudio)
 			}
 			break;
 		case AST_FRAME_NULL:
+			/* The EMULATE_DTMF flag must be cleared here as opposed to when the duration
+			 * is reached, because we want to make sure we pass at least one
+			 * voice frame through before starting the next digit, to ensure a gap
+			 * between DTMF digits. */
 			if (ast_test_flag(chan, AST_FLAG_EMULATE_DTMF)) {
 				struct timeval now = ast_tvnow();
-				if (ast_tvdiff_ms(now, chan->dtmf_tv) >= chan->emulate_dtmf_duration) {
+				if (!chan->emulate_dtmf_duration) {
+					ast_clear_flag(chan, AST_FLAG_EMULATE_DTMF);
+					chan->emulate_dtmf_digit = 0;
+				} else if (ast_tvdiff_ms(now, chan->dtmf_tv) >= chan->emulate_dtmf_duration) {
 					chan->emulate_dtmf_duration = 0;
 					ast_frfree(f);
 					f = &chan->dtmff;