diff --git a/channels/chan_sip.c b/channels/chan_sip.c
index 3fdc3caaeb9e17c98287fed79bbe93c939f50eeb..8f76e9cc31c3aba13b454cec934bd2cf65252b11 100644
--- a/channels/chan_sip.c
+++ b/channels/chan_sip.c
@@ -11095,7 +11095,7 @@ static int process_sdp_a_audio(const char *a, struct sip_pvt *p, struct ast_rtp_
 
 		if (framing && p->autoframing) {
 			ast_debug(1, "Setting framing to %ld\n", framing);
-			ast_rtp_codecs_set_framing(ast_rtp_instance_get_codecs(p->rtp), framing);
+			ast_format_cap_set_framing(p->caps, framing);
 		}
 		found = TRUE;
 	} else if (sscanf(a, "rtpmap: %30u %127[^/]/%30u", &codec, mimeSubtype, &sample_rate) == 3) {
@@ -13384,6 +13384,11 @@ static enum sip_result add_sdp(struct sip_request *resp, struct sip_pvt *p, int
 			ast_str_append(&a_audio, 0, "a=maxptime:%d\r\n", max_audio_packet_size);
 		}
 
+		if (!ast_test_flag(&p->flags[0], SIP_OUTGOING)) {
+			ast_debug(1, "Setting framing on incoming call: %u\n", min_audio_packet_size);
+			ast_rtp_codecs_set_framing(ast_rtp_instance_get_codecs(p->rtp), min_audio_packet_size);
+		}
+
 		if (!doing_directmedia) {
 			if (ast_test_flag(&p->flags[2], SIP_PAGE3_ICE_SUPPORT)) {
 				add_ice_to_sdp(p->rtp, &a_audio);
@@ -13676,10 +13681,6 @@ static int transmit_response_with_sdp(struct sip_pvt *p, const char *msg, const
 		add_cc_call_info_to_response(p, &resp);
 	}
 	if (p->rtp) {
-		if (!p->autoframing && !ast_test_flag(&p->flags[0], SIP_OUTGOING)) {
-			ast_debug(1, "Setting framing from config on incoming call\n");
-			ast_rtp_codecs_set_framing(ast_rtp_instance_get_codecs(p->rtp), ast_format_cap_get_framing(p->caps));
-		}
 		ast_rtp_instance_activate(p->rtp);
 		try_suggested_sip_codec(p);
 		if (p->t38.state == T38_ENABLED) {