From c3b6fcf028c55a9cba5c8fdaa743a993dce3c7fa Mon Sep 17 00:00:00 2001
From: Mark Michelson <mmichelson@digium.com>
Date: Thu, 10 Sep 2015 17:19:26 -0500
Subject: [PATCH] scheduler: Use queue for allocating sched IDs.

It has been observed that on long-running busy systems, a scheduler
context can eventually hit INT_MAX for its assigned IDs and overflow
into a very low negative number. When this occurs, odd behaviors can
result, because callers interpret a negative return value as a
failure, even though the item actually was scheduled successfully.
A freed item may then remain in the scheduler, resulting in a crash
at some point in the future.

The scheduler can overflow because every time an item is added to the
scheduler, a counter is bumped and that counter's current value is
assigned as the new item's ID.

This patch introduces a new method for assigning scheduler IDs.
Instead of assigning from a counter, a queue of available IDs is
maintained. When a new ID is needed, it is popped from the front of
the queue; when a scheduler item is released, its ID is pushed onto
the back of the queue. This way, IDs are reused as they become
available, and the growth of ID numbers is tied to the concurrent
activity within a scheduler context rather than to the uptime of the
system.

Change-Id: I532708eef8f669d823457d7fefdad9a6078b99b2
---
 main/sched.c | 168 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 138 insertions(+), 30 deletions(-)

diff --git a/main/sched.c b/main/sched.c
index fa809e1722..4b52456c8a 100644
--- a/main/sched.c
+++ b/main/sched.c
@@ -65,9 +65,26 @@ ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
 
 AST_THREADSTORAGE(last_del_id);
 
+/*!
+ * \brief Scheduler ID holder
+ *
+ * These form a queue on a scheduler context. When a new
+ * scheduled item is created, a sched_id is popped off the
+ * queue and its id is assigned to the new scheduled item.
+ * When the scheduled task is complete, the sched_id on that
+ * task is then pushed to the back of the queue to be re-used
+ * on some future scheduled item.
+ */
+struct sched_id {
+	/*! Immutable ID number that is copied onto the scheduled task */
+	int id;
+	AST_LIST_ENTRY(sched_id) list;
+};
+
 struct sched {
 	AST_LIST_ENTRY(sched) list;
-	int id; /*!< ID number of event */
+	/*! The ID that has been popped off the scheduler context's queue */
+	struct sched_id *sched_id;
 	struct timeval when; /*!< Absolute time event should take place */
 	int resched; /*!< When to reschedule */
 	int variable; /*!< Use return value from callback to reschedule */
@@ -95,6 +112,10 @@ struct ast_sched_context {
 	AST_LIST_HEAD_NOLOCK(, sched) schedc; /*!< Cache of unused schedule structures and how many */
 	unsigned int schedccnt;
 #endif
+	/*! Queue of scheduler task IDs to assign */
+	AST_LIST_HEAD_NOLOCK(, sched_id) id_queue;
+	/*! The number of IDs in the id_queue */
+	int id_queue_size;
 };
 
 static void *sched_run(void *data)
@@ -192,13 +213,13 @@ static int sched_cmp(const void *a, const void *b)
 {
 	const struct sched *as = a;
 	const struct sched *bs = b;
 
-	return as->id != bs->id; /* return 0 on a match like strcmp would */
+	return as->sched_id->id != bs->sched_id->id; /* return 0 on a match like strcmp would */
 }
 
 static unsigned int sched_hash(const void *obj)
 {
 	const struct sched *s = obj;
-	unsigned int h = s->id;
+	unsigned int h = s->sched_id->id;
 	return h;
 }
@@ -219,6 +240,7 @@ struct ast_sched_context *ast_sched_context_create(void)
 	tmp->eventcnt = 1;
 	tmp->schedq_ht = ast_hashtab_create(23, sched_cmp, ast_hashtab_resize_java,
 		ast_hashtab_newsize_java, sched_hash, 1);
+	AST_LIST_HEAD_INIT_NOLOCK(&tmp->id_queue);
 
 	if (!(tmp->sched_heap = ast_heap_create(8, sched_time_cmp,
 			offsetof(struct sched, __heap_index)))) {
@@ -229,9 +251,20 @@ struct ast_sched_context *ast_sched_context_create(void)
 	return tmp;
 }
 
+static void sched_free(struct sched *task)
+{
+	/* task->sched_id will be NULL most of the time, but when the
+	 * scheduler context shuts down, it will free all scheduled
+	 * tasks, and in that case, the task->sched_id will be non-NULL
+	 */
+	ast_free(task->sched_id);
+	ast_free(task);
+}
+
 void ast_sched_context_destroy(struct ast_sched_context *con)
 {
 	struct sched *s;
+	struct sched_id *sid;
 
 	sched_thread_destroy(con);
 	con->sched_thread = NULL;
@@ -240,13 +273,13 @@ void ast_sched_context_destroy(struct ast_sched_context *con)
 
 #ifdef SCHED_MAX_CACHE
 	while ((s = AST_LIST_REMOVE_HEAD(&con->schedc, list))) {
-		ast_free(s);
+		sched_free(s);
 	}
 #endif
 
 	if (con->sched_heap) {
 		while ((s = ast_heap_pop(con->sched_heap))) {
-			ast_free(s);
+			sched_free(s);
 		}
 		ast_heap_destroy(con->sched_heap);
 		con->sched_heap = NULL;
@@ -254,6 +287,9 @@ void ast_sched_context_destroy(struct ast_sched_context *con)
 	ast_hashtab_destroy(con->schedq_ht, NULL);
 	con->schedq_ht = NULL;
 
+	while ((sid = AST_LIST_REMOVE_HEAD(&con->id_queue, list))) {
+		ast_free(sid);
+	}
 
 	ast_mutex_unlock(&con->lock);
 	ast_mutex_destroy(&con->lock);
@@ -261,31 +297,65 @@
 	ast_free(con);
 }
 
-static struct sched *sched_alloc(struct ast_sched_context *con)
-{
-	struct sched *tmp;
+#define ID_QUEUE_INCREMENT 16
 
-	/*
-	 * We keep a small cache of schedule entries
-	 * to minimize the number of necessary malloc()'s
+/*!
+ * \brief Add new scheduler IDs to the queue.
+ *
+ * \retval The number of IDs added to the queue
+ */
+static int add_ids(struct ast_sched_context *con)
+{
+	int new_size;
+	int original_size;
+	int i;
+
+	original_size = con->id_queue_size;
+	/* So we don't go overboard with the mallocs here, we'll just up
+	 * the size of the list by a fixed amount each time instead of
+	 * multiplying the size by any particular factor
 	 */
-#ifdef SCHED_MAX_CACHE
-	if ((tmp = AST_LIST_REMOVE_HEAD(&con->schedc, list)))
-		con->schedccnt--;
-	else
-#endif
-		tmp = ast_calloc(1, sizeof(*tmp));
+	new_size = original_size + ID_QUEUE_INCREMENT;
+	if (new_size < 0) {
+		/* Overflow. Cap it at INT_MAX. */
+		new_size = INT_MAX;
+	}
+	for (i = original_size; i < new_size; ++i) {
+		struct sched_id *new_id;
 
-	return tmp;
+		new_id = ast_calloc(1, sizeof(*new_id));
+		if (!new_id) {
+			break;
+		}
+		new_id->id = i;
+		AST_LIST_INSERT_TAIL(&con->id_queue, new_id, list);
+		++con->id_queue_size;
+	}
+
+	return con->id_queue_size - original_size;
+}
+
+static int set_sched_id(struct ast_sched_context *con, struct sched *new_sched)
+{
+	if (AST_LIST_EMPTY(&con->id_queue) && (add_ids(con) == 0)) {
+		return -1;
+	}
+
+	new_sched->sched_id = AST_LIST_REMOVE_HEAD(&con->id_queue, list);
+	return 0;
 }
 
 static void sched_release(struct ast_sched_context *con, struct sched *tmp)
 {
+	if (tmp->sched_id) {
+		AST_LIST_INSERT_TAIL(&con->id_queue, tmp->sched_id, list);
+		tmp->sched_id = NULL;
+	}
+
 	/*
 	 * Add to the cache, or just free() if we
 	 * already have too many cache entries
 	 */
-
 #ifdef SCHED_MAX_CACHE
 	if (con->schedccnt < SCHED_MAX_CACHE) {
 		AST_LIST_INSERT_HEAD(&con->schedc, tmp, list);
@@ -295,6 +365,34 @@ static void sched_release(struct ast_sched_context *con, struct sched *tmp)
 		ast_free(tmp);
 }
 
+static struct sched *sched_alloc(struct ast_sched_context *con)
+{
+	struct sched *tmp;
+
+	/*
+	 * We keep a small cache of schedule entries
+	 * to minimize the number of necessary malloc()'s
+	 */
+#ifdef SCHED_MAX_CACHE
+	if ((tmp = AST_LIST_REMOVE_HEAD(&con->schedc, list))) {
+		con->schedccnt--;
+	} else
+#endif
+	{
+		tmp = ast_calloc(1, sizeof(*tmp));
+		if (!tmp) {
+			return NULL;
+		}
+	}
+
+	if (set_sched_id(con, tmp)) {
+		sched_release(con, tmp);
+		return NULL;
+	}
+
+	return tmp;
+}
+
 /*! \brief
  * Return the number of milliseconds
  * until the next scheduled event
@@ -331,7 +429,8 @@ static void schedule(struct ast_sched_context *con, struct sched *s)
 	ast_heap_push(con->sched_heap, s);
 
 	if (!ast_hashtab_insert_safe(con->schedq_ht, s)) {
-		ast_log(LOG_WARNING,"Schedule Queue entry %d is already in table!\n", s->id);
+		ast_log(LOG_WARNING,"Schedule Queue entry %d is already in table!\n",
+			s->sched_id->id);
 	}
 
 	con->schedcnt++;
@@ -380,7 +479,7 @@ int ast_sched_add_variable(struct ast_sched_context *con, int when, ast_sched_cb
 
 	ast_mutex_lock(&con->lock);
 	if ((tmp = sched_alloc(con))) {
-		tmp->id = con->eventcnt++;
+		con->eventcnt++;
 		tmp->callback = callback;
 		tmp->data = data;
 		tmp->resched = when;
@@ -390,7 +489,7 @@
 			sched_release(con, tmp);
 		} else {
 			schedule(con, tmp);
-			res = tmp->id;
+			res = tmp->sched_id->id;
 		}
 	}
 #ifdef DUMP_SCHEDULER
@@ -421,8 +520,10 @@ int ast_sched_add(struct ast_sched_context *con, int when, ast_sched_cb callback
 
 const void *ast_sched_find_data(struct ast_sched_context *con, int id)
 {
+	struct sched_id tmp_id;
 	struct sched tmp,*res;
-	tmp.id = id;
+	tmp_id.id = id;
+	tmp.sched_id = &tmp_id;
 	res = ast_hashtab_lookup(con->schedq_ht, &tmp);
 	if (res)
 		return res->data;
@@ -441,9 +542,12 @@ int ast_sched_del(struct ast_sched_context *con, int id)
 int _ast_sched_del(struct ast_sched_context *con, int id, const char *file, int line, const char *function)
 #endif
 {
-	struct sched *s, tmp = {
+	struct sched_id tmp_id = {
 		.id = id,
 	};
+	struct sched *s, tmp = {
+		.sched_id = &tmp_id,
+	};
 	int *last_id = ast_threadstorage_get(&last_del_id, sizeof(int));
 
 	DEBUG(ast_debug(1, "ast_sched_del(%d)\n", id));
@@ -456,11 +560,12 @@ int _ast_sched_del(struct ast_sched_context *con, int id, const char *file, int line, const char *function)
 	s = ast_hashtab_lookup(con->schedq_ht, &tmp);
 	if (s) {
 		if (!ast_heap_remove(con->sched_heap, s)) {
-			ast_log(LOG_WARNING,"sched entry %d not in the sched heap?\n", s->id);
+			ast_log(LOG_WARNING,"sched entry %d not in the sched heap?\n", s->sched_id->id);
 		}
 
 		if (!ast_hashtab_remove_this_object(con->schedq_ht, s)) {
-			ast_log(LOG_WARNING,"Found sched entry %d, then couldn't remove it?\n", s->id);
+			ast_log(LOG_WARNING,"Found sched entry %d, then couldn't remove it?\n",
+				s->sched_id->id);
 		}
 
 		con->schedcnt--;
@@ -558,7 +663,7 @@ void ast_sched_dump(struct ast_sched_context *con)
 		q = ast_heap_peek(con->sched_heap, x);
 		delta = ast_tvsub(q->when, when);
 		ast_debug(1, "|%.4d | %-15p | %-15p | %.6ld : %.6ld |\n",
-			q->id,
+			q->sched_id->id,
 			q->callback,
 			q->data,
 			(long)delta.tv_sec,
@@ -596,7 +701,8 @@ int ast_sched_runq(struct ast_sched_context *con)
 		current = ast_heap_pop(con->sched_heap);
 
 		if (!ast_hashtab_remove_this_object(con->schedq_ht, current)) {
-			ast_log(LOG_ERROR,"Sched entry %d was in the schedq list but not in the hashtab???\n", current->id);
+			ast_log(LOG_ERROR,"Sched entry %d was in the schedq list but not in the hashtab???\n",
+				current->sched_id->id);
 		}
 
 		con->schedcnt--;
@@ -637,14 +743,16 @@
 
 long ast_sched_when(struct ast_sched_context *con,int id)
 {
+	struct sched_id tmp_id;
 	struct sched *s, tmp;
 	long secs = -1;
 	DEBUG(ast_debug(1, "ast_sched_when()\n"));
 
 	ast_mutex_lock(&con->lock);
 
-	/* these next 2 lines replace a lookup loop */
-	tmp.id = id;
+	/* these next 3 lines replace a lookup loop */
+	tmp_id.id = id;
+	tmp.sched_id = &tmp_id;
 	s = ast_hashtab_lookup(con->schedq_ht, &tmp);
 
 	if (s) {
-- 
GitLab
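
Editor's note on the technique: the patch above amounts to a free-list
ID allocator with FIFO reuse. The sketch below distills that pattern
into a self-contained C program for readers outside the Asterisk tree.
It is not Asterisk code: "struct id_node", "struct id_pool", and the
pool_* functions are hypothetical stand-ins for struct sched_id, the
context's id_queue, add_ids(), set_sched_id(), and sched_release(),
using plain pointers in place of the AST_LIST macros.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Mirrors ID_QUEUE_INCREMENT: grow by a fixed step, not a factor. */
#define ID_INCREMENT 16

/* One holder in the queue of available IDs (cf. struct sched_id). */
struct id_node {
	int id;
	struct id_node *next;
};

/* FIFO of free IDs: pop from the head, push released IDs on the tail. */
struct id_pool {
	struct id_node *head;
	struct id_node *tail;
	int size; /* total IDs minted so far; also the next fresh ID */
};

/* Mint a fixed batch of new IDs (cf. add_ids()). Returns the number
 * actually added; 0 on allocation failure or once capped at INT_MAX. */
static int pool_grow(struct id_pool *pool)
{
	int target = pool->size + ID_INCREMENT;
	int added = 0;

	if (target < 0) {
		target = INT_MAX; /* cap instead of wrapping negative */
	}
	while (pool->size < target) {
		struct id_node *node = calloc(1, sizeof(*node));

		if (!node) {
			break;
		}
		node->id = pool->size++;
		if (pool->tail) {
			pool->tail->next = node;
		} else {
			pool->head = node;
		}
		pool->tail = node;
		added++;
	}
	return added;
}

/* Pop an ID holder, growing the pool first if it is empty
 * (cf. set_sched_id()). Returns NULL only on allocation failure. */
static struct id_node *pool_get(struct id_pool *pool)
{
	struct id_node *node;

	if (!pool->head && !pool_grow(pool)) {
		return NULL;
	}
	node = pool->head;
	pool->head = node->next;
	if (!pool->head) {
		pool->tail = NULL;
	}
	node->next = NULL;
	return node;
}

/* Return an ID holder to the back of the queue (cf. sched_release()),
 * so it is reused before any fresh ID is minted. */
static void pool_put(struct id_pool *pool, struct id_node *node)
{
	node->next = NULL;
	if (pool->tail) {
		pool->tail->next = node;
	} else {
		pool->head = node;
	}
	pool->tail = node;
}

int main(void)
{
	struct id_pool pool = { NULL, NULL, 0 };
	struct id_node *a = pool_get(&pool);
	struct id_node *b = pool_get(&pool);

	printf("a=%d b=%d\n", a->id, b->id); /* a=0 b=1 */
	pool_put(&pool, a); /* ID 0 rejoins at the back of the queue */
	printf("next=%d\n", pool_get(&pool)->id); /* 2: IDs 2..15 are queued ahead of 0 */
	return 0;
}

Because released IDs go to the back of the queue rather than the front,
a just-freed ID is not handed out again immediately, which mirrors the
patch and reduces the chance that a stale caller's ID matches a newly
scheduled task. The high-water mark of ID numbers then tracks peak
concurrency rather than the total number of scheduling operations.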