mintrac: anatomy of a simple C webserver

This project was a way for me to learn more about socket programming and to practice other (relatively) low-level backend stuff (a fair bit of hand-written code for deserialisation, validation, parsing, etc…). It’s really two servers in one binary:

  • receiver: receives positional data via HTTP; my use case is recording trips with mobile applications like OsmAnd.
  • webapp: a web application that displays the positional data on a map; the frontend is vanilla JS using the Leaflet.js library.

Both servers use event loops, and we use libev to help with this. For the database, we use sqlite3.
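
If you haven't used libev before, the pattern both servers follow looks roughly like this (a generic sketch, not mintrac code): initialise a watcher for an event you care about, register it with a loop, then run the loop and let it call you back.

#include <ev.h>
#include <stdio.h>

// Generic libev shape: watch a file descriptor (stdin here) for readability.
static void stdin_cb(EV_P_ ev_io *w, int events_received)
{
	printf("stdin is readable\n");
	ev_break(EV_A_ EVBREAK_ALL); // stop the loop after the first event
}

int main(void)
{
	struct ev_loop *loop = EV_DEFAULT;
	ev_io watcher;

	ev_io_init(&watcher, stdin_cb, /* fd */ 0, EV_READ);
	ev_io_start(loop, &watcher);
	ev_run(loop, 0); // blocks, dispatching callbacks, until ev_break()
	return 0;
}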

Let’s start with main():

int main(int argc, char **argv)
{
	process_args(argc, argv);

	// default to applying start/stop to both webapp and receiver
	if (!RUN_TARGET) RUN_TARGET = BOTH;

	switch (RUN_MODE) {
	case INIT:
		mintrac_init();
		break;

	case START:
		if (mintrac_start() != MINTRAC_OK)
			return EXIT_FAILURE;
		break;

	case STOP:
		if (RUN_TARGET & WEBAPP)
			mintrac_term(WEBAPP_PIDFILE_PATH, "webapp");

		if (RUN_TARGET & RECEIVER)
			mintrac_term(RECEIVER_PIDFILE_PATH, "receiver");
		break;

	default:
		printf("%s", usage);
		return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}

process_args() sets some global flags, among them RUN_MODE, which can be INIT, START or STOP. INIT is just first-time setup (it creates the database). STOP sends a TERM signal to the process(es) running START, which then clean up and exit. Let’s look at START.
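
To make the flag tests in main() concrete, the globals might be declared along these lines (a guess at the shape; the real definitions live in the project headers):

// Hypothetical declarations, matching how main() uses them:
enum run_mode   { MODE_UNSET, INIT, START, STOP };
enum run_target { WEBAPP = 1 << 0, RECEIVER = 1 << 1, BOTH = WEBAPP | RECEIVER };

enum run_mode   RUN_MODE;   // set by process_args(); unset falls through to the usage message
enum run_target RUN_TARGET; // bitmask; 0 means "not specified" and is defaulted to BOTH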

NB: I have left most error checking and logging in these snippets. die(), log_msg() and debug() are logging macros (die() logs and exits).
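
For context, they might look something like this (a sketch; the real macros presumably do a bit more, e.g. timestamps or a log file):

// Hypothetical versions of the logging macros used throughout:
#define log_msg(...) fprintf(stderr, __VA_ARGS__)
#define debug(...)   do { if (DEBUG_MODE) fprintf(stderr, __VA_ARGS__); } while (0)
#define die(...)     do { log_msg(__VA_ARGS__); exit(EXIT_FAILURE); } while (0)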

Flags set from the command line control whether the receiver, the webapp or both are started. The default is to start both, which is the most interesting case:

pid_t child = fork();
if (child < 0) die("[ERROR] failed to fork()!\n");

if (child) {
	if (sigaction(SIGINT, &webapp_sigact, NULL) < 0)
		die("[ERROR] webapp: failed to install SIGINT handler\n");

	if (mintrac_webapp_run() != MINTRAC_OK)
		return MINTRAC_ERR;
} else {
	if (sigaction(SIGINT, &receiver_sigact, NULL) < 0)
		die("[ERROR] receiver: failed to install SIGINT handler\n");

	if (mintrac_receiver_run() != MINTRAC_OK)
		return MINTRAC_ERR;
}

Since there is no data shared directly between the receiver and the webapp, we fork and run them in separate processes: the webapp in the parent and the receiver in the child. This way, if the webapp crashes the receiver stays live, which is important since a receiver crash might mean a trip not getting recorded - clients usually do some form of caching and/or local recording, but still…

At this point we also want to handle SIGINT, so that we can exit gracefully from here on out. The callbacks simply call the appropriate termination command; they look something like this (the webapp version is the same, just 's/receiver/webapp/'):

static void receiver_handle_sigint(int signo, siginfo_t *info, void *context)
{
	...
	mintrac_term(RECEIVER_PIDFILE_PATH, "receiver");
}

mintrac_term() is the main STOP function: it finds the pid of the process(es) running START from a .pid file and sends them a TERM signal.
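
Simplified, it boils down to something like this (a sketch with error handling trimmed, and I'm hand-waving the pidfile format as a plain decimal pid):

#include <signal.h>
#include <stdio.h>

static int term_sketch(const char *pidfile_path, const char *name)
{
	FILE *f = fopen(pidfile_path, "r");
	if (!f) {
		log_msg("[ERROR] %s: could not open pidfile %s\n", name, pidfile_path);
		return MINTRAC_ERR;
	}

	int pid;
	if (fscanf(f, "%d", &pid) != 1) { // pidfile holds the pid in text form
		fclose(f);
		return MINTRAC_ERR;
	}
	fclose(f);

	kill((pid_t)pid, SIGTERM); // the server's SIGTERM watcher breaks its event loops
	return MINTRAC_OK;
}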

Next in the startup sequence are the _run functions (again, the webapp version is very similar):

int mintrac_receiver_run(void)
{
	pid_t pid = getpid();
	if (pidfile_create(RECEIVER_PIDFILE_PATH, pid, "receiver") != MINTRAC_OK)
		return MINTRAC_ERR;

	struct server *receiver = server_create("receiver", RECEIVER_PORT,
			RECEIVER_BUFFER_SIZE, RECEIVER_BACKLOG, RECEIVER_TIMEOUT);

	if (!receiver) {
		log_msg("[ERROR] receiver: failed to start\n");
		if (remove(RECEIVER_PIDFILE_PATH) < 0) {
			log_msg("[ERROR] receiver: failed to delete file (%s): %s\n",
			         RECEIVER_PIDFILE_PATH, strerror(errno));
		}
		return MINTRAC_ERR;
	}
	log_msg("[INFO] receiver: running on port %s\n", RECEIVER_PORT);

	server_start_accept_loop(receiver, receiver_connection_cb);

	free(receiver);
	return MINTRAC_OK;
}

First, we create the pidfile that mintrac_term() looks for; we then instantiate a server with some parameters and start the main event loop. For reference, here is the server struct:

struct server {
	ev_io accept_watcher;
	const char *name; // for logging
	const char *port;
	struct ev_loop *accept_loop;
	struct connection *conns;
	void (*conn_cb)(struct ev_loop *loop, ev_io *evio_conn, int revents);
	size_t conn_buf_size;
	int fd, backlog;
	ev_tstamp timeout;
};

This is server creation:

struct server *server_create(const char *name, const char *port,
		size_t conn_buf_size, int backlog, ev_tstamp timeout)
{
	assert(name && port && conn_buf_size && backlog && timeout);
	struct server *srv = malloc(sizeof(*srv));

	if (!srv) return NULL;

	srv->name = name;
	srv->port = port;
	srv->backlog = backlog;
	srv->timeout = timeout;
	srv->conn_buf_size = conn_buf_size;

	struct addrinfo hints, *gai_result, *res;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family    = AF_UNSPEC;   // IPV4 or IPV6
	hints.ai_socktype  = SOCK_STREAM; // TCP
	hints.ai_flags     = AI_PASSIVE;  // wildcard IP address
	hints.ai_protocol  = 0;           // any
	hints.ai_canonname = NULL;
	hints.ai_addr      = NULL;
	hints.ai_next      = NULL;

	int ret = getaddrinfo(NULL, port, &hints, &gai_result);
	if (ret != 0) {
		log_msg("[ERROR] %s: %s\n", name, gai_strerror(ret));
		free(srv);
		return NULL;
	}

	for (res = gai_result; res; res = res->ai_next) {
		if ((srv->fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) < 0)
			continue; // try next address
		
		// setsockopt(srv->fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))

		if (bind(srv->fd, res->ai_addr, res->ai_addrlen) == 0)
			break;

		// failed bind: close socket and try next addr
		close(srv->fd);
	}
	freeaddrinfo(gai_result);

	// No address succeeded
	if (!res) {
		log_msg("[ERROR] %s: could not bind()\n", name);
		free(srv);
		return NULL;
	}
	if (listen(srv->fd, backlog) != 0) {
		log_msg("[ERROR] %s: failed to listen() on socket: %s\n", name, strerror(errno));
		close(srv->fd);
		free(srv);
		return NULL;
	}
	return srv;
}

For more detail on this section especially, see Beej’s Guide to Network Programming, a great guide to internet socket programming. Here’s the short version:

  • We allocate our server structure and initialise it with the parameters we passed in.
  • We fill out a struct with some information to pass to getaddrinfo(), which will perform domain name translation: we pass it a host name (or string representing an IP address) and port (as a string) or service name (“http”, “ftp”, “ssh”, …) and it returns a linked list of struct addrinfos containing information on addresses matching our specifications.
  • We traverse the linked list, trying for each address to create a socket (socket()) and assign it a name (bind()). The commented-out setsockopt() is useful for testing and for quick restarts if the server crashes; more details in the man pages, Beej’s Guide and the sketch after this list.
  • When we manage to bind(), or we run out of addresses, we leave the loop and free the heap-allocated linked list. If we did manage to bind(), we start to listen() for connections on the socket and return our initialised server.
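
Enabled, the commented-out setsockopt() line would look like this; SO_REUSEADDR lets us rebind the port right away after a restart instead of waiting out TIME_WAIT:

// Inside the address loop, after socket() and before bind():
int on = 1;
if (setsockopt(srv->fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
	log_msg("[WARN] %s: setsockopt(SO_REUSEADDR) failed: %s\n", name, strerror(errno));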

So now we have our server; back to the caller:

int mintrac_receiver_run(void)
{
	pid_t pid = getpid();
	if (pidfile_create(RECEIVER_PIDFILE_PATH, pid, "receiver") != MINTRAC_OK)
		return MINTRAC_ERR;

	struct server *receiver = server_create("receiver", RECEIVER_PORT,
			RECEIVER_BUFFER_SIZE, RECEIVER_BACKLOG, RECEIVER_TIMEOUT);

	if (!receiver) {
		log_msg("[ERROR] receiver: failed to start\n");
		if (remove(RECEIVER_PIDFILE_PATH) < 0) {
			log_msg("[ERROR] receiver: failed to delete file (%s): %s\n",
			         RECEIVER_PIDFILE_PATH, strerror(errno));
		}
		return MINTRAC_ERR;
	}
	log_msg("[INFO] receiver: running on port %s\n", RECEIVER_PORT);

	server_start_accept_loop(receiver, receiver_connection_cb);

	free(receiver);
	return MINTRAC_OK;
}

We check for failure (in which case we delete the pidfile and exit) and log the result. server_start_accept_loop() will start our first event loop (for more info on libev see the great documentation). We pass it our server instance and a callback function, which we will look at in a moment. Note that we will keep polling for events indefinitely, so when we free() our server instance and return, it means we have received a termination signal and are shutting down.

void server_start_accept_loop(struct server *srv, void (*conn_cb)(struct ev_loop *, ev_io *, int))
{
	srv->accept_loop = EV_DEFAULT;
	srv->conn_cb = conn_cb;

	ev_signal sigterm_watcher;
	ev_signal_init(&sigterm_watcher, sigterm_cb, SIGTERM);
	ev_signal_start(srv->accept_loop, &sigterm_watcher);

	ev_io_init(&(srv->accept_watcher), server_accept_cb, srv->fd, EV_READ);
	ev_io_start(srv->accept_loop, &(srv->accept_watcher));
	ev_run(srv->accept_loop, 0);

	log_msg("[INFO] %s: received SIGTERM, exiting\n", srv->name);
}

We have libev hand us its default event loop and store a pointer to it in our struct server; we also store the callback we passed in. We set up a couple of watchers and associate them with our event loop:

  • one to poll for TERM signals, which is our cue to stop accept()ing connections (break the event loop).
  • the main one triggers our server_accept_cb callback when we can read from our server’s file descriptor.

We then start the event loop (note that again we only return on termination).

The termination callback simply breaks the loop:

static void sigterm_cb(EV_P_ ev_signal *sig, int events_received)
{
	ev_break(EV_A_ EVBREAK_ALL);
}

The accept() is more interesting:

static void server_accept_cb(EV_P_ ev_io *watcher, int events_received)
{
	int cfd = -1;
	struct sockaddr_storage caddr;
	socklen_t caddr_len = sizeof(caddr);
	struct connection conn = {0}; // zeroed so the error path can safely free() conn.buf

	struct server *srv = (struct server *)watcher;

	if ((cfd = accept(srv->fd, (struct sockaddr *)&caddr, &caddr_len)) < 0) {
		log_msg("[ERROR] %s: failed to accept()\n", srv->name);
		goto err;
	}
	debug("%s: got client\n", srv->name);
	
	conn.loop = EV_DEFAULT;
	conn.fd = cfd;
	conn.buf_size = srv->conn_buf_size;
	if (!(conn.buf = malloc(conn.buf_size))) {
		log_msg("[ERROR] %s: failed to allocate memory for main connection buffer\n", srv->name);
		goto err;
	}
	ev_timer_init(&conn.timer, timeout_cb, 0., srv->timeout);
	ev_timer_again(conn.loop, &conn.timer);

	ev_io_init(&conn.watcher, srv->conn_cb, cfd, EV_READ);
	ev_io_start(conn.loop, &conn.watcher);
	ev_run(conn.loop, 0);

	ev_io_stop(conn.loop, &conn.watcher);
	ev_timer_stop(conn.loop, &conn.timer);

	close(cfd);
	free(conn.buf);
	return;

err:
	if (cfd > -1) close(cfd);
	if (conn.buf) free(conn.buf);
	return;
}

One of the first things we do is get our struct server * back: we can simply cast the ev_io *, since a pointer to the first member of a struct is also a pointer to the struct itself:

struct server {
	ev_io accept_watcher;
	...
};
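
This relies on accept_watcher staying the first member. The more general form of the same idiom subtracts the member’s offset instead; this is just a sketch of that alternative, not something mintrac needs:

#include <stddef.h>

// Recover the containing struct from a pointer to one of its members.
static struct server *server_from_watcher(ev_io *watcher)
{
	return (struct server *)((char *)watcher - offsetof(struct server, accept_watcher));
}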

Next we try to accept() a connection on the server’s socket. On success we initialise a struct connection, which will hold the newly acquired connected socket file descriptor and data related to the handling of requests and responses associated with it. Here is the struct:

struct connection {
	ev_io watcher;
	ev_timer timer;
	struct ev_loop *loop;
	char *buf;
	size_t buf_size;
	int fd;
};

The watcher is responsible for telling us when we can do IO on the connected socket. The timer is responsible for handling connection timeouts: we start a countdown when we run our connection event loop and reset it (ev_timer_again()) whenever we read from or write to the socket. loop is the event loop that the watcher and timer operate within; the buffer will hold the raw bytes from each client request; finally we have the file descriptor referring to the connected socket.
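
That read-and-reset pattern looks roughly like this inside a connection callback (a sketch of the idea, not the actual request-handling code we’ll get to below):

// Read from the connected socket and push the idle timeout back whenever
// the client shows signs of life.
ssize_t n = recv(conn->fd, conn->buf, conn->buf_size, 0);
if (n <= 0) {
	// 0: client disconnected, <0: error; either way, stop this connection's loop
	ev_break(conn->loop, EVBREAK_ONE);
	return;
}
ev_timer_again(conn->loop, &conn->timer); // restart the timeout countdown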

After initialising our connection struct, we set our timer and IO watcher and run the event loop.

The timeout callback is like the termination signal callback before: it just stops the event loop. Note that in this case we use EVBREAK_ONE to break out of only the innermost (connection) loop, not the server’s accept() loop:

static void timeout_cb(EV_P_ ev_timer *timer, int events_received)
{
	...
	ev_break(EV_A_ EVBREAK_ONE);
}

The connection callback is held by the server struct and is set at initialisation. It of course differs a fair bit between the webapp and the receiver; here is what we do in the receiver:

static void receiver_connection_cb(struct ev_loop *loop, ev_io *watcher, int events_received)
{
	struct connection *conn = (struct connection *)watcher;
	struct http_request req;

	int res = http_populate_request(conn, &req, "receiver");
	if (res == MINTRAC_DONE || res == MINTRAC_ERR) return;

	struct datapoint dp;
	if (datapoint_deserialise(&dp, req.query, req.query_len) != MINTRAC_OK) {
		log_msg("[WARN] receiver: ignoring invalid datapoint\n");
		return;
	}
	if (DEBUG_MODE) {
		char dpstring[2048]; // big arbitrary size, guaranteed to be big enough
		debug("receiver: got datapoint:\n%s\n", datapoint_tostring(&dp, dpstring));
	}
	if (db_add_datapoint(&dp) != MINTRAC_OK)
		log_msg("[ERROR] receiver: failed to save datapoint to database\n");
}

First we use the same trick as we did with the server to get our struct connection * back from the ev_io *. We then try to read data from our connected socket, and validate and parse the HTTP request.

A valid request will include the positional data we are looking for: latitude and longitude, a timestamp, a username, and some optional nice-to-have information (dilution of precision, device battery level, etc.).
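
For illustration, the internal representation presumably looks something along these lines (field names and types are my guesses, not the real definition):

#include <time.h>

// Illustrative only; the real struct datapoint lives in the project headers.
struct datapoint {
	double lat, lon;   // required: position
	time_t timestamp;  // required: when the fix was taken
	char   user[64];   // required: who recorded the point
	double hdop;       // optional: horizontal dilution of precision
	int    battery;    // optional: device battery level, in percent
};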

datapoint_deserialise() takes care of validating the data and uses it to construct an instance of our internal representation (struct datapoint); we then save the data point to the database with db_add_datapoint().
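
db_add_datapoint() is essentially an INSERT through a prepared statement. A simplified sketch (table and column names are invented, and the real function presumably keeps its sqlite3 handle elsewhere rather than taking it as a parameter):

#include <sqlite3.h>

static int db_add_datapoint_sketch(sqlite3 *db, const struct datapoint *dp)
{
	sqlite3_stmt *stmt;
	const char *sql =
		"INSERT INTO datapoints (lat, lon, timestamp, user) VALUES (?, ?, ?, ?);";

	if (sqlite3_prepare_v2(db, sql, -1, &stmt, NULL) != SQLITE_OK)
		return MINTRAC_ERR;

	// bind the datapoint's fields to the statement's placeholders (1-indexed)
	sqlite3_bind_double(stmt, 1, dp->lat);
	sqlite3_bind_double(stmt, 2, dp->lon);
	sqlite3_bind_int64(stmt, 3, (sqlite3_int64)dp->timestamp);
	sqlite3_bind_text(stmt, 4, dp->user, -1, SQLITE_TRANSIENT);

	int rc = sqlite3_step(stmt); // SQLITE_DONE on a successful INSERT
	sqlite3_finalize(stmt);
	return rc == SQLITE_DONE ? MINTRAC_OK : MINTRAC_ERR;
}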

This callback is the heart of the server: for each connected client we will either be waiting to be able to read a request or be in here, acting on it. Once we are done (the client disconnects or we time out), we stop the event loop and clean up:

static void server_accept_cb(EV_P_ ev_io *watcher, int events_received)
{
	...
	ev_io_stop(conn.loop, &conn.watcher);
	ev_timer_stop(conn.loop, &conn.timer);

	close(cfd);
	free(conn.buf);
	return;
	...
}