Sockets & Networking
The socket API is how programs communicate over a network. Every web server, database client, chat application, and networked tool uses sockets. The API was designed for C and has barely changed since the 1980s. Every modern networking library — in any language — wraps these same system calls. Understanding sockets means understanding how all network communication works.
The Socket API
A socket is a file descriptor that represents a network endpoint. The core functions are:
- socket — create a socket
- bind — assign an address to a socket
- listen — mark a socket as passive (server)
- accept — wait for and accept a connection
- connect — initiate a connection (client)
- send/recv — transfer data
- close — close the socket
A TCP Server
The canonical TCP server in C follows this sequence: create, bind, listen, accept, communicate, close.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#define PORT 8080
#define BACKLOG 10
#define BUFFER_SIZE 1024

/*
 * Flags passed to every send() towards a client. Where the platform provides
 * MSG_NOSIGNAL, writing to a connection the peer has already closed makes
 * send() fail with an error instead of raising SIGPIPE, whose default action
 * would terminate the whole server.
 */
#ifdef MSG_NOSIGNAL
#define ECHO_SEND_FLAGS MSG_NOSIGNAL
#else
#define ECHO_SEND_FLAGS 0
#endif

/*
 * Send exactly len bytes from buf on fd, looping because send() may accept
 * fewer bytes than requested on a stream socket.
 *
 * Returns 0 once everything has been handed to send(), or -1 as soon as
 * send() reports a failure (or makes no progress).
 */
static int echo_bytes(int fd, const char *buf, size_t len) {
    while (len > 0) {
        ssize_t sent = send(fd, buf, len, ECHO_SEND_FLAGS);
        if (sent <= 0) {
            return -1;
        }
        buf += sent;
        len -= (size_t)sent;
    }
    return 0;
}

/*
 * Single-client-at-a-time TCP echo server on PORT.
 *
 * Sequence: create, bind, listen, then loop forever accepting a client,
 * echoing everything it sends until it disconnects, and closing it.
 * Returns 1 if the listening socket cannot be set up.
 */
int main(void) {
    /* Create a TCP socket */
    int server_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_fd < 0) {
        perror("socket");
        return 1;
    }

    /*
     * Allow address reuse (avoids "address already in use" error).
     * A failure here is reported but not fatal: bind() below decides.
     */
    int opt = 1;
    if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
        perror("setsockopt");
    }

    /* Bind to port */
    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY; /* Listen on all interfaces */
    addr.sin_port = htons(PORT);
    if (bind(server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("bind");
        close(server_fd);
        return 1;
    }

    /* Start listening */
    if (listen(server_fd, BACKLOG) < 0) {
        perror("listen");
        close(server_fd);
        return 1;
    }
    printf("Server listening on port %d\n", PORT);

    /* Accept and handle connections, one client at a time */
    while (1) {
        struct sockaddr_in client_addr;
        socklen_t client_len = sizeof(client_addr);
        int client_fd = accept(server_fd,
                               (struct sockaddr *)&client_addr,
                               &client_len);
        if (client_fd < 0) {
            perror("accept");
            continue;
        }

        char client_ip[INET_ADDRSTRLEN];
        if (inet_ntop(AF_INET, &client_addr.sin_addr,
                      client_ip, sizeof(client_ip)) == NULL) {
            /* Keep client_ip a valid string for the log line below */
            perror("inet_ntop");
            snprintf(client_ip, sizeof(client_ip), "?");
        }
        printf("Connection from %s:%d\n", client_ip, ntohs(client_addr.sin_port));

        /* Echo received data back */
        char buffer[BUFFER_SIZE];
        ssize_t bytes_read;
        /* Read at most BUFFER_SIZE - 1 bytes so there is room for the '\0' */
        while ((bytes_read = recv(client_fd, buffer, sizeof(buffer) - 1, 0)) > 0) {
            buffer[bytes_read] = '\0';
            printf("Received: %s", buffer);
            if (echo_bytes(client_fd, buffer, (size_t)bytes_read) < 0) {
                perror("send");
                break;
            }
        }
        /* 0 means the peer closed the connection; negative is a real error */
        if (bytes_read < 0) {
            perror("recv");
        }

        close(client_fd);
        printf("Client disconnected\n");
    }

    /* Not reached: the accept loop above never exits */
    close(server_fd);
    return 0;
}
Building & Testing
gcc -o server server.c
./server &
echo "Hello" | nc localhost 8080
Server listening on port 8080
Connection from 127.0.0.1:54321
Received: Hello
Client disconnected
A TCP Client
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
/*
 * Minimal TCP client: connects to 127.0.0.1:8080, sends one line, prints the
 * first chunk of the reply, and exits.
 *
 * Returns 0 on success and 1 if any socket call fails.
 */
int main(void) {
    int sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket");
        return 1;
    }

    struct sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(8080);
    /* inet_pton returns 1 only when the text was a valid address */
    if (inet_pton(AF_INET, "127.0.0.1", &server_addr.sin_addr) != 1) {
        fprintf(stderr, "inet_pton: invalid IPv4 address\n");
        close(sock);
        return 1;
    }

    if (connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
        perror("connect");
        close(sock);
        return 1;
    }

    /* send() may accept only part of the buffer, so loop until it is all out */
    const char *message = "Hello, server!\n";
    const char *next = message;
    size_t remaining = strlen(message);
    while (remaining > 0) {
        ssize_t sent = send(sock, next, remaining, 0);
        if (sent <= 0) {
            perror("send");
            close(sock);
            return 1;
        }
        next += sent;
        remaining -= (size_t)sent;
    }

    /*
     * A single recv() returns whatever has arrived so far, which may be only
     * part of the reply. Leave one byte free for the terminating '\0'.
     */
    char buffer[1024];
    ssize_t bytes = recv(sock, buffer, sizeof(buffer) - 1, 0);
    if (bytes > 0) {
        buffer[bytes] = '\0';
        printf("Server replied: %s", buffer);
    } else if (bytes < 0) {
        perror("recv");
        close(sock);
        return 1;
    }

    close(sock);
    return 0;
}
IPv4 & IPv6 with getaddrinfo
Hardcoding AF_INET and struct sockaddr_in locks you to IPv4. The modern approach uses getaddrinfo, which handles both IPv4 and IPv6 transparently.
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
/*
 * Resolve host/port and open a TCP connection to the first address that
 * accepts it, trying IPv4 and IPv6 candidates in the order getaddrinfo
 * returns them.
 *
 * Returns the connected socket descriptor, or -1 if resolution fails (the
 * reason is printed to stderr) or no candidate address could be connected.
 */
int connect_to(const char *host, const char *port) {
    const struct addrinfo hints = {
        .ai_family = AF_UNSPEC,     /* IPv4 or IPv6 */
        .ai_socktype = SOCK_STREAM, /* TCP */
    };
    struct addrinfo *candidates = NULL;

    int rc = getaddrinfo(host, port, &hints, &candidates);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        return -1;
    }

    int connected_fd = -1;
    const struct addrinfo *ai = candidates;
    while (ai != NULL && connected_fd < 0) {
        int attempt = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (attempt >= 0) {
            if (connect(attempt, ai->ai_addr, ai->ai_addrlen) == 0) {
                connected_fd = attempt; /* Connected: stop searching */
            } else {
                close(attempt);
            }
        }
        ai = ai->ai_next;
    }

    freeaddrinfo(candidates);
    return connected_fd;
}
getaddrinfo resolves hostnames, handles DNS, and returns a linked list of addresses to try. This is the correct way to write network code that works with both IPv4 and IPv6.
Handling Partial Reads & Writes
send and recv do not guarantee they transfer the entire buffer. You must handle partial transfers.
/*
 * Write all len bytes of buf to sock, calling send() repeatedly until
 * nothing is left.
 *
 * Returns len on success. If any send() call returns 0 or a negative value,
 * that value is returned immediately and the number of bytes already sent is
 * not reported.
 */
ssize_t send_all(int sock, const void *buf, size_t len) {
    const char *bytes = buf;
    size_t done = 0;

    for (;;) {
        if (done == len) {
            return (ssize_t)len;
        }
        ssize_t n = send(sock, bytes + done, len - done, 0);
        if (n <= 0) {
            return n; /* Error or connection closed */
        }
        done += (size_t)n;
    }
}
/*
 * Read exactly len bytes from sock into buf, calling recv() repeatedly until
 * the buffer is full.
 *
 * Returns len on success. If any recv() call returns 0 (peer closed) or a
 * negative value (error), that value is returned immediately and the number
 * of bytes already stored in buf is not reported.
 */
ssize_t recv_all(int sock, void *buf, size_t len) {
    char *bytes = buf;
    size_t filled = 0;

    for (;;) {
        if (filled == len) {
            return (ssize_t)len;
        }
        ssize_t n = recv(sock, bytes + filled, len - filled, 0);
        if (n <= 0) {
            return n;
        }
        filled += (size_t)n;
    }
}
TCP is a stream protocol, not a message protocol. There are no message boundaries. If you send 1000 bytes, the receiver might get 500 bytes in the first recv and 500 in the second, or 1000 in one call, or any other split.
Non-Blocking I/O
The server above handles one client at a time. While serving one client, all others wait. Non-blocking I/O with select, poll, or epoll handles multiple clients concurrently in a single thread.
select
#include <sys/select.h>
fd_set read_fds;
FD_ZERO(&read_fds);
FD_SET(server_fd, &read_fds);
int max_fd = server_fd;
/* Add all connected clients */
for (int i = 0; i < num_clients; i++) {
FD_SET(clients[i], &read_fds);
if (clients[i] > max_fd) max_fd = clients[i];
}
/* Wait for activity on any socket */
int activity = select(max_fd + 1, &read_fds, NULL, NULL, NULL);
if (FD_ISSET(server_fd, &read_fds)) {
/* New connection ready to accept */
}
for (int i = 0; i < num_clients; i++) {
if (FD_ISSET(clients[i], &read_fds)) {
/* Client has data to read */
}
}
poll
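poll improves on select by removing the FD_SETSIZE limit: it takes an array of descriptors, so neither the number of sockets nor their numeric values is capped by a fixed-size bit set.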
#include <poll.h>
struct pollfd fds[MAX_CLIENTS + 1];
fds[0].fd = server_fd;
fds[0].events = POLLIN;
int nfds = 1;
int activity = poll(fds, nfds, -1); /* -1 = block indefinitely */
if (fds[0].revents & POLLIN) {
/* New connection */
}
epoll (Linux)
epoll scales to hundreds of thousands of connections. It is the foundation of high-performance servers like Nginx and Redis.
#include <sys/epoll.h>
int epoll_fd = epoll_create1(0);
struct epoll_event ev;
ev.events = EPOLLIN;
ev.data.fd = server_fd;
epoll_ctl(epoll_fd, EPOLL_CTL_ADD, server_fd, &ev);
struct epoll_event events[MAX_EVENTS];
int n = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
for (int i = 0; i < n; i++) {
if (events[i].data.fd == server_fd) {
/* Accept new connection */
} else {
/* Handle client data */
}
}
The C10K Problem
The C10K problem asks: how do you handle 10,000 simultaneous connections? A thread-per-connection model fails at this scale because each thread consumes memory for its stack. The solution is event-driven I/O: a single thread multiplexes all connections using epoll (Linux), kqueue (macOS/BSD), or IOCP (Windows).
This is why every high-performance network library — libevent, libuv, Nginx — wraps these same system calls. Node.js's event loop is built on libuv. Redis is single-threaded with epoll. Understanding sockets and I/O multiplexing explains how all of them work.
Common Pitfalls
- Forgetting htons/htonl — Network byte order is big-endian. Port numbers and IP addresses must be converted with htons (host to network short) and htonl (host to network long). Forgetting this produces wrong port numbers.
- Not handling partial send/recv — TCP does not guarantee message boundaries. Always loop on send and recv until all data is transferred.
- Ignoring SIGPIPE — Writing to a closed connection sends SIGPIPE, which kills your server by default. Either ignore SIGPIPE (signal(SIGPIPE, SIG_IGN)) or use the MSG_NOSIGNAL flag with send.
- Address already in use — A server that crashes and restarts immediately gets "address already in use." Set SO_REUSEADDR before bind to allow reuse.
- Hardcoding IPv4 — Use getaddrinfo with AF_UNSPEC instead of AF_INET. IPv6 is the present, not the future.
- Blocking accept in a single-threaded server — Without I/O multiplexing, accept blocks the entire server. Use select, poll, or epoll for concurrent connection handling.
- Not closing file descriptors — Every accept creates a new file descriptor. Failing to close client sockets leaks file descriptors until the process hits its limit.
Key Takeaways
- The socket API (socket, bind, listen, accept, connect, send, recv) is the foundation of all network programming in every language.
- A TCP server creates a socket, binds to a port, listens for connections, and accepts clients. A TCP client creates a socket and connects.
- Use getaddrinfo instead of hardcoded IPv4 addresses. It handles DNS resolution and IPv4/IPv6 transparently.
- TCP is a stream protocol. Always handle partial reads and writes with loops.
- I/O multiplexing (select, poll, epoll) handles thousands of concurrent connections in a single thread. This is how high-performance servers work.
- Every networking library wraps these same C system calls. Understanding sockets means understanding how Nginx, Redis, Node.js, and every HTTP client work underneath.