3 min read
On this page

Unions & Enums

Unions: Shared Memory

A union looks like a struct, but all its fields occupy the same memory. The size of a union is at least the size of its largest field; the compiler may add trailing padding to satisfy alignment.

#include <stdio.h>

/*
 * Three members that share one piece of storage: the same bytes can be
 * accessed as an int, as a float, or as an array of four chars.
 */
union Value {
    int i;
    float f;
    char bytes[4];
};

int main(void) {
    printf("sizeof(union Value): %zu\n", sizeof(union Value));

    union Value v;
    v.i = 42;
    printf("As int: %d\n", v.i);

    v.f = 3.14f;
    printf("As float: %f\n", v.f);
    printf("As int after setting float: %d (garbage)\n", v.i);

    return 0;
}
sizeof(union Value): 4
As int: 42
As float: 3.140000
As int after setting float: 1078523331 (garbage)

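Only the last-written field contains a meaningful value. Reading a different field reinterprets the same bytes as that field's type: the 1078523331 printed above is 0x4048F5C3, the IEEE 754 bit pattern of 3.14f, which is meaningless when read as an int.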

Enums: Named Integer Constants

An enum defines a set of named integer constants:

/*
 * Colors numbered implicitly from 0. COLOR_COUNT is deliberately last, so its
 * value equals the number of real colors declared before it.
 */
enum Color {
    COLOR_RED,      /* 0 */
    COLOR_GREEN,    /* 1 */
    COLOR_BLUE,     /* 2 */
    COLOR_COUNT     /* 3 - not a color; the number of colors, useful for array sizing */
};

By default, values start at 0 and increment. You can set explicit values:

/*
 * Enumerators with explicitly assigned values. The values are not
 * contiguous, so they cannot be walked with a simple counting loop.
 */
enum HttpStatus {
    HTTP_OK = 200,
    HTTP_NOT_FOUND = 404,
    HTTP_INTERNAL_ERROR = 500,
};

Enums are integers under the hood. The compiler does not enforce that a variable of an enum type holds a valid enumerator, so you must validate inputs yourself.

const char *color_name(enum Color c) {
    switch (c) {
    case COLOR_RED:   return "red";
    case COLOR_GREEN: return "green";
    case COLOR_BLUE:  return "blue";
    default:          return "unknown";
    }
}

Tagged Unions: Type-Safe Variants

A tagged union pairs an enum (the tag) with a union (the data). This is the C idiom for a value that can be one of several types.

#include <stdio.h>
#include <string.h>

/* Tag values recording which member of struct Value's union is in use. */
enum ValueType {
    VAL_INT,
    VAL_FLOAT,
    VAL_STRING,
};

/*
 * A tagged union: `type` records which member of `data` currently holds the
 * value, so readers can pick the matching member.
 */
struct Value {
    enum ValueType type;
    union {
        /* read when type == VAL_INT */
        int i;
        /* read when type == VAL_FLOAT (stored as a double despite the tag name) */
        double f;
        /* read when type == VAL_STRING; printed with %s, so it must be NUL-terminated */
        char s[64];
    } data;
};

/*
 * Print a tagged value on its own line.
 *
 * v->type selects which member of v->data is read, so only the member that
 * matches the tag is ever accessed. A tag outside enum ValueType (for example
 * from an uninitialized or corrupted struct) is reported rather than being
 * silently ignored.
 */
void print_value(const struct Value *v) {
    switch (v->type) {
    case VAL_INT:
        printf("int: %d\n", v->data.i);
        break;
    case VAL_FLOAT:
        printf("float: %f\n", v->data.f);
        break;
    case VAL_STRING:
        printf("string: \"%s\"\n", v->data.s);
        break;
    default:
        /* Enums are plain integers, so an out-of-range tag is possible. */
        printf("unknown value type: %d\n", (int)v->type);
        break;
    }
}

int main(void) {
    struct Value values[3];

    values[0].type = VAL_INT;
    values[0].data.i = 42;

    values[1].type = VAL_FLOAT;
    values[1].data.f = 2.718;

    values[2].type = VAL_STRING;
    strncpy(values[2].data.s, "hello", sizeof(values[2].data.s) - 1);
    values[2].data.s[sizeof(values[2].data.s) - 1] = '\0';

    for (int i = 0; i < 3; i++) {
        print_value(&values[i]);
    }

    return 0;
}
int: 42
float: 2.718000
string: "hello"

The tag tells you which union field is valid. Always check the tag before accessing data.

Bit Fields for Compact Flags

Bit fields let you pack multiple small values into a single integer:

#include <stdio.h>

/*
 * Four independent on/off flags, each declared as a 1-bit unsigned bit field
 * and therefore able to hold only 0 or 1.
 */
struct Permissions {
    unsigned int read    : 1;
    unsigned int write   : 1;
    unsigned int execute : 1;
    unsigned int admin   : 1;
};

int main(void) {
    struct Permissions p = {.read = 1, .write = 1, .execute = 0, .admin = 0};

    printf("sizeof(Permissions): %zu\n", sizeof(struct Permissions));
    printf("read=%u write=%u exec=%u admin=%u\n",
           p.read, p.write, p.execute, p.admin);

    p.admin = 1;
    printf("After granting admin: admin=%u\n", p.admin);

    return 0;
}
sizeof(Permissions): 4
read=1 write=1 exec=0 admin=0
After granting admin: admin=1

Bit fields save memory when you have many boolean or small-integer flags. The exact layout is implementation-defined, so do not use them in file formats or network protocols.

The Union Trick for Type Punning

Type punning means reinterpreting the bits of one type as another. A common trick is to use a union:

#include <stdio.h>
#include <stdint.h>

/*
 * Overlays a float with a 32-bit unsigned integer so the float's bit pattern
 * can be read through `u` after writing `f`.
 * NOTE(review): only meaningful if float is 32 bits wide - confirm for the
 * target platform.
 */
union FloatBits {
    float f;
    uint32_t u;
};

/*
 * Print the sign, exponent and mantissa fields of a float.
 *
 * The value is written to the float member of union FloatBits and its bits
 * are read back through the integer member. The field extraction assumes a
 * 1/8/23-bit layout (sign / biased exponent / mantissa), i.e. a 32-bit
 * IEEE 754 single-precision float.
 */
void inspect_float(float val) {
    union FloatBits fb;
    fb.f = val;

    uint32_t sign = (fb.u >> 31) & 1;
    uint32_t exponent = (fb.u >> 23) & 0xFF;
    uint32_t mantissa = fb.u & 0x7FFFFF;

    /* uint32_t is not necessarily unsigned int, so passing it to %u is a
     * format mismatch on some targets. unsigned long is guaranteed to hold
     * 32 bits, so convert explicitly and print with the matching %lu / %lX. */
    printf("%.6f => sign=%lu exponent=%lu mantissa=0x%06lX\n",
           val, (unsigned long)sign, (unsigned long)exponent,
           (unsigned long)mantissa);
}

/* Inspects a positive value, a negative value, and zero. */
int main(void) {
    const float samples[] = {1.0f, -0.5f, 0.0f};

    for (size_t k = 0; k < sizeof samples / sizeof samples[0]; k++) {
        inspect_float(samples[k]);
    }
    return 0;
}
1.000000 => sign=0 exponent=127 mantissa=0x000000
-0.500000 => sign=1 exponent=126 mantissa=0x000000
0.000000 => sign=0 exponent=0 mantissa=0x000000

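Reading a union member other than the one last written is allowed in C: the stored bytes are reinterpreted as the new type. The original C99 text left this underspecified, but C99 TC3 and C11 state it explicitly (in a footnote to §6.5.2.3), with the caveat that the result may be a trap representation. In C++, by contrast, it is undefined behavior. The alternative that is valid in both languages is memcpy.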

Real-World Example: Network Protocol Parsing

Network protocols often have a fixed header followed by variable payloads. Tagged unions model this naturally.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/*
 * Message kinds, with explicitly assigned values starting at 1 (0 is not a
 * valid kind).
 */
enum MessageType {
    MSG_LOGIN = 1,
    MSG_CHAT = 2,
    MSG_DISCONNECT = 3,
};

/* Payload of a login message: two fixed-size 32-byte character buffers. */
struct LoginPayload {
    char username[32];
    char password[32];
};

/*
 * Payload of a chat message: a 32-byte sender name and a 256-byte message
 * text, both fixed-size character buffers.
 */
struct ChatPayload {
    char sender[32];
    char text[256];
};

/*
 * One protocol message: a small fixed header followed by a payload whose
 * interpretation depends on `type`.
 */
struct Message {
    uint8_t version;
    /* Holds an enum MessageType value (stored as one byte); selects which
     * payload member is meaningful. */
    uint8_t type;
    /* NOTE(review): presumably the payload length; the example code never
     * reads or sets it - confirm intended use. */
    uint16_t length;
    union {
        struct LoginPayload login;
        struct ChatPayload chat;
        /* disconnect has no payload */
    } payload;
};

/*
 * Dispatch on the message type and print a one-line summary.
 *
 * msg->type decides which payload member is read. The payload strings live
 * in fixed-size buffers that may come straight off the network, so each one
 * is printed with an explicit maximum length: a missing NUL terminator can
 * then never cause a read past the end of the field. Unrecognized types are
 * reported with their numeric value.
 */
void handle_message(const struct Message *msg) {
    switch (msg->type) {
    case MSG_LOGIN:
        printf("Login from: %.*s\n",
               (int)sizeof msg->payload.login.username,
               msg->payload.login.username);
        break;
    case MSG_CHAT:
        printf("[%.*s]: %.*s\n",
               (int)sizeof msg->payload.chat.sender,
               msg->payload.chat.sender,
               (int)sizeof msg->payload.chat.text,
               msg->payload.chat.text);
        break;
    case MSG_DISCONNECT:
        /* No payload to read for this type. */
        printf("Client disconnected\n");
        break;
    default:
        printf("Unknown message type: %d\n", msg->type);
        break;
    }
}

/* Builds a chat message by hand and passes it to handle_message. */
int main(void) {
    struct Message msg = {0};
    msg.version = 1;
    msg.type = MSG_CHAT;

    /* snprintf always NUL-terminates and takes the destination size from the
     * field itself, so there are no hand-maintained lengths and termination
     * does not depend on how the union was zero-initialized. */
    snprintf(msg.payload.chat.sender, sizeof msg.payload.chat.sender,
             "%s", "Alice");
    snprintf(msg.payload.chat.text, sizeof msg.payload.chat.text,
             "%s", "Hello, server!");

    handle_message(&msg);
    return 0;
}
[Alice]: Hello, server!

Real-World Example: A Simple JSON-Like Variant Type

#include <stdio.h>
#include <string.h>

/* Tag values identifying which kind of JSON value a struct JsonValue holds. */
enum JsonType {
    JSON_NULL,
    JSON_BOOL,
    JSON_NUMBER,
    JSON_STRING,
};

/*
 * A JSON-like value as a tagged union: `type` says which member of `as` is
 * meaningful. JSON_NULL uses no member at all.
 */
struct JsonValue {
    enum JsonType type;
    union {
        /* used when type == JSON_BOOL; printed as "true" when non-zero */
        int boolean;
        /* used when type == JSON_NUMBER */
        double number;
        /* used when type == JSON_STRING; NUL-terminated, at most 127 characters */
        char string[128];
    } as;
};

struct JsonValue json_null(void) {
    return (struct JsonValue){.type = JSON_NULL};
}

struct JsonValue json_number(double n) {
    return (struct JsonValue){.type = JSON_NUMBER, .as.number = n};
}

/*
 * Construct a JSON string from s.
 *
 * The text is copied into the value's fixed 128-byte buffer and truncated to
 * 127 characters if longer. snprintf always writes the terminating NUL, so
 * the result is a valid C string regardless of how the inactive bytes of the
 * union were initialized. A NULL s is treated as the empty string instead of
 * being dereferenced.
 */
struct JsonValue json_string(const char *s) {
    struct JsonValue v = {.type = JSON_STRING};
    snprintf(v.as.string, sizeof v.as.string, "%s", s != NULL ? s : "");
    return v;
}

/*
 * Print a JSON value to stdout without a trailing newline.
 *
 * v->type selects which member of v->as is read. A tag outside enum JsonType
 * prints a visible placeholder instead of silently producing no output.
 */
void json_print(const struct JsonValue *v) {
    switch (v->type) {
    case JSON_NULL:
        printf("null");
        break;
    case JSON_BOOL:
        /* Pass the text as an argument so the format is always a literal. */
        printf("%s", v->as.boolean ? "true" : "false");
        break;
    case JSON_NUMBER:
        printf("%g", v->as.number);
        break;
    case JSON_STRING:
        printf("\"%s\"", v->as.string);
        break;
    default:
        /* Enums are plain integers, so an out-of-range tag is possible. */
        printf("<unknown>");
        break;
    }
}

/* Builds a few values with the constructors and prints one per line. */
int main(void) {
    struct JsonValue vals[] = {
        json_null(),
        json_number(3.14),
        json_string("hello"),
    };
    /* Derive the element count from the array itself. */
    const size_t count = sizeof vals / sizeof vals[0];

    for (size_t idx = 0; idx < count; idx++) {
        json_print(&vals[idx]);
        printf("\n");
    }
    return 0;
}
null
3.14
"hello"

Common Pitfalls

  • Reading the wrong union field. Only the last-written field is valid. Always use a tag to track which field is active.
  • Assuming enum values are contiguous. If you assign explicit values (like HTTP status codes), you cannot iterate over them with a simple loop.
  • Bit field portability. The layout, signedness, and maximum width of bit fields are implementation-defined. Do not use them for serialization.
  • Forgetting the default case. When switching on an enum, always include a default case to handle unexpected values, especially when the enum comes from external input.
  • Union size surprises. A union with a large field (like a char[256]) makes every instance that large, even when the active field is a single int.

Key Takeaways

  • A union stores all its fields in the same memory; only the last-written field is valid.
  • An enum defines named integer constants but does not enforce valid values at runtime.
  • Tagged unions (enum + union) are the C idiom for variant or sum types. Always check the tag before accessing data.
  • Bit fields pack small values tightly but are not portable across compilers or architectures.
  • Type punning through unions is defined behavior in C (stated explicitly since C99 TC3 and C11) but undefined behavior in C++. Use memcpy when the code must be valid in both.
  • Tagged unions appear everywhere in real systems: protocol parsers, interpreters, serialization formats, and configuration structures.