← Back to Guide
Platform Engineering L1 · FOUNDATIONAL ~60 min

Build a Reusable Terraform Module for AKS

Design and implement a composable Terraform module that provisions an AKS cluster with configurable node pools, outputs the cluster endpoint and kubeconfig path, and can be consumed by multiple environment root modules with a single source reference.

Objective

A Terraform module encapsulates a set of resources behind a clean interface (variables and outputs). This exercise builds the module file structure, implements the core resources, writes comprehensive variable definitions with validation blocks, and demonstrates consumption from both a staging and production root module.

Prerequisites

Steps

01

Create the module directory structure

# Layout: one shared module under modules/, one root module per environment.
mkdir -p \
  terraform/modules/aks \
  terraform/environments/staging \
  terraform/environments/production

tree terraform/
## terraform/
## ├── environments/
## │   ├── staging/
## │   │   └── main.tf
## │   └── production/
## │       └── main.tf
## └── modules/
##     └── aks/
##         ├── main.tf       ← resources
##         ├── variables.tf  ← input interface
##         ├── outputs.tf    ← output interface
##         └── versions.tf   ← provider requirements
02

Write versions.tf — provider requirements

terraform/modules/aks/versions.tf
# Provider/CLI requirements for the module itself. Root modules declare their
# own required_providers; Terraform resolves the intersection at init time.
terraform {
  # >= 1.6 comfortably covers optional() object attributes (needs >= 1.3)
  # used in variables.tf.
  required_version = ">= 1.6.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.85"   # stay on the 3.x line; 4.x has breaking schema changes
    }
  }
}
03

Write variables.tf — typed inputs with validation

terraform/modules/aks/variables.tf
# variables.tf — the module's input interface. Every input is typed, and
# validation blocks fail fast at plan time instead of surfacing later as an
# opaque Azure API error mid-apply.

variable "cluster_name" {
  description = "Name of the AKS cluster"
  type        = string
  validation {
    # Azure requires managed-cluster names to start and end with an
    # alphanumeric character. The previous pattern ^[a-z0-9-]{3,40}$ wrongly
    # accepted names such as "-my-cluster-", which Azure rejects at apply.
    condition     = can(regex("^[a-z0-9][a-z0-9-]{1,38}[a-z0-9]$", var.cluster_name))
    error_message = "cluster_name must be 3-40 chars of lowercase alphanumerics and hyphens, starting and ending with an alphanumeric."
  }
}

variable "resource_group_name" {
  description = "Azure resource group to deploy into"
  type        = string
}

variable "location" {
  description = "Azure region (e.g. eastus2)"
  type        = string
  default     = "eastus2"
}

variable "kubernetes_version" {
  description = "AKS Kubernetes version"
  type        = string
  default     = "1.29"
}

variable "system_node_pool" {
  description = "System node pool configuration"
  type = object({
    vm_size    = string
    node_count = number
    zones      = list(string)
  })
  default = {
    vm_size    = "Standard_D4s_v5"
    node_count = 3
    zones      = ["1", "2", "3"]
  }
  validation {
    condition     = var.system_node_pool.node_count >= 1
    error_message = "System node pool must have at least 1 node."
  }
}

variable "additional_node_pools" {
  description = "Map of additional node pools to create (map keys become the pool names)"
  type = map(object({
    vm_size    = string
    node_count = number
    zones      = list(string)
    node_labels = optional(map(string), {})
    node_taints = optional(list(string), [])
  }))
  default = {}
  validation {
    # Map keys are passed straight through as agent-pool names in main.tf.
    # Azure limits Linux node pool names to 1-12 lowercase alphanumerics
    # starting with a letter; catch violations at plan time.
    condition = alltrue([
      for name, _ in var.additional_node_pools :
      can(regex("^[a-z][a-z0-9]{0,11}$", name))
    ])
    error_message = "Node pool names (map keys) must be 1-12 lowercase alphanumeric characters and start with a letter."
  }
}

variable "tags" {
  description = "Tags applied to all resources"
  type        = map(string)
  default     = {}
}

variable "enable_azure_policy" {
  description = "Enable Azure Policy add-on for governance"
  type        = bool
  default     = false
}

variable "log_analytics_workspace_id" {
  description = "Log Analytics workspace ID for Container Insights (optional)"
  type        = string
  default     = null
}
04

Write main.tf — the AKS resources

terraform/modules/aks/main.tf
# main.tf — the AKS cluster and any additional node pools.

resource "azurerm_kubernetes_cluster" "this" {
  name                = var.cluster_name
  resource_group_name = var.resource_group_name
  location            = var.location
  kubernetes_version  = var.kubernetes_version
  dns_prefix          = var.cluster_name
  sku_tier            = "Standard"   # Standard for production SLA

  # FIX: outputs.tf exposes oidc_issuer_url, but in azurerm 3.x the issuer
  # is only populated when explicitly enabled (defaults to false) — without
  # this the output is empty. Workload Identity builds on the OIDC issuer.
  oidc_issuer_enabled       = true
  workload_identity_enabled = true

  # FIX: in azurerm 3.x Azure Policy is a plain boolean argument, not a
  # nested block — the previous dynamic "azure_policy" block is not part of
  # the 3.x schema and fails `terraform validate`.
  azure_policy_enabled = var.enable_azure_policy

  default_node_pool {
    name                = "system"
    vm_size             = var.system_node_pool.vm_size
    node_count          = var.system_node_pool.node_count
    zones               = var.system_node_pool.zones
    only_critical_addons_enabled = true   # taint: CriticalAddonsOnly
    os_disk_type        = "Ephemeral"
    os_disk_size_gb     = 100
    upgrade_settings {
      max_surge = "33%"   # roll at most a third of the pool during upgrades
    }
  }

  identity {
    type = "SystemAssigned"
  }

  network_profile {
    network_plugin     = "azure"
    network_policy     = "azure"
    load_balancer_sku  = "standard"
  }

  azure_active_directory_role_based_access_control {
    managed            = true
    azure_rbac_enabled = true
  }

  # Container Insights: only wire up the oms_agent block when a workspace
  # was supplied (the block itself has no "enabled" toggle in 3.x).
  dynamic "oms_agent" {
    for_each = var.log_analytics_workspace_id != null ? [1] : []
    content {
      log_analytics_workspace_id = var.log_analytics_workspace_id
    }
  }

  tags = var.tags
}

# Additional node pools (e.g. "user", "gpu", "spot") — one resource per
# entry in var.additional_node_pools; the map key is the pool name.
resource "azurerm_kubernetes_cluster_node_pool" "additional" {
  for_each = var.additional_node_pools

  name                  = each.key
  kubernetes_cluster_id = azurerm_kubernetes_cluster.this.id
  vm_size               = each.value.vm_size
  node_count            = each.value.node_count
  zones                 = each.value.zones
  node_labels           = each.value.node_labels
  node_taints           = each.value.node_taints
  os_disk_type          = "Ephemeral"
  upgrade_settings {
    max_surge = "33%"
  }
  tags = var.tags
}
05

Write outputs.tf — the public interface

terraform/modules/aks/outputs.tf
# outputs.tf — the module's public interface. Root modules (and their own
# outputs) read these; nothing else in the module is reachable from outside.
output "cluster_id" {
  description = "AKS cluster resource ID"
  value       = azurerm_kubernetes_cluster.this.id
}

output "cluster_name" {
  description = "AKS cluster name"
  value       = azurerm_kubernetes_cluster.this.name
}

# Marked sensitive so Terraform redacts it from plan/apply logs; it still
# lands in state, so prefer `az aks get-credentials` for human access.
output "kube_config_raw" {
  description = "Raw kubeconfig (use az aks get-credentials for production)"
  value       = azurerm_kubernetes_cluster.this.kube_config_raw
  sensitive   = true
}

output "host" {
  description = "Kubernetes API server endpoint"
  # kube_config is a list attribute in the azurerm schema — index [0].
  value       = azurerm_kubernetes_cluster.this.kube_config[0].host
  sensitive   = true
}

output "oidc_issuer_url" {
  description = "OIDC issuer URL for Workload Identity / IRSA equivalent"
  value       = azurerm_kubernetes_cluster.this.oidc_issuer_url
}

output "node_resource_group" {
  description = "Resource group where AKS creates managed node resources"
  value       = azurerm_kubernetes_cluster.this.node_resource_group
}

# Needed when granting the cluster pull access to ACR or storage: role
# assignments target this identity, not the cluster's own identity.
output "kubelet_identity" {
  description = "Kubelet managed identity (for ACR pull, storage access)"
  value = {
    object_id = azurerm_kubernetes_cluster.this.kubelet_identity[0].object_id
    client_id = azurerm_kubernetes_cluster.this.kubelet_identity[0].client_id
  }
}
06

Consume the module from a staging environment

terraform/environments/staging/main.tf
# Staging root module: owns state/backend/provider config and consumes the
# shared AKS module with environment-specific inputs.
terraform {
  required_version = ">= 1.6.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.85"
    }
  }
  # Remote state in Azure Blob Storage; the key isolates this environment's
  # state from production's.
  backend "azurerm" {
    resource_group_name  = "tf-state-rg"
    storage_account_name = "tfstatestorage"
    container_name       = "tfstate"
    key                  = "staging/aks.tfstate"
  }
}

provider "azurerm" {
  features {}
}

module "aks" {
  source = "../../modules/aks"   # local path — use git URL for shared modules

  cluster_name        = "platform-staging"
  resource_group_name = "platform-staging-rg"
  location            = "eastus2"
  kubernetes_version  = "1.29"

  # Override the module defaults: staging runs fewer, smaller nodes.
  system_node_pool = {
    vm_size    = "Standard_D2s_v5"   # smaller for staging
    node_count = 2
    zones      = ["1", "2"]
  }

  # Map keys become node pool names; this creates pool "user".
  additional_node_pools = {
    user = {
      vm_size     = "Standard_D4s_v5"
      node_count  = 2
      zones       = ["1", "2"]
      node_labels = { role = "user-workloads" }
      node_taints = []
    }
  }

  tags = {
    environment = "staging"
    managed_by  = "terraform"
    team        = "platform"
  }
}

# Re-export selected module outputs so `terraform output` works at the root.
output "cluster_name" {
  value = module.aks.cluster_name
}

output "oidc_issuer_url" {
  value = module.aks.oidc_issuer_url
}
07

Initialize, plan, and apply

# Initialize the staging environment (downloads providers, wires the module)
cd terraform/environments/staging
terraform init

## Initializing modules...
## - aks in ../../modules/aks
## Initializing provider plugins...
## - Finding hashicorp/azurerm versions matching "~> 3.85"...

# Preview the plan; -out pins exactly what apply will execute
terraform plan -out=staging.tfplan

## Plan: 2 to add, 0 to change, 0 to destroy.
##
## + module.aks.azurerm_kubernetes_cluster.this
## + module.aks.azurerm_kubernetes_cluster_node_pool.additional["user"]

# Apply (requires az login with Contributor)
terraform apply staging.tfplan

# Retrieve outputs
terraform output cluster_name
## "platform-staging"

terraform output oidc_issuer_url
## "https://eastus2.oic.prod-aks.azure.com/<tenant-id>/<uuid>/"

# Configure kubectl
az aks get-credentials \
  --resource-group platform-staging-rg \
  --name platform-staging \
  --overwrite-existing

kubectl get nodes
## NAME                                STATUS   ROLES   AGE   VERSION
## aks-system-12345678-vmss000000      Ready    agent   3m    v1.29.x
## aks-system-12345678-vmss000001      Ready    agent   3m    v1.29.x
## aks-user-87654321-vmss000000        Ready    agent   2m    v1.29.x
## aks-user-87654321-vmss000001        Ready    agent   2m    v1.29.x

# Validate production environment uses the same module with different inputs
# (production/main.tf would use node_count=3, Standard tier, more zones)
cd ../production
terraform init && terraform plan
For shared platform modules consumed across multiple teams, publish the module to a Terraform registry (for example, the Terraform Cloud private registry) or cut releases as git tags following semantic versioning (vMAJOR.MINOR.PATCH). Consumers then pin an exact version with source = "git::https://github.com/org/tf-modules.git//aks?ref=v1.2.0", so module changes never reach an environment until its root module deliberately bumps the ref.

Success Criteria

Further Reading