【Terraform】Terraform ~ AWS EMR ~

■ はじめに

AWS EMR を Terraform で構築する

目次

【1】API
 1)Resource
 2)Module
【2】サンプル

【1】API

1)Resource

[1] EMR
https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emr_cluster

[2] EMR Containers

cf. EMR コンテナ(emr-containers)  = Amazon EMR on EKS

https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrcontainers_virtual_cluster

[3] EMR Serverless
https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrserverless_application

2)Module

https://registry.terraform.io/modules/terraform-aws-modules/emr/aws/latest

【2】サンプル

例1:EMRのみ

* S3/VPCなどその他周辺コンポーネントが既にできている場合

locals.tf

# Identifiers of pre-existing infrastructure consumed by this example.
# Every value is a placeholder to be replaced with a real ID / ARN / name.
locals {
  # Networking
  subnet_id             = "subnet-xxxxxx"
  emr_security_group_id = "sg-xxxxxx"

  # IAM
  emr_service_role_arn     = "xxxxxxxxxxxxx"
  emr_instance_profile_arn = "xxxxx"

  # Storage
  common_bucket_name = "demo-s3-bacuket"
}

emr.tf

# Spark-only EMR cluster that plugs into existing network and IAM resources
# referenced through local values.
resource "aws_emr_cluster" "demo_emr_cluster" {
  name          = "demo-emr-cluster"
  release_label = "emr-5.17.0"
  applications  = ["Spark"]
  service_role  = local.emr_service_role_arn

  # Keep the cluster running when no steps are queued, and allow termination.
  keep_job_flow_alive_when_no_steps = true
  termination_protection            = false

  ebs_root_volume_size = 100

  # The same security group is used for both master and core/task nodes.
  ec2_attributes {
    subnet_id                         = local.subnet_id
    instance_profile                  = local.emr_instance_profile_arn
    emr_managed_master_security_group = local.emr_security_group_id
    emr_managed_slave_security_group  = local.emr_security_group_id
  }

  master_instance_group {
    instance_type = "m5.xlarge"
  }

  core_instance_group {
    instance_count = 1
    instance_type  = "m5.xlarge"

    # Setting a bid price requests Spot capacity for this instance group.
    bid_price = "0.30"

    ebs_config {
      type                 = "gp2"
      size                 = "40"
      volumes_per_instance = 1
    }
  }

  tags = {
    env                                      = "dev"
    for-use-with-amazon-emr-managed-policies = true
  }
}

例2:メモ

locals.tf

# Shared values for example 2.
locals {
  # Bucket whose name is interpolated into the EMR log URI.
  common_bucket_name = "demo-s3-bacuket"
}

aws.tf

# AWS provider configuration driven entirely by input variables.
provider "aws" {
  region  = var.region
  profile = var.names["profile"]

  # Guard against applying to an unintended AWS account.
  allowed_account_ids = [var.names["account_id"]]
}

vpc.tf

# VPC that hosts the demo EMR cluster; DNS support and hostnames are enabled.
resource "aws_vpc" "demo_emr_vpc" {
  cidr_block           = var.vpc_cidr
  instance_tenancy     = "default"
  enable_dns_support   = true
  enable_dns_hostnames = true

  # `tags` is a map argument, not a nested block, so it must be assigned
  # with `=` (required since Terraform 0.12).
  tags = {
    Name = "demo-emr-vpc"
  }
}

## Internet GW
# Internet gateway attached to the demo VPC.
resource "aws_internet_gateway" "demo_emr_igw" {
  vpc_id = aws_vpc.demo_emr_vpc.id

  # `tags` is a map argument, so it must be assigned with `=`
  # (required since Terraform 0.12).
  tags = {
    Name = "demo-emr-igw"
  }
}

## Subnet
# Two public subnets; each takes the availability zone at its own index.
# NOTE: data.aws_availability_zones.available is not declared in this memo
# and must be defined elsewhere in the configuration.
resource "aws_subnet" "public" {
  count = 2

  vpc_id                  = aws_vpc.demo_emr_vpc.id
  availability_zone       = data.aws_availability_zones.available.names[count.index]
  map_public_ip_on_launch = true

  # Extend the VPC prefix by 8 bits and pick the count.index-th network.
  cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index)

  # `tags` is a map argument, so it must be assigned with `=`
  # (required since Terraform 0.12).
  tags = {
    # Pass the prefix as a format argument so that a "%" inside it cannot be
    # misread as a format verb.
    Name = format("%s-public-subnet%02d", var.names["prefix"], count.index + 1)
  }
}

## Route Table
# Public route table: sends all IPv4 traffic to the internet gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.demo_emr_vpc.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.demo_emr_igw.id
  }

  # `tags` is a map argument, so it must be assigned with `=`
  # (required since Terraform 0.12).
  tags = {
    Name = "${var.names["prefix"]}-public"
  }
}

# Associate each of the two public subnets with the public route table.
resource "aws_route_table_association" "public" {
  count = 2

  route_table_id = aws_route_table.public.id
  subnet_id      = aws_subnet.public[count.index].id
}

iam.tf

# [1] IAM for service role

# Step1: Create the trust policy
# Lets the EMR service (elasticmapreduce.amazonaws.com) assume the role.
data "aws_iam_policy_document" "demo_emr_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      identifiers = ["elasticmapreduce.amazonaws.com"]
      type        = "Service"
    }
  }
}

# Step2: IAM role
# Service role for EMR, using the trust policy rendered by the
# aws_iam_policy_document data source.
resource "aws_iam_role" "demo_emr_iam_role" {
  name = "demo-emr-iam-role"

  # Data sources are addressed as data.<TYPE>.<NAME>.<ATTRIBUTE>; the original
  # reference omitted the type and used a non-existent name.
  assume_role_policy = data.aws_iam_policy_document.demo_emr_assume_role.json
}

# Step3: IAM Policy for Systems Manager/EMR
# Look up the AWS managed policies by name so their ARNs can be attached.
data "aws_iam_policy" "emr_service_policy" {
  name = "AmazonEMRServicePolicy_v2"
}

data "aws_iam_policy" "ssm_service_policy" {
  name = "AmazonSSMManagedInstanceCore"
}

# Step4: Attach the Systems Manager / EMR IAM policies to the role
resource "aws_iam_role_policy_attachment" "for_emr" {
  policy_arn = data.aws_iam_policy.emr_service_policy.arn
  role       = aws_iam_role.demo_emr_iam_role.name
}

resource "aws_iam_role_policy_attachment" "for_ec2_ssm" {
  policy_arn = data.aws_iam_policy.ssm_service_policy.arn
  role       = aws_iam_role.demo_emr_iam_role.name
}

# Customer managed policy for the EMR service role.
#
# Fixes relative to the original memo:
#   * jsonencode() takes exactly one argument, so the three objects are merged
#     into one policy document with three statements.
#   * Sid / Effect belong inside each statement, not next to Version.
#   * ARNs use the form arn:aws:<service>:... (no empty field after "aws").
#   * Typos corrected: ec2:CreateSecurityGroup, ec2.amazonaws.com.
#   * The resource address and policy name no longer collide with the EC2
#     instance policy declared later in this file.
# NOTE: this policy is not attached to any role in this memo; attach it with
# aws_iam_role_policy_attachment where needed.
resource "aws_iam_policy" "demo_emr_service_policy" {
  name        = "demo-emr-service-policy"
  description = "This is a policy for Demo EMR service role"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = "CreateInNetworkInterface"
        Effect = "Allow"
        Action = [
          "ec2:CreateNetworkInterface",
          "ec2:RunInstances",
          "ec2:CreateFleet",
          "ec2:CreateLaunchTemplate",
          "ec2:CreateLaunchTemplateVersion"
        ]
        Resource = [
          "arn:aws:ec2:*:*:subnet/subnet-*"
        ]
      },
      {
        Sid    = "CreateDefaultSecurityGroupInVpc"
        Effect = "Allow"
        Action = [
          "ec2:CreateSecurityGroup"
        ]
        Resource = [
          "arn:aws:ec2:*:*:vpc/vpc-*"
        ]
      },
      {
        Sid    = "PassRoleForEC2"
        Effect = "Allow"
        Action = [
          "iam:PassRole"
        ]
        # 1111111 is a placeholder account ID.
        Resource = [
          "arn:aws:iam::1111111:role/service-role/*"
        ]
        Condition = {
          StringLike = {
            "iam:PassedToService" = "ec2.amazonaws.com"
          }
        }
      }
    ]
  })
}

# [2] IAM for EC2 instance profile
# Step1: Create the trust policy
# Lets EC2 (ec2.amazonaws.com) assume the instance role.
data "aws_iam_policy_document" "demo_ec2_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    # `principals` is a nested block, not a map argument, so it takes no `=`.
    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}

# Step2: IAM role assumed by the EMR cluster's EC2 instances.
#
# Policies are attached only through aws_iam_role_policy_attachment resources.
# The original mixed `managed_policy_arns` on the role with a separate
# attachment; `managed_policy_arns` manages the role's attachments exclusively,
# so the two mechanisms conflict.
resource "aws_iam_role" "demo_emr_instance_role" {
  name               = "demo-emr-instance-role"
  description        = "This is for demo EMR instance role."
  assume_role_policy = data.aws_iam_policy_document.demo_ec2_assume_role.json
}

# Customer managed policy (previously listed in managed_policy_arns).
resource "aws_iam_role_policy_attachment" "demo_emr_instance_policy" {
  role       = aws_iam_role.demo_emr_instance_role.name
  policy_arn = aws_iam_policy.demo_emr_instance_policy.arn
}

# AWS managed policy for EMR EC2 instances.
# The original pointed at aws_iam_role.emr-ec2-role, which is not declared.
resource "aws_iam_role_policy_attachment" "emr-ec2-role" {
  role       = aws_iam_role.demo_emr_instance_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
}

# Customer managed policy granting the EMR EC2 instances access to S3.
#
# Fixes relative to the original memo:
#   * Sid moved into the statement; a policy document's top level only allows
#     Version / Id / Statement.
#   * S3 ARN corrected to the form arn:aws:s3:::<bucket>.
#   * Removed "s3:ListBucketMultipartUploadParts", which is not an S3 action;
#     s3:ListBucketMultipartUploads and s3:ListMultipartUploadParts remain.
# NOTE(review): the Sid says "Readonly" but the statement also allows writes
# and deletes; it is kept as written in the original.
resource "aws_iam_policy" "demo_emr_instance_policy" {
  name        = "demo-emr-instance-policy"
  description = "This is a policy for Demo EMR instance"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = "AllowS3BucketReadonly"
        Effect = "Allow"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:CreateBucket",
          "s3:DeleteObject",
          "s3:GetBucketVersioning",
          "s3:GetObject",
          "s3:GetObjectTagging",
          "s3:GetObjectVersion",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:ListBucketVersions",
          "s3:ListMultipartUploadParts",
          "s3:PutBucketVersioning",
          "s3:PutObject",
          "s3:PutObjectTagging"
        ]
        # All buckets; narrow this to specific bucket ARNs where possible.
        Resource = [
          "arn:aws:s3:::*"
        ]
      }
    ]
  })
}

emr.tf

## EMR Cluster
# Hadoop/Hive cluster with one master and two core nodes, logging to S3.
#
# Fixes relative to the original memo:
#   * Local values are referenced as `local.<name>`, not `locals.<name>`.
#   * service_role points at the EMR service role declared in iam.tf
#     (aws_iam_role.demo_emr_iam_role); `emr-service-role` is not declared.
#   * master_instance_type / core_instance_type / core_instance_count were
#     removed from the AWS provider; the instance group blocks replace them.
resource "aws_emr_cluster" "demo_emr_cluster" {
  name          = "demo-emr-cluster"
  release_label = "emr-6.12.0"
  applications  = ["Hadoop", "Hive"]
  log_uri       = "s3://${local.common_bucket_name}/emr/logs/"
  service_role  = aws_iam_role.demo_emr_iam_role.arn

  master_instance_group {
    instance_type = "m5.xlarge"
  }

  core_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 2
  }

  ec2_attributes {
    key_name  = var.ssh_key_name
    subnet_id = aws_subnet.public[0].id

    # NOTE: aws_iam_instance_profile.emr-ec2-profile is not declared in this
    # memo. Define it with role = aws_iam_role.demo_emr_instance_role.name.
    instance_profile = aws_iam_instance_profile.emr-ec2-profile.name
  }

  tags = {
    for-use-with-amazon-emr-managed-policies = true
  }
}

参考文献

https://dev.classmethod.jp/articles/create-amazon-emr-cluster-with-terraform/

関連記事

Terraform ~ 環境構築編 ~
https://dk521123.hatenablog.com/entry/2023/04/05/000224
Terraform ~ 入門編 ~
https://dk521123.hatenablog.com/entry/2019/12/09/222057
Terraform ~ 基本編 ~
https://dk521123.hatenablog.com/entry/2023/05/03/000000
Terraform ~ AWS IAM ~
https://dk521123.hatenablog.com/entry/2023/04/12/214311
Terraform ~ AWS EC2 ~
https://dk521123.hatenablog.com/entry/2023/05/21/003048
Amazon EMR ~ Flink ~
https://dk521123.hatenablog.com/entry/2023/07/16/000000
Amazon EMR ~ boto3 編 ~
https://dk521123.hatenablog.com/entry/2020/06/24/173334
Amazon EMR ~ IAM Role周り ~
https://dk521123.hatenablog.com/entry/2023/07/24/160124
Amazon EMR ~ ネットワーク周り ~
https://dk521123.hatenablog.com/entry/2022/05/13/155755
Amazon EMR ~ Auto scaling ~
https://dk521123.hatenablog.com/entry/2022/05/16/115229
Amazon EMR ~ チュートリアルのすすめ ~
https://dk521123.hatenablog.com/entry/2023/07/29/000000