■ はじめに
AWS EMR を Terraform で構築する
目次
【1】API 1)Resource 2)Module 【2】サンプル
【1】API
1)Resource
[1] EMR
https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emr_cluster
[2] EMR Containers
cf. EMR コンテナ(emr-containers) = Amazon EMR on EKS
[3] EMR Serverless
https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrserverless_application
2)Module
https://registry.terraform.io/modules/terraform-aws-modules/emr/aws/latest
【2】サンプル
例1:EMRのみ
* S3/VPCなどその他周辺コンポーネントが既にできている場合
locals.tf
# Identifiers of already-provisioned infrastructure (network, IAM, S3) used by
# example 1. Every value is a placeholder; replace with the real IDs / ARNs.
locals {
  # Network
  subnet_id             = "subnet-xxxxxx"
  emr_security_group_id = "sg-xxxxxx"

  # IAM
  emr_service_role_arn     = "xxxxxxxxxxxxx"
  emr_instance_profile_arn = "xxxxx"

  # Storage
  # NOTE(review): "bacuket" looks like a typo of "bucket" - confirm the real bucket name.
  common_bucket_name = "demo-s3-bacuket"
}
emr.tf
# Spark cluster that attaches to an existing subnet, security group, instance
# profile and service role, all supplied through local values.
resource "aws_emr_cluster" "demo_emr_cluster" {
  name          = "demo-emr-cluster"
  release_label = "emr-5.17.0"
  applications  = ["Spark"]
  service_role  = local.emr_service_role_arn

  # Cluster lifecycle: stay up after the last step, and allow termination.
  keep_job_flow_alive_when_no_steps = true
  termination_protection            = false

  ebs_root_volume_size = 100

  ec2_attributes {
    subnet_id        = local.subnet_id
    instance_profile = local.emr_instance_profile_arn

    # The same security group is used for the master node and the core/task nodes.
    emr_managed_master_security_group = local.emr_security_group_id
    emr_managed_slave_security_group  = local.emr_security_group_id
  }

  master_instance_group {
    instance_type = "m5.xlarge"
  }

  core_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 1

    # Per the AWS provider docs, setting bid_price declares this group as Spot
    # instances (price in USD).
    bid_price = "0.30"

    ebs_config {
      type                 = "gp2"
      size                 = "40"
      volumes_per_instance = 1
    }
  }

  tags = {
    env = "dev"

    # NOTE(review): presumably required by the AmazonEMRServicePolicy_v2 managed
    # policy - confirm against the service role in use.
    for-use-with-amazon-emr-managed-policies = true
  }
}
例2:メモ
locals.tf
# Name of the shared S3 bucket used by example 2.
locals {
  # NOTE(review): "bacuket" looks like a typo of "bucket" - confirm the real bucket name.
  common_bucket_name = "demo-s3-bacuket"
}
aws.tf
# AWS provider configuration.
# Region, CLI profile and the permitted account ID all come from input
# variables (var.region and the var.names map).
provider "aws" {
  region  = var.region
  profile = var.names["profile"]

  # Guard rail: the provider refuses to operate on any other AWS account.
  allowed_account_ids = [var.names["account_id"]]
}
vpc.tf
## VPC
# Network that hosts the demo EMR cluster. DNS support and DNS hostnames are
# both enabled.
resource "aws_vpc" "demo_emr_vpc" {
  cidr_block           = var.vpc_cidr
  instance_tenancy     = "default"
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Name = "demo-emr-vpc"
  }
}

## Internet GW
resource "aws_internet_gateway" "demo_emr_igw" {
  vpc_id = aws_vpc.demo_emr_vpc.id

  tags = {
    Name = "demo-emr-igw"
  }
}

## Subnet
# Two public subnets, one per availability zone.
# NOTE: data.aws_availability_zones.available is not declared in this snippet;
# it must be defined elsewhere in the configuration.
resource "aws_subnet" "public" {
  count = 2

  vpc_id = aws_vpc.demo_emr_vpc.id

  # Extend the VPC prefix by 8 bits (e.g. /16 -> /24) and take the Nth network.
  cidr_block              = cidrsubnet(var.vpc_cidr, 8, count.index)
  availability_zone       = data.aws_availability_zones.available.names[count.index]
  map_public_ip_on_launch = true

  tags = {
    # e.g. "<prefix>-public-subnet01", "<prefix>-public-subnet02"
    Name = format("%s-public-subnet%02d", var.names["prefix"], count.index + 1)
  }
}

## Route Table
# Default route to the internet gateway, which makes the subnets public.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.demo_emr_vpc.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.demo_emr_igw.id
  }

  tags = {
    Name = "${var.names["prefix"]}-public"
  }
}

# Associate each public subnet with the public route table
# (count matches aws_subnet.public).
resource "aws_route_table_association" "public" {
  count = 2

  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}
iam.tf
# [1] IAM for service role
# Step1: Create the trust policy (allows the EMR service to assume the role)
data "aws_iam_policy_document" "demo_emr_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["elasticmapreduce.amazonaws.com"]
    }
  }
}

# Step2: IAM role
resource "aws_iam_role" "demo_emr_iam_role" {
  name               = "demo-emr-iam-role"
  assume_role_policy = data.aws_iam_policy_document.demo_emr_assume_role.json
}

# Step3: Look up the AWS managed IAM policies for Systems Manager / EMR
data "aws_iam_policy" "ssm_service_policy" {
  name = "AmazonSSMManagedInstanceCore"
}

data "aws_iam_policy" "emr_service_policy" {
  name = "AmazonEMRServicePolicy_v2"
}

# Step4: Attach the Systems Manager / EMR IAM policies to the role
resource "aws_iam_role_policy_attachment" "for_ec2_ssm" {
  role       = aws_iam_role.demo_emr_iam_role.name
  policy_arn = data.aws_iam_policy.ssm_service_policy.arn
}

resource "aws_iam_role_policy_attachment" "for_emr" {
  role       = aws_iam_role.demo_emr_iam_role.name
  policy_arn = data.aws_iam_policy.emr_service_policy.arn
}

# Step5: Customer managed policy with additional EC2 / PassRole permissions
# for the service role.
# This resource was previously declared as "demo_emr_instance_policy", which
# collided with the instance policy declared further below (same Terraform
# address and same IAM policy name). It is a single policy document with one
# statement per permission group.
resource "aws_iam_policy" "demo_emr_service_policy" {
  name        = "demo-emr-service-policy"
  description = "This is a policy for Demo EMR service role"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = "CreateInNetworkInterface"
        Effect = "Allow"
        Action = [
          "ec2:CreateNetworkInterface",
          "ec2:RunInstances",
          "ec2:CreateFleet",
          "ec2:CreateLaunchTemplate",
          "ec2:CreateLaunchTemplateVersion",
        ]
        Resource = [
          "arn:aws:ec2:*:*:subnet/subnet-*",
        ]
      },
      {
        Sid    = "CreateDefaultSecurityGroupInVpc"
        Effect = "Allow"
        Action = [
          "ec2:CreateSecurityGroup",
        ]
        Resource = [
          "arn:aws:ec2:*:*:vpc/vpc-*",
        ]
      },
      {
        Sid    = "PassRoleForEC2"
        Effect = "Allow"
        Action = [
          "iam:PassRole",
        ]
        # "1111111" is a placeholder; replace it with the real 12-digit account ID.
        # NOTE(review): aws_iam_role.demo_emr_instance_role below is created
        # without the /service-role/ path, so this pattern would not match it -
        # consider using that role's ARN here instead.
        Resource = [
          "arn:aws:iam::1111111:role/service-role/*",
        ]
        Condition = {
          StringLike = {
            "iam:PassedToService" = "ec2.amazonaws.com*"
          }
        }
      },
    ]
  })
}

resource "aws_iam_role_policy_attachment" "for_emr_custom" {
  role       = aws_iam_role.demo_emr_iam_role.name
  policy_arn = aws_iam_policy.demo_emr_service_policy.arn
}

# [2] IAM for EC2 instance profile
# Step1: Create the trust policy (allows EC2 to assume the role)
data "aws_iam_policy_document" "demo_ec2_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}

# Step2: IAM role assumed by the EC2 instances of the cluster.
# Policies are attached only through aws_iam_role_policy_attachment resources;
# combining those with the role's managed_policy_arns argument makes the two
# mechanisms fight over the attachment list.
resource "aws_iam_role" "demo_emr_instance_role" {
  name               = "demo-emr-instance-role"
  description        = "This is for demo EMR instance role."
  assume_role_policy = data.aws_iam_policy_document.demo_ec2_assume_role.json
}

# Step3: Attach the AWS managed EMR-for-EC2 policy and the custom S3 policy
resource "aws_iam_role_policy_attachment" "emr-ec2-role" {
  role       = aws_iam_role.demo_emr_instance_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
}

resource "aws_iam_role_policy_attachment" "demo_emr_instance_policy" {
  role       = aws_iam_role.demo_emr_instance_role.name
  policy_arn = aws_iam_policy.demo_emr_instance_policy.arn
}

# Customer managed policy granting the cluster instances S3 read/write access.
resource "aws_iam_policy" "demo_emr_instance_policy" {
  name        = "demo-emr-instance-policy"
  description = "This is a policy for Demo EMR instance"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = "AllowS3BucketAccess"
        Effect = "Allow"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:CreateBucket",
          "s3:DeleteObject",
          "s3:GetBucketVersioning",
          "s3:GetObject",
          "s3:GetObjectTagging",
          "s3:GetObjectVersion",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:ListBucketVersions",
          "s3:ListMultipartUploadParts",
          "s3:PutBucketVersioning",
          "s3:PutObject",
          "s3:PutObjectTagging",
        ]
        # Matches every bucket and object; narrow this to the buckets the
        # cluster actually needs where possible.
        Resource = [
          "arn:aws:s3:::*",
        ]
      },
    ]
  })
}
emr.tf
## EMR Cluster
# Instance profile that wraps the EC2 instance role (declared in iam.tf) so the
# cluster nodes can assume it.
resource "aws_iam_instance_profile" "demo_emr_instance_profile" {
  name = "demo-emr-instance-profile"
  role = aws_iam_role.demo_emr_instance_role.name
}

# Hadoop/Hive cluster running in the first public subnet, with logs written to
# the shared S3 bucket.
resource "aws_emr_cluster" "demo_emr_cluster" {
  name          = "demo-emr-cluster"
  release_label = "emr-6.12.0"
  applications  = ["Hadoop", "Hive"]

  log_uri      = "s3://${local.common_bucket_name}/emr/logs/"
  service_role = aws_iam_role.demo_emr_iam_role.arn

  ec2_attributes {
    key_name         = var.ssh_key_name
    subnet_id        = aws_subnet.public[0].id
    instance_profile = aws_iam_instance_profile.demo_emr_instance_profile.arn
  }

  # Instance group blocks replace the former master_instance_type /
  # core_instance_type / core_instance_count arguments, which current AWS
  # provider versions no longer accept.
  master_instance_group {
    instance_type = "m5.xlarge"
  }

  core_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 2
  }

  tags = {
    # NOTE(review): presumably required by the AmazonEMRServicePolicy_v2 managed
    # policy attached to the service role - confirm.
    for-use-with-amazon-emr-managed-policies = true
  }
}
参考文献
https://dev.classmethod.jp/articles/create-amazon-emr-cluster-with-terraform/
関連記事
Terraform ~ 環境構築編 ~
https://dk521123.hatenablog.com/entry/2023/04/05/000224
Terraform ~ 入門編 ~
https://dk521123.hatenablog.com/entry/2019/12/09/222057
Terraform ~ 基本編 ~
https://dk521123.hatenablog.com/entry/2023/05/03/000000
Terraform ~ AWS IAM ~
https://dk521123.hatenablog.com/entry/2023/04/12/214311
Terraform ~ AWS EC2 ~
https://dk521123.hatenablog.com/entry/2023/05/21/003048
Amazon EMR ~ Flink ~
https://dk521123.hatenablog.com/entry/2023/07/16/000000
Amazon EMR ~ boto3 編 ~
https://dk521123.hatenablog.com/entry/2020/06/24/173334
Amazon EMR ~IAM Role周り ~
https://dk521123.hatenablog.com/entry/2023/07/24/160124
Amazon EMR ~ ネットワーク周り ~
https://dk521123.hatenablog.com/entry/2022/05/13/155755
Amazon EMR ~ Auto scaling ~
https://dk521123.hatenablog.com/entry/2022/05/16/115229
Amazon EMR ~ チュートリアルのすすめ ~
https://dk521123.hatenablog.com/entry/2023/07/29/000000