Unboxing AWS DataSync

Unboxing AWS DataSync

Your toolbox should contain AWS DataSync, a service to synchronize data between all kinds of locations. Copy data between S3, EFS, and FSx. On top of that, DataSync works with on-premises locations like NFS, SMB, and more.

[wpcc-element _tag=”source” type=”image/webp” srcset=”/images/2020/12/move@730w.webp 730w, /images/2020/12/move@730w2x.webp 1460w, /images/2020/12/move@610w.webp 610w, /images/2020/12/move@610w2x.webp 1220w, /images/2020/12/move@450w.webp 450w, /images/2020/12/move@450w2x.webp 900w, /images/2020/12/move@330w.webp 330w, /images/2020/12/move@330w2x.webp 660w, /images/2020/12/move@545w.webp 545w, /images/2020/12/move@545w2x.webp 1090w” sizes=”(min-width: 1200px) 730px, (min-width: 992px) 610px, (min-width: 768px) 450px, (min-width: 576px) 330px, 545px” _close=”0″]

In this week’s video Andreas unboxes AWS DataSync for you, explains the core concepts, and demos how to copy data between S3 and EFS.

[wpcc-iframe class=”embed-responsive-item lozad” data-src=”https://www.youtube-nocookie.com/embed/-ru39l6qljk” allow=”accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture” allowfullscreen=””]

Here is the Terraform configuration code, that we used for our example to synchronize data between S3 and EFS. The Terraform configuration creates the following resources:

  • S3 bucket
  • EFS file system
  • DataSync configuration
  • EC2 instance allowing you to access the EFS file system
  • Security Groups for DataSync and EC2
  • IAM roles for Data Sync and EC2

Use the AWS Systems Manager Session Manager to connect with the EC2 instance to mount and inspect the EFS file system.

# Terraform settings: pin the AWS provider to the 3.x release line.
terraform {
  required_providers {
    aws = {
      version = "~> 3.0"
      source  = "hashicorp/aws"
    }
  }
}

# All resources in this configuration are created in eu-west-1.
provider "aws" {
  region = "eu-west-1"
}

# Look up the account's default VPC in the configured region.
data "aws_vpc" "default" {
  default = true
}

# IDs of the subnets in the default VPC.
# NOTE: no filter or tag selector is applied, so despite the "public" label
# this returns every subnet of the VPC.
data "aws_subnet_ids" "public" {
  vpc_id = data.aws_vpc.default.id
}

# Pick a single subnet for the DataSync location and the EC2 instance.
# The IDs are sorted first so the same subnet is chosen on every run.
data "aws_subnet" "selected" {
  id     = sort(data.aws_subnet_ids.public.ids)[0]
  vpc_id = data.aws_vpc.default.id
}

# Most recent AMI whose name matches the Amazon Linux 2 (x86_64, gp2) pattern.
data "aws_ami" "amzn2" {
  # Presumably the Amazon-owned account that publishes these AMIs; verify
  # before reusing in another partition.
  owners      = ["137112412989"]
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-2.0.*-x86_64-gp2"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}

# DataSync location pointing at the root ("/") of the demo S3 bucket.
# DataSync accesses the bucket through the IAM role given in s3_config.
resource "aws_datasync_location_s3" "demo" {
  s3_bucket_arn = aws_s3_bucket.demo.arn
  subdirectory  = "/"

  s3_config {
    bucket_access_role_arn = aws_iam_role.datasync.arn
  }

  # The block above references only the role ARN, so Terraform would not
  # otherwise order this resource after the role's inline policy. Without the
  # S3 permissions attached, creating the location can fail its bucket access
  # check, so wait for the policy explicitly.
  depends_on = [aws_iam_role_policy.datasync]
}

# DataSync location for the demo EFS file system. DataSync reaches the file
# system from the selected subnet using the DataSync security group.
resource "aws_datasync_location_efs" "demo" {
  efs_file_system_arn = aws_efs_file_system.demo.arn

  ec2_config {
    security_group_arns = [aws_security_group.datasync.arn]
    subnet_arn          = data.aws_subnet.selected.arn
  }

  # The EFS file system must have a mount target before this location can be
  # created. Referencing only the file system ARN does not imply that
  # ordering, so depend on the mount targets explicitly.
  depends_on = [aws_efs_mount_target.demo]
}

# DataSync task that copies from the S3 location to the EFS location.
resource "aws_datasync_task" "demo" {
  name = "demo-s3-to-efs"

  source_location_arn      = aws_datasync_location_s3.demo.arn
  destination_location_arn = aws_datasync_location_efs.demo.arn
}


# Source bucket for the demo; Terraform generates a unique name that starts
# with "demo-".
resource "aws_s3_bucket" "demo" {
  bucket_prefix = "demo-"
}

# Destination EFS file system, created with provider defaults and a Name tag.
resource "aws_efs_file_system" "demo" {
  tags = {
    Name = "Demo"
  }
}

# IAM role that the DataSync service assumes to access the S3 bucket.
resource "aws_iam_role" "datasync" {
  name = "demo-datasync"

  # Trust policy: only the DataSync service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "datasync.amazonaws.com"
        }
      }
    ]
  })
}

# Inline policy on the DataSync role granting access to the demo bucket.
resource "aws_iam_role_policy" "datasync" {
  role = aws_iam_role.datasync.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Bucket-level actions, scoped to the bucket ARN itself.
      {
        Effect   = "Allow"
        Resource = aws_s3_bucket.demo.arn
        Action = [
          "s3:GetBucketLocation",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:HeadBucket"
        ]
      },
      # Object-level actions, scoped to every key inside the bucket.
      {
        Effect   = "Allow"
        Resource = "${aws_s3_bucket.demo.arn}/*"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:DeleteObject",
          "s3:GetObject",
          "s3:ListMultipartUploadParts",
          "s3:GetObjectTagging",
          "s3:PutObjectTagging",
          "s3:PutObject"
        ]
      }
    ]
  })
}

# One EFS mount target per subnet of the default VPC, each protected by the
# EFS security group.
resource "aws_efs_mount_target" "demo" {
  for_each = data.aws_subnet_ids.public.ids

  subnet_id       = each.value
  file_system_id  = aws_efs_file_system.demo.id
  security_groups = [aws_security_group.efs.id]
}

# Security group attached to the EFS mount targets. Its ingress rules are
# declared as separate aws_security_group_rule resources.
resource "aws_security_group" "efs" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-efs-"
}

# Allow inbound NFS (TCP 2049) to EFS from the DataSync security group.
resource "aws_security_group_rule" "efs_datasync" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.datasync.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Allow inbound NFS (TCP 2049) to EFS from the EC2 instance's security group.
resource "aws_security_group_rule" "efs_ec2" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.ec2.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Security group used by DataSync when it connects to EFS. Its egress rule is
# declared as a separate aws_security_group_rule resource.
resource "aws_security_group" "datasync" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-datasync-"
}

# Allow outbound NFS (TCP 2049) from the DataSync security group to the EFS
# security group. For an egress rule, source_security_group_id names the
# destination group.
resource "aws_security_group_rule" "datasync" {
  security_group_id        = aws_security_group.datasync.id
  source_security_group_id = aws_security_group.efs.id

  type      = "egress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# EC2 instance placed in the selected subnet, with the EC2 security group and
# the demo instance profile attached.
resource "aws_instance" "demo" {
  instance_type = "t3.micro"
  ami           = data.aws_ami.amzn2.id

  subnet_id              = data.aws_subnet.selected.id
  vpc_security_group_ids = [aws_security_group.ec2.id]
  iam_instance_profile   = aws_iam_instance_profile.ec2.name

  tags = {
    Name = "demo-datasync"
  }
}

# Security group for the EC2 instance: no ingress rules, unrestricted egress.
resource "aws_security_group" "ec2" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-ec2-"

  # Protocol "-1" with ports 0-0 means all protocols and all ports.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# IAM role for the EC2 instance.
resource "aws_iam_role" "ec2" {
  name = "demo-ec2"

  # Trust policy: only the EC2 service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = "sts:AssumeRole"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })
}

# Instance profile that hands the EC2 role to the instance.
resource "aws_iam_instance_profile" "ec2" {
  role = aws_iam_role.ec2.name
  name = "demo-ec2"
}

# Inline policy on the EC2 role with the Systems Manager permissions the
# instance uses for Session Manager connections.
resource "aws_iam_role_policy" "ec2" {
  role = aws_iam_role.ec2.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Resource = "*"
        Action = [
          "ec2messages:*",
          "ssmmessages:*",
          "ssm:UpdateInstanceInformation",
          "ssm:GetDeployablePatchSnapshotForInstance",
          "ssm:ListAssociations"
        ]
      }
    ]
  })
}