You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

449 lines
21 KiB

  1. import configparser
  2. from os.path import expanduser
  3. # import libcloud.security
  4. import time
  5. from libcloud.compute.base import NodeImage
  6. from libcloud.compute.base import NodeState
  7. from libcloud.compute.providers import get_driver as compute_get_driver
  8. from libcloud.compute.types import Provider as compute_Provider
  9. from libcloud.loadbalancer.base import Member, Algorithm
  10. from libcloud.loadbalancer.types import Provider as loadbalancer_Provider
  11. from libcloud.loadbalancer.providers import get_driver as loadbalancer_get_driver
  12. home = expanduser("~")
  13. # requirements:
  14. # services: EC2, ELB
  15. # resources: 2 instances, (1 keypair, 2 security groups),
  16. # 1 (Classic) Elastic Load Balancer, expensive! delete it after you used it!
  17. # The image to look for and use for the started instance
  18. # aws ec2 describe-images --owner amazon | grep ubuntu | grep jammy | grep hvm | grep ssd |grep amd64 | grep -v minimal | grep -v pro | grep -v testing | grep -v k8s | grep "Name"
  19. ubuntu_image_name = 'ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-20240319'
  20. # ubuntu_image_name = 'ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20210128'
  21. # The public key to be used for SSH connection, please make sure, that you have the corresponding private key
  22. # use existing vockey in AWS Lab env from vocareum, enables login directly
  23. # from the lab's terminal:
  24. #
  25. # ssh -i ~/.ssh/labuser.pem ubuntu@<public-ip>
  26. keypair_name = "vockey"
  27. # keypair_name = 'srieger-pub'
  28. pub_key_file = home + '/.ssh/id_rsa.pub'
  29. # id_rsa.pub should look like this (standard sshd pubkey format):
  30. # ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAw+J...F3w2mleybgT1w== user@HOSTNAME
  31. # flavor_name = 't2.nano'
  32. # t2.nano only has 512 MB RAM, pip install will cause out of memory (OOM), install-aws.sh cloud-init script uses swap
  33. # to circumvent this issue, but t2.micro is also cheap and has 1 GB RAM which is sufficient for faafo example
  34. flavor_name = 't2.micro'
  35. # default region
  36. # region_name = 'eu-central-1'
  37. # region_name = 'ap-south-1'
  38. # AWS Academy Labs only allow us-east-1 and us-west-1 see our AWS Academy Lab Guide, https://awsacademy.instructure.com/login/
  39. region_name = 'us-east-1'
  40. # region_name = 'us-west-1'
  41. # starting instances in AWS Academy takes significantly longer compared to paid AWS accounts, allow ~ >2 minutes timeout
  42. timeout = 600
  43. def main():
  44. ###########################################################################
  45. #
  46. # get credentials
  47. #
  48. ###########################################################################
  49. # see AWS Academy Lab for Account Details
  50. # read credentials from file
  51. config = configparser.ConfigParser()
  52. config.read_file(open(home + '/.aws/credentials'))
  53. aws_access_key_id = config['default']['aws_access_key_id']
  54. aws_secret_access_key = config['default']['aws_secret_access_key']
  55. aws_session_token = config['default']['aws_session_token']
  56. # hard coded AWS credentials using vars
  57. # aws_access_key_id = "ASIAX..."
  58. # aws_secret_access_key = "WLxxXK+..."
  59. # aws_session_token = "FwoGZXIvYXdzEMb//////////wEaDE5rX.......0SleZ+L75I9iEri9LA4hovWul8HvexhCBK8.......................Ae/T+VkUbcQRtJEDwg+gYCABuk0JlSj5Wk7YA65r3BSNJXZFpkhbek6VBjvE/cEt5fKZEhENcdFxjAcAJLd6bOWi/oGXU5e3PX3mcXgm0oJpz6h3wqD1LvSDtw5GDwn0BHiF1Mu.......................cm/VukK5F"
  60. ###########################################################################
  61. #
  62. # create connection
  63. #
  64. ###########################################################################
  65. provider = compute_get_driver(compute_Provider.EC2)
  66. conn = provider(key=aws_access_key_id,
  67. secret=aws_secret_access_key,
  68. token=aws_session_token,
  69. region=region_name)
  70. ###########################################################################
  71. #
  72. # get image, flavor, network for instance creation
  73. #
  74. ###########################################################################
  75. # print("Search for AMI...")
  76. # image = conn.list_images(ex_filters={"name": ubuntu_image_name})[0]
  77. # print("Using image: %s" % image)
  78. # print("Fetching images (AMI) list from AWS region. This will take a lot of seconds (AWS has a very long list of "
  79. # "supported operating systems and versions)... please be patient...")
  80. # image = ''
  81. # for img in images:
  82. # # if img.name == ubuntu_image_name:
  83. # if img.extra['owner_alias'] == 'amazon':
  84. # print(img)
  85. # if img.id == ubuntu_image_name:
  86. # image = img
  87. # select image directly to save time, as retrieving the image list takes several minutes now,
  88. # need to change ami id here if updated or for other regions, id is working for course in
  89. # summer term 2022, in region: us-east-1 and pointing to ubuntu 18.04 used in the instance wizard,
  90. # to update AMI id use the create instance wizard and copy amd64 image id for ubuntu 18.04 in the
  91. # desired region
  92. #
  93. print("Selecting AMI...")
  94. # us-east-1 examples as of 9.5.2022:
  95. #
  96. # Canonical, Ubuntu, 18.04 LTS, amd64 bionic image build on 2022-04-11
  97. image_id = "ami-005de95e8ff495156"
  98. #
  99. # Canonical, Ubuntu, 20.04 LTS, amd64 focal image build on 2022-04-19
  100. # image_id = "ami-0c4f7023847b90238"
  101. #
  102. # Canonical, Ubuntu, 22.04 LTS, amd64 jammy image build on 2022-04-20
  103. # image_id = "ami-09d56f8956ab235b3"
  104. #
  105. image = conn.list_images(ex_image_ids=[image_id])[0]
  106. print("Using image: %s" % image)
  107. flavors = conn.list_sizes()
  108. flavor = [s for s in flavors if s.id == flavor_name][0]
  109. print(flavor)
  110. # networks = conn.ex_list_networks()
  111. # network = ''
  112. # for net in networks:
  113. # if net.name == project_network:
  114. # network = net
  115. ###########################################################################
  116. #
  117. # create keypair dependency
  118. #
  119. ###########################################################################
  120. print('Checking for existing SSH key pair...')
  121. keypair_exists = False
  122. for keypair in conn.list_key_pairs():
  123. if keypair.name == keypair_name:
  124. keypair_exists = True
  125. if keypair_exists:
  126. print('Keypair ' + keypair_name + ' already exists. Skipping import.')
  127. else:
  128. print('adding keypair...')
  129. conn.import_key_pair_from_file(keypair_name, pub_key_file)
  130. for keypair in conn.list_key_pairs():
  131. print(keypair)
  132. ###########################################################################
  133. #
  134. # clean up resources from previous demos
  135. #
  136. ###########################################################################
  137. # destroy running demo instances
  138. for instance in conn.list_nodes():
  139. if instance.name in ['all-in-one', 'app-worker-1', 'app-worker-2', 'app-worker-3', 'app-controller',
  140. 'app-services', 'app-api-1', 'app-api-2']:
  141. if instance.state is not NodeState.TERMINATED:
  142. print('Destroying Instance: %s' % instance.name)
  143. conn.destroy_node(instance)
  144. # wait until all nodes are destroyed to be able to remove dependent security groups
  145. nodes_still_running = True
  146. while nodes_still_running:
  147. nodes_still_running = False
  148. time.sleep(3)
  149. instances = conn.list_nodes()
  150. for instance in instances:
  151. # if we see any demo instances still running continue to wait for them to stop
  152. if instance.name in ['all-in-one', 'app-worker-1', 'app-worker-2', 'app-worker-3', 'app-controller',
  153. 'app-services', 'app-api-1', 'app-api-2']:
  154. if instance.state is not NodeState.TERMINATED:
  155. nodes_still_running = True
  156. if nodes_still_running is True:
  157. print('There are still instances running, waiting for them to be destroyed...')
  158. else:
  159. print('No instances running')
  160. # delete security groups, respecting dependencies (hence deleting 'control' and 'services' first)
  161. for group in conn.ex_list_security_groups():
  162. if group in ['control', 'services']:
  163. print('Deleting security group: %s' % group)
  164. conn.ex_delete_security_group(group)
  165. # now we can delete security groups 'api' and 'worker', as 'control' and 'api' depended on them, otherwise AWS will
  166. # throw DependencyViolation: resource has a dependent object
  167. for group in conn.ex_list_security_groups():
  168. if group in ['api', 'worker']:
  169. print('Deleting security group: %s' % group)
  170. conn.ex_delete_security_group(group)
  171. ###########################################################################
  172. #
  173. # create security group dependency
  174. #
  175. ###########################################################################
  176. def get_security_group(connection, security_group_name):
  177. """A helper function to check if security group already exists"""
  178. print('Checking for existing ' + security_group_name + ' security group...')
  179. for security_grp in connection.ex_list_security_groups():
  180. if security_grp == security_group_name:
  181. print('Security Group ' + security_group_name + ' already exists. Skipping creation.')
  182. return security_grp['group_id']
  183. return False
  184. if not get_security_group(conn, "api"):
  185. api_security_group_result = conn.ex_create_security_group('api', 'for API services only')
  186. api_security_group_id = api_security_group_result['group_id']
  187. conn.ex_authorize_security_group_ingress(api_security_group_id, 22, 22, cidr_ips=['0.0.0.0/0'],
  188. protocol='tcp')
  189. conn.ex_authorize_security_group_ingress(api_security_group_id, 80, 80, cidr_ips=['0.0.0.0/0'],
  190. protocol='tcp')
  191. else:
  192. api_security_group_id = get_security_group(conn, "api")
  193. if not get_security_group(conn, "worker"):
  194. worker_security_group_result = conn.ex_create_security_group('worker', 'for services that run on a worker node')
  195. worker_security_group_id = worker_security_group_result['group_id']
  196. conn.ex_authorize_security_group_ingress(worker_security_group_id, 22, 22, cidr_ips=['0.0.0.0/0'],
  197. protocol='tcp')
  198. else:
  199. worker_security_group_id = get_security_group(conn, "worker")
  200. if not get_security_group(conn, "control"):
  201. controller_security_group_result = conn.ex_create_security_group('control',
  202. 'for services that run on a control node')
  203. controller_security_group_id = controller_security_group_result['group_id']
  204. conn.ex_authorize_security_group_ingress(controller_security_group_id, 22, 22, cidr_ips=['0.0.0.0/0'],
  205. protocol='tcp')
  206. conn.ex_authorize_security_group_ingress(controller_security_group_id, 80, 80, cidr_ips=['0.0.0.0/0'],
  207. protocol='tcp')
  208. conn.ex_authorize_security_group_ingress(controller_security_group_id, 5672, 5672,
  209. group_pairs=[{'group_id': worker_security_group_id}], protocol='tcp')
  210. else:
  211. controller_security_group_id = get_security_group(conn, "control")
  212. if not get_security_group(conn, "services"):
  213. services_security_group_result = conn.ex_create_security_group('services', 'for DB and AMQP services only')
  214. services_security_group_id = services_security_group_result['group_id']
  215. conn.ex_authorize_security_group_ingress(services_security_group_id, 22, 22, cidr_ips=['0.0.0.0/0'],
  216. protocol='tcp')
  217. # conn.ex_authorize_security_group_ingress(services_security_group_id, 3306, 3306, cidr_ips=['0.0.0.0/0'],
  218. # group_pairs=[{'group_id': api_security_group_id}], protocol='tcp')
  219. conn.ex_authorize_security_group_ingress(services_security_group_id, 3306, 3306,
  220. group_pairs=[{'group_id': api_security_group_id}], protocol='tcp')
  221. conn.ex_authorize_security_group_ingress(services_security_group_id, 5672, 5672,
  222. group_pairs=[{'group_id': worker_security_group_id}], protocol='tcp')
  223. conn.ex_authorize_security_group_ingress(services_security_group_id, 5672, 5672,
  224. group_pairs=[{'group_id': api_security_group_id}], protocol='tcp')
  225. else:
  226. services_security_group_id = get_security_group(conn, "services")
  227. for security_group in conn.ex_list_security_groups():
  228. print(security_group)
  229. # get availability zones
  230. az = conn.list_locations()
  231. print(az)
  232. ###########################################################################
  233. #
  234. # create app-services instance (database & messaging) (Amazon AWS EC2)
  235. #
  236. ###########################################################################
  237. # https://git.openstack.org/cgit/openstack/faafo/plain/contrib/install-aws.sh
  238. # is currently broken, hence the "rabbitctl" lines were added in the example
  239. # below, see also https://bugs.launchpad.net/faafo/+bug/1679710
  240. #
  241. # Thanks to Stefan Friedmann for finding this fix ;)
  242. userdata_service = '''#!/usr/bin/env bash
  243. curl -L -s https://gogs.informatik.hs-fulda.de/srieger/cloud-computing-msc-ai-examples/raw/master/faafo/contrib/install-aws.sh | bash -s -- \
  244. -i database -i messaging
  245. rabbitmqctl add_user faafo guest
  246. rabbitmqctl set_user_tags faafo administrator
  247. rabbitmqctl set_permissions -p / faafo ".*" ".*" ".*"
  248. '''
  249. print('Starting new app-services instance and wait until it is running...')
  250. instance_services = conn.create_node(location=az[0],
  251. name='app-services',
  252. image=image,
  253. size=flavor,
  254. ex_keyname=keypair_name,
  255. ex_userdata=userdata_service,
  256. ex_security_groups=["services"])
  257. instance_services = conn.wait_until_running(nodes=[instance_services], timeout=timeout, ssh_interface='public_ips')
  258. services_ip = instance_services[0][0].private_ips[0]
  259. print(instance_services)
  260. ###########################################################################
  261. #
  262. # create app-api instances (Amazon AWS EC2)
  263. #
  264. ###########################################################################
  265. userdata_api = '''#!/usr/bin/env bash
  266. curl -L -s https://gogs.informatik.hs-fulda.de/srieger/cloud-computing-msc-ai-examples/raw/master/faafo/contrib/install-aws.sh | bash -s -- \
  267. -i faafo -r api -m 'amqp://faafo:guest@%(services_ip)s:5672/' \
  268. -d 'mysql+pymysql://faafo:password@%(services_ip)s:3306/faafo'
  269. ''' % {'services_ip': services_ip}
  270. print('Starting new app-api-1 instance and wait until it is running...')
  271. instance_api_1 = conn.create_node(location=az[0],
  272. name='app-api-1',
  273. image=image,
  274. size=flavor,
  275. ex_keyname=keypair_name,
  276. ex_userdata=userdata_api,
  277. ex_security_groups=["api"])
  278. print('Starting new app-api-2 instance and wait until it is running...')
  279. instance_api_2 = conn.create_node(location=az[1],
  280. name='app-api-2',
  281. image=image,
  282. size=flavor,
  283. ex_keyname=keypair_name,
  284. ex_userdata=userdata_api,
  285. ex_security_groups=["api"])
  286. instance_api_1 = conn.wait_until_running(nodes=[instance_api_1], timeout=timeout, ssh_interface='public_ips')
  287. api_1_ip = instance_api_1[0][0].private_ips[0]
  288. print("app-api-1 public ip: " + instance_api_1[0][1][0])
  289. instance_api_2 = conn.wait_until_running(nodes=[instance_api_2], timeout=timeout, ssh_interface='public_ips')
  290. # currently only api_1_ip is used
  291. api_2_ip = instance_api_2[0][0].private_ips[0]
  292. print("app-api-2 public ip: " + instance_api_2[0][1][0])
  293. ###########################################################################
  294. #
  295. # create worker instances (Amazon AWS EC2)
  296. #
  297. ###########################################################################
  298. userdata_worker = '''#!/usr/bin/env bash
  299. curl -L -s https://gogs.informatik.hs-fulda.de/srieger/cloud-computing-msc-ai-examples/raw/master/faafo/contrib/install-aws.sh | bash -s -- \
  300. -i faafo -r worker -e 'http://%(api_1_ip)s' -m 'amqp://faafo:guest@%(services_ip)s:5672/'
  301. ''' % {'api_1_ip': api_1_ip, 'services_ip': services_ip}
  302. # userdata_api-api-2 = '''#!/usr/bin/env bash
  303. # curl -L -s https://gogs.informatik.hs-fulda.de/srieger/cloud-computing-msc-ai-examples/raw/master/faafo/contrib/install-aws.sh | bash -s -- \
  304. # -i faafo -r worker -e 'http://%(api_2_ip)s' -m 'amqp://faafo:guest@%(services_ip)s:5672/'
  305. # ''' % {'api_2_ip': api_2_ip, 'services_ip': services_ip}
  306. print('Starting new app-worker-1 instance and wait until it is running...')
  307. instance_worker_1 = conn.create_node(location=az[0],
  308. name='app-worker-1',
  309. image=image, size=flavor,
  310. ex_keyname=keypair_name,
  311. ex_userdata=userdata_worker,
  312. ex_security_groups=["worker"])
  313. print('Starting new app-worker-2 instance and wait until it is running...')
  314. instance_worker_2 = conn.create_node(location=az[1],
  315. name='app-worker-2',
  316. image=image, size=flavor,
  317. ex_keyname=keypair_name,
  318. ex_userdata=userdata_worker,
  319. ex_security_groups=["worker"])
  320. # do not start worker 3 initially, can be started using scale-out-add-worker.py demo
  321. # print('Starting new app-worker-3 instance and wait until it is running...')
  322. # instance_worker_3 = conn.create_node(name='app-worker-3',
  323. # image=image, size=flavor,
  324. # networks=[network],
  325. # ex_keyname=keypair_name,
  326. # ex_userdata=userdata_worker,
  327. # ex_security_groups=[worker_security_group])
  328. print(instance_worker_1)
  329. print(instance_worker_2)
  330. # print(instance_worker_3)
  331. ###########################################################################
  332. #
  333. # create load balancer (Amazon AWS ELB)
  334. #
  335. ###########################################################################
  336. elb_provider = loadbalancer_get_driver(loadbalancer_Provider.ELB)
  337. elb_conn = elb_provider(aws_access_key_id,
  338. aws_secret_access_key,
  339. token=aws_session_token,
  340. region=region_name)
  341. print("Deleting previously created load balancers in: " + str(elb_conn.list_balancers()))
  342. for loadbalancer in elb_conn.list_balancers():
  343. if loadbalancer.name == "lb1":
  344. print("Deleting Load Balancer: " + str(loadbalancer))
  345. elb_conn.destroy_balancer(loadbalancer)
  346. # get suffix (a, b, c, ...) from all availability zones, available in the selected region
  347. all_availability_zones_in_region = []
  348. for az in conn.ex_list_availability_zones():
  349. all_availability_zones_in_region.append(az.name[-1])
  350. # create new load balancer
  351. # example uses "classic" ELB with default HTTP health. monitor, you can see the result in the EC2 console, after
  352. # running this script
  353. new_load_balancer = elb_conn.create_balancer(
  354. name='lb1',
  355. algorithm=Algorithm.ROUND_ROBIN,
  356. port=80,
  357. protocol='http',
  358. members=[],
  359. ex_members_availability_zones=all_availability_zones_in_region)
  360. # attach api instances as members to load balancer
  361. elb_conn.balancer_attach_compute_node(balancer=new_load_balancer, node=instance_api_1[0][0])
  362. elb_conn.balancer_attach_compute_node(balancer=new_load_balancer, node=instance_api_2[0][0])
  363. print("Created load balancer: " + str(new_load_balancer))
  364. # wait for the load balancer to be ready
  365. while new_load_balancer.state != 2:
  366. time.sleep(3)
  367. new_load_balancer = elb_conn.get_balancer(new_load_balancer.id)
  368. print("\n\nYou can see the instances created in EC2 in AWS Console. You'll also find the load balancer under ELB "
  369. "there.\n"
  370. " You can access the faafo application deployed to the loadbalancer at: http://" + new_load_balancer.ip +
  371. " as soon as instances are detected to be deployed and healthy by the load balancer.")
# Run the deployment only when this file is executed as a script, not when it is imported as a module.
if __name__ == '__main__':
    main()