1#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
28# pylint: disable=too-many-lines
29import copy
30import functools
31import getpass
32import logging
33import os
34import re
35
36from acloud import errors
37from acloud.internal import constants
38from acloud.internal.lib import base_cloud_client
39from acloud.internal.lib import utils
40from acloud.internal.lib.ssh import IP
41
42
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Retry budget passed to utils.RetryOnException for fingerprint conflicts.
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10
_METADATA_KEY = "key"
_METADATA_KEY_VALUE = "value"
_SSH_KEYS_NAME = "sshKeys"
_ITEMS = "items"
_METADATA = "metadata"
# Matches strings of the form "zones/<zone>" and captures the zone part.
_ZONE_RE = re.compile(r"^zones/(?P<zone>.+)")

# Names of the quota metrics inspected before creating an instance.
_METRIC_CPUS = "CPUS"
_METRIC_DISKS_GB = "DISKS_TOTAL_GB"
_METRIC_USE_ADDRESSES = "IN_USE_ADDRESSES"
_METRICS = [_METRIC_CPUS, _METRIC_DISKS_GB, _METRIC_USE_ADDRESSES]
# Keys of a single quota entry.
_USAGE = "usage"
_LIMIT = "limit"
# The minimum free amount of each metric required to create an instance.
_REQUIRE_METRICS = {
    _METRIC_CPUS: 8,
    _METRIC_DISKS_GB: 1000,
    _METRIC_USE_ADDRESSES: 1,
}

# Template for a disk entry in an instance request.
BASE_DISK_ARGS = {
    "type": "PERSISTENT",
    "boot": True,
    "mode": "READ_WRITE",
    "autoDelete": True,
    "initializeParams": {},
}
71
72
class OperationScope:
    """Enum-like holder naming the scope an operation belongs to."""
    GLOBAL = "global"
    REGION = "region"
    ZONE = "zone"
78
79
class PersistentDiskType:
    """Enum-like holder for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    SSD = "pd-ssd"
    STANDARD = "pd-standard"
88
89
class ImageStatus:
    """Enum-like holder for the possible states of an image."""
    FAILED = "FAILED"
    PENDING = "PENDING"
    READY = "READY"
95
96
def _IsFingerPrintError(exc):
    """Tell whether an exception is an HTTP 412 "Precondition Failed" error.

    Args:
        exc: Exception instance.

    Returns:
        False if exc is not an errors.HttpError, otherwise the result of
        comparing its code against 412.
    """
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
107
108
109# pylint: disable=too-many-public-methods
110class ComputeClient(base_cloud_client.BaseCloudApiClient):
111    """Client that manages GCE."""
112
    # API settings, used by BaseCloudApiClient.
    API_NAME = "compute"
    API_VERSION = "v1"
    # The two OAuth scope URLs joined into one space-separated string.
    SCOPE = " ".join([
        "https://www.googleapis.com/auth/compute",
        "https://www.googleapis.com/auth/devstorage.read_write"
    ])
    # Default settings for gce operations
    # NOTE(review): presumably the scopes granted to created instances; the
    # consumer is not visible in this part of the file -- confirm.
    DEFAULT_INSTANCE_SCOPE = [
        "https://www.googleapis.com/auth/androidbuild.internal",
        "https://www.googleapis.com/auth/devstorage.read_only",
        "https://www.googleapis.com/auth/logging.write"
    ]
    # Timeout and polling interval handed to utils.PollAndWait by
    # WaitOnOperation.
    OPERATION_TIMEOUT_SECS = 30 * 60  # 30 mins
    OPERATION_POLL_INTERVAL_SECS = 20
    # NOTE(review): looks like field names of a machine type resource; the
    # consumer is not visible in this part of the file -- confirm.
    MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
    # HTTP status code 403.
    ACCESS_DENIED_CODE = 403
130
131    def __init__(self, acloud_config, oauth2_credentials):
132        """Initialize.
133
134        Args:
135            acloud_config: An AcloudConfig object.
136            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
137        """
138        super().__init__(oauth2_credentials)
139        self._project = acloud_config.project
140
141    def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
142        """Get status of an operation.
143
144        Args:
145            operation: An Operation resource in the format of json.
146            operation_scope: A value from OperationScope, "zone", "region",
147                             or "global".
148            scope_name: If operation_scope is "zone" or "region", this should be
149                        the name of the zone or region, e.g. "us-central1-f".
150
151        Returns:
152            Status of the operation, one of "DONE", "PENDING", "RUNNING".
153
154        Raises:
155            errors.DriverError: if the operation fails.
156        """
157        operation_name = operation["name"]
158        if operation_scope == OperationScope.GLOBAL:
159            api = self.service.globalOperations().get(
160                project=self._project, operation=operation_name)
161            result = self.Execute(api)
162        elif operation_scope == OperationScope.ZONE:
163            api = self.service.zoneOperations().get(
164                project=self._project,
165                operation=operation_name,
166                zone=scope_name)
167            result = self.Execute(api)
168        elif operation_scope == OperationScope.REGION:
169            api = self.service.regionOperations().get(
170                project=self._project,
171                operation=operation_name,
172                region=scope_name)
173            result = self.Execute(api)
174
175        if result.get("error"):
176            errors_list = result["error"]["errors"]
177            raise errors.DriverError(
178                "Get operation state failed, errors: %s" % str(errors_list))
179        return result["status"]
180
181    def WaitOnOperation(self, operation, operation_scope, scope_name=None):
182        """Wait for an operation to finish.
183
184        Args:
185            operation: An Operation resource in the format of json.
186            operation_scope: A value from OperationScope, "zone", "region",
187                             or "global".
188            scope_name: If operation_scope is "zone" or "region", this should be
189                        the name of the zone or region, e.g. "us-central1-f".
190        """
191        timeout_exception = errors.GceOperationTimeoutError(
192            "Operation hits timeout, did not complete within %d secs." %
193            self.OPERATION_TIMEOUT_SECS)
194        utils.PollAndWait(
195            func=self._GetOperationStatus,
196            expected_return="DONE",
197            timeout_exception=timeout_exception,
198            timeout_secs=self.OPERATION_TIMEOUT_SECS,
199            sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
200            operation=operation,
201            operation_scope=operation_scope,
202            scope_name=scope_name)
203
204    def GetProject(self):
205        """Get project information.
206
207        Returns:
208            A project resource in json.
209        """
210        api = self.service.projects().get(project=self._project)
211        return self.Execute(api)
212
213    def GetRegionInfo(self):
214        """Get region information that includes all quotas limit.
215
216        The region info example:
217        {"items":
218            [{"status": "UP",
219              "name": "asia-east1",
220              "quotas":
221                [{"usage": 92, "metric": "CPUS", "limit": 100},
222                 {"usage": 640, "metric": "DISKS_TOTAL_GB", "limit": 10240},
223              ...]]}
224        }
225
226        Returns:
227            A region resource in json.
228        """
229        api = self.service.regions().list(project=self._project)
230        return self.Execute(api)
231
232    @staticmethod
233    def GetMetricQuota(regions_info, zone, metric):
234        """Get CPU quota limit in specific zone and project.
235
236        Args:
237            regions_info: Dict, regions resource in json.
238            zone: String, name of zone.
239            metric: String, name of metric, e.g. "CPUS".
240
241        Returns:
242            A dict of quota information. Such as
243            {"usage": 100, "metric": "CPUS", "limit": 200}
244        """
245        for region_info in regions_info["items"]:
246            if region_info["name"] in zone:
247                for quota in region_info["quotas"]:
248                    if quota["metric"] == metric:
249                        return quota
250        logger.info("Can't get %s quota info from zone(%s)", metric, zone)
251        return None
252
253    def EnoughMetricsInZone(self, zone):
254        """Check the zone have enough metrics to create instance.
255
256        The metrics include CPUS and DISKS.
257
258        Args:
259            zone: String, name of zone.
260
261        Returns:
262            Boolean. True if zone have enough quota.
263        """
264        regions_info = self.GetRegionInfo()
265        for metric in _METRICS:
266            quota = self.GetMetricQuota(regions_info, zone, metric)
267            if not quota:
268                logger.debug(
269                    "Can't query the metric(%s) in zone(%s)", metric, zone)
270                return False
271            if quota[_LIMIT] - quota[_USAGE] < _REQUIRE_METRICS[metric]:
272                logger.debug(
273                    "The metric(%s) is over limit in zone(%s)", metric, zone)
274                return False
275        return True
276
277    def GetDisk(self, disk_name, zone):
278        """Get disk information.
279
280        Args:
281          disk_name: A string.
282          zone: String, name of zone.
283
284        Returns:
285          An disk resource in json.
286          https://cloud.google.com/compute/docs/reference/latest/disks#resource
287        """
288        api = self.service.disks().get(
289            project=self._project, zone=zone, disk=disk_name)
290        return self.Execute(api)
291
292    def CheckDiskExists(self, disk_name, zone):
293        """Check if disk exists.
294
295        Args:
296          disk_name: A string
297          zone: String, name of zone.
298
299        Returns:
300          True if disk exists, otherwise False.
301        """
302        try:
303            self.GetDisk(disk_name, zone)
304            exists = True
305        except errors.ResourceNotFoundError:
306            exists = False
307        logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
308                     exists)
309        return exists
310
311    def CreateDisk(self,
312                   disk_name,
313                   source_image,
314                   size_gb,
315                   zone,
316                   source_project=None,
317                   disk_type=PersistentDiskType.STANDARD):
318        """Create a gce disk.
319
320        Args:
321            disk_name: String
322            source_image: String, name of the image.
323            size_gb: Integer, size in gb.
324            zone: String, name of the zone, e.g. us-central1-b.
325            source_project: String, required if the image is located in a different
326                            project.
327            disk_type: String, a value from PersistentDiskType, STANDARD
328                       for regular hard disk or SSD for solid state disk.
329        """
330        source_project = source_project or self._project
331        source_image = "projects/%s/global/images/%s" % (
332            source_project, source_image) if source_image else None
333        logger.info("Creating disk %s, size_gb: %d, source_image: %s",
334                    disk_name, size_gb, str(source_image))
335        body = {
336            "name": disk_name,
337            "sizeGb": size_gb,
338            "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone,
339                                                           disk_type),
340        }
341        api = self.service.disks().insert(
342            project=self._project,
343            sourceImage=source_image,
344            zone=zone,
345            body=body)
346        operation = self.Execute(api)
347        try:
348            self.WaitOnOperation(
349                operation=operation,
350                operation_scope=OperationScope.ZONE,
351                scope_name=zone)
352        except errors.DriverError:
353            logger.error("Creating disk failed, cleaning up: %s", disk_name)
354            if self.CheckDiskExists(disk_name, zone):
355                self.DeleteDisk(disk_name, zone)
356            raise
357        logger.info("Disk %s has been created.", disk_name)
358
359    def DeleteDisk(self, disk_name, zone):
360        """Delete a gce disk.
361
362        Args:
363            disk_name: A string, name of disk.
364            zone: A string, name of zone.
365        """
366        logger.info("Deleting disk %s", disk_name)
367        api = self.service.disks().delete(
368            project=self._project, zone=zone, disk=disk_name)
369        operation = self.Execute(api)
370        self.WaitOnOperation(
371            operation=operation,
372            operation_scope=OperationScope.ZONE,
373            scope_name=zone)
374        logger.info("Deleted disk %s", disk_name)
375
376    def DeleteDisks(self, disk_names, zone):
377        """Delete multiple disks.
378
379        Args:
380            disk_names: A list of disk names.
381            zone: A string, name of zone.
382
383        Returns:
384            A tuple, (deleted, failed, error_msgs)
385            deleted: A list of names of disks that have been deleted.
386            failed: A list of names of disks that we fail to delete.
387            error_msgs: A list of failure messages.
388        """
389        if not disk_names:
390            logger.warning("Nothing to delete. Arg disk_names is not provided.")
391            return [], [], []
392        # Batch send deletion requests.
393        logger.info("Deleting disks: %s", disk_names)
394        delete_requests = {}
395        for disk_name in set(disk_names):
396            request = self.service.disks().delete(
397                project=self._project, disk=disk_name, zone=zone)
398            delete_requests[disk_name] = request
399        return self._BatchExecuteAndWait(
400            delete_requests, OperationScope.ZONE, scope_name=zone)
401
402    def ListDisks(self, zone, disk_filter=None):
403        """List disks.
404
405        Args:
406            zone: A string, representing zone name. e.g. "us-central1-f"
407            disk_filter: A string representing a filter in format of
408                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
409                             e.g. "name ne example-instance"
410                             e.g. "name eq "example-instance-[0-9]+""
411
412        Returns:
413            A list of disks.
414        """
415        return self.ListWithMultiPages(
416            api_resource=self.service.disks().list,
417            project=self._project,
418            zone=zone,
419            filter=disk_filter)
420
421    def CreateImage(self,
422                    image_name,
423                    source_uri=None,
424                    source_disk=None,
425                    labels=None):
426        """Create a Gce image.
427
428        Args:
429            image_name: String, name of image
430            source_uri: Full Google Cloud Storage URL where the disk image is
431                        stored.  e.g. "https://storage.googleapis.com/my-bucket/
432                        avd-system-2243663.tar.gz"
433            source_disk: String, this should be the disk's selfLink value
434                         (including zone and project), rather than the disk_name
435                         e.g. https://www.googleapis.com/compute/v1/projects/
436                              google.com:android-builds-project/zones/
437                              us-east1-d/disks/<disk_name>
438            labels: Dict, will be added to the image's labels.
439
440        Raises:
441            errors.DriverError: For malformed request or response.
442            errors.GceOperationTimeoutError: Operation takes too long to finish.
443        """
444        if self.CheckImageExists(image_name):
445            return
446        if (source_uri and source_disk) or (not source_uri
447                                            and not source_disk):
448            raise errors.DriverError(
449                "Creating image %s requires either source_uri %s or "
450                "source_disk %s but not both" % (image_name, source_uri,
451                                                 source_disk))
452        if source_uri:
453            logger.info("Creating image %s, source_uri %s", image_name,
454                        source_uri)
455            body = {
456                "name": image_name,
457                "rawDisk": {
458                    "source": source_uri,
459                },
460            }
461        else:
462            logger.info("Creating image %s, source_disk %s", image_name,
463                        source_disk)
464            body = {
465                "name": image_name,
466                "sourceDisk": source_disk,
467            }
468        if labels is not None:
469            body["labels"] = labels
470        api = self.service.images().insert(project=self._project, body=body)
471        operation = self.Execute(api)
472        try:
473            self.WaitOnOperation(
474                operation=operation, operation_scope=OperationScope.GLOBAL)
475        except errors.DriverError:
476            logger.error("Creating image failed, cleaning up: %s", image_name)
477            if self.CheckImageExists(image_name):
478                self.DeleteImage(image_name)
479            raise
480        logger.info("Image %s has been created.", image_name)
481
482    @utils.RetryOnException(_IsFingerPrintError,
483                            _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
484    def SetImageLabels(self, image_name, new_labels):
485        """Update image's labels. Retry for finger print conflict.
486
487        Note: Decorator RetryOnException will retry the call for FingerPrint
488          conflict (HTTP error code 412). The fingerprint is used to detect
489          conflicts of GCE resource updates. The fingerprint is initially generated
490          by Compute Engine and changes after every request to modify or update
491          resources (e.g. GCE "image" resource has "fingerPrint" for "labels"
492          updates).
493
494        Args:
495            image_name: A string, the image name.
496            new_labels: Dict, will be added to the image's labels.
497
498        Returns:
499            A GlobalOperation resouce.
500            https://cloud.google.com/compute/docs/reference/latest/globalOperations
501        """
502        image = self.GetImage(image_name)
503        labels = image.get("labels", {})
504        labels.update(new_labels)
505        body = {
506            "labels": labels,
507            "labelFingerprint": image["labelFingerprint"]
508        }
509        api = self.service.images().setLabels(
510            project=self._project, resource=image_name, body=body)
511        return self.Execute(api)
512
513    def CheckImageExists(self, image_name):
514        """Check if image exists.
515
516        Args:
517            image_name: A string
518
519        Returns:
520            True if image exists, otherwise False.
521        """
522        try:
523            self.GetImage(image_name)
524            exists = True
525        except errors.ResourceNotFoundError:
526            exists = False
527        logger.debug("CheckImageExists: image_name: %s, result: %s",
528                     image_name, exists)
529        return exists
530
531    def GetImage(self, image_name, image_project=None):
532        """Get image information.
533
534        Args:
535            image_name: A string
536            image_project: A string
537
538        Returns:
539            An image resource in json.
540            https://cloud.google.com/compute/docs/reference/latest/images#resource
541        """
542        api = self.service.images().get(
543            project=image_project or self._project, image=image_name)
544        return self.Execute(api)
545
546    def GetImageFromFamily(self, image_family, image_project=None):
547        """Get image information from image_family.
548
549        Args:
550            image_family: String of image family.
551            image_project: String of image project.
552
553        Returns:
554            An image resource in json.
555            https://cloud.google.com/compute/docs/reference/latest/images#resource
556        """
557        api = self.service.images().getFromFamily(
558            project=image_project or self._project, family=image_family)
559        return self.Execute(api)
560
561    def DeleteImage(self, image_name):
562        """Delete an image.
563
564        Args:
565            image_name: A string
566        """
567        logger.info("Deleting image %s", image_name)
568        api = self.service.images().delete(
569            project=self._project, image=image_name)
570        operation = self.Execute(api)
571        self.WaitOnOperation(
572            operation=operation, operation_scope=OperationScope.GLOBAL)
573        logger.info("Deleted image %s", image_name)
574
575    def DeleteImages(self, image_names):
576        """Delete multiple images.
577
578        Args:
579            image_names: A list of image names.
580
581        Returns:
582            A tuple, (deleted, failed, error_msgs)
583            deleted: A list of names of images that have been deleted.
584            failed: A list of names of images that we fail to delete.
585            error_msgs: A list of failure messages.
586        """
587        if not image_names:
588            return [], [], []
589        # Batch send deletion requests.
590        logger.info("Deleting images: %s", image_names)
591        delete_requests = {}
592        for image_name in set(image_names):
593            request = self.service.images().delete(
594                project=self._project, image=image_name)
595            delete_requests[image_name] = request
596        return self._BatchExecuteAndWait(delete_requests,
597                                         OperationScope.GLOBAL)
598
599    def ListImages(self, image_filter=None, image_project=None):
600        """List images.
601
602        Args:
603            image_filter: A string representing a filter in format of
604                          FIELD_NAME COMPARISON_STRING LITERAL_STRING
605                          e.g. "name ne example-image"
606                          e.g. "name eq "example-image-[0-9]+""
607            image_project: String. If not provided, will list images from the default
608                           project. Otherwise, will list images from the given
609                           project, which can be any arbitrary project where the
610                           account has read access
611                           (i.e. has the role "roles/compute.imageUser")
612
613        Read more about image sharing across project:
614        https://cloud.google.com/compute/docs/images/sharing-images-across-projects
615
616        Returns:
617            A list of images.
618        """
619        return self.ListWithMultiPages(
620            api_resource=self.service.images().list,
621            project=image_project or self._project,
622            filter=image_filter)
623
624    def GetInstance(self, instance, zone):
625        """Get information about an instance.
626
627        Args:
628            instance: A string, representing instance name.
629            zone: A string, representing zone name. e.g. "us-central1-f"
630
631        Returns:
632            An instance resource in json.
633            https://cloud.google.com/compute/docs/reference/latest/instances#resource
634        """
635        api = self.service.instances().get(
636            project=self._project, zone=zone, instance=instance)
637        return self.Execute(api)
638
639    def AttachAccelerator(self, instance, zone, accelerator_count,
640                          accelerator_type):
641        """Attach a GPU accelerator to the instance.
642
643        Note: In order for this to succeed the following must hold:
644        - The machine schedule must be set to "terminate" i.e:
645          SetScheduling(self, instance, zone, on_host_maintenance="terminate")
646          must have been called.
647        - The machine is not starting or running. i.e.
648          StopInstance(self, instance) must have been called.
649
650        Args:
651            instance: A string, representing instance name.
652            zone: String, name of zone.
653            accelerator_count: The number accelerators to be attached to the instance.
654             a value of 0 will detach all accelerators.
655            accelerator_type: The type of accelerator to attach. e.g.
656              "nvidia-tesla-k80"
657        """
658        body = {
659            "guestAccelerators": [{
660                "acceleratorType":
661                self.GetAcceleratorUrl(accelerator_type, zone),
662                "acceleratorCount":
663                accelerator_count
664            }]
665        }
666        api = self.service.instances().setMachineResources(
667            project=self._project, zone=zone, instance=instance, body=body)
668        operation = self.Execute(api)
669        try:
670            self.WaitOnOperation(
671                operation=operation,
672                operation_scope=OperationScope.ZONE,
673                scope_name=zone)
674        except errors.GceOperationTimeoutError:
675            logger.error("Attach instance failed: %s", instance)
676            raise
677        logger.info("%d x %s have been attached to instance %s.",
678                    accelerator_count, accelerator_type, instance)
679
680    def AttachDisk(self, instance, zone, **kwargs):
681        """Attach the external disk to the instance.
682
683        Args:
684            instance: A string, representing instance name.
685            zone: String, name of zone.
686            **kwargs: The attachDisk request body. See "https://cloud.google.com/
687              compute/docs/reference/latest/instances/attachDisk" for detail.
688              {
689                "kind": "compute#attachedDisk",
690                "type": string,
691                "mode": string,
692                "source": string,
693                "deviceName": string,
694                "index": integer,
695                "boot": boolean,
696                "initializeParams": {
697                  "diskName": string,
698                  "sourceImage": string,
699                  "diskSizeGb": long,
700                  "diskType": string,
701                  "sourceImageEncryptionKey": {
702                    "rawKey": string,
703                    "sha256": string
704                  }
705                },
706                "autoDelete": boolean,
707                "licenses": [
708                  string
709                ],
710                "interface": string,
711                "diskEncryptionKey": {
712                  "rawKey": string,
713                  "sha256": string
714                }
715              }
716
717        Returns:
718            An disk resource in json.
719            https://cloud.google.com/compute/docs/reference/latest/disks#resource
720
721
722        Raises:
723            errors.GceOperationTimeoutError: Operation takes too long to finish.
724        """
725        api = self.service.instances().attachDisk(
726            project=self._project, zone=zone, instance=instance, body=kwargs)
727        operation = self.Execute(api)
728        try:
729            self.WaitOnOperation(
730                operation=operation,
731                operation_scope=OperationScope.ZONE,
732                scope_name=zone)
733        except errors.GceOperationTimeoutError:
734            logger.error("Attach instance failed: %s", instance)
735            raise
736        logger.info("Disk has been attached to instance %s.", instance)
737
738    def DetachDisk(self, instance, zone, disk_name):
739        """Attach the external disk to the instance.
740
741        Args:
742            instance: A string, representing instance name.
743            zone: String, name of zone.
744            disk_name: A string, the name of the detach disk.
745
746        Returns:
747            A ZoneOperation resource.
748            See https://cloud.google.com/compute/docs/reference/latest/zoneOperations
749
750        Raises:
751            errors.GceOperationTimeoutError: Operation takes too long to finish.
752        """
753        api = self.service.instances().detachDisk(
754            project=self._project,
755            zone=zone,
756            instance=instance,
757            deviceName=disk_name)
758        operation = self.Execute(api)
759        try:
760            self.WaitOnOperation(
761                operation=operation,
762                operation_scope=OperationScope.ZONE,
763                scope_name=zone)
764        except errors.GceOperationTimeoutError:
765            logger.error("Detach instance failed: %s", instance)
766            raise
767        logger.info("Disk has been detached to instance %s.", instance)
768
769    def StartInstance(self, instance, zone):
770        """Start |instance| in |zone|.
771
772        Args:
773            instance: A string, representing instance name.
774            zone: A string, representing zone name. e.g. "us-central1-f"
775
776        Raises:
777            errors.GceOperationTimeoutError: Operation takes too long to finish.
778        """
779        api = self.service.instances().start(
780            project=self._project, zone=zone, instance=instance)
781        operation = self.Execute(api)
782        try:
783            self.WaitOnOperation(
784                operation=operation,
785                operation_scope=OperationScope.ZONE,
786                scope_name=zone)
787        except errors.GceOperationTimeoutError:
788            logger.error("Start instance failed: %s", instance)
789            raise
790        logger.info("Instance %s has been started.", instance)
791
792    def StartInstances(self, instances, zone):
793        """Start |instances| in |zone|.
794
795        Args:
796            instances: A list of strings, representing instance names's list.
797            zone: A string, representing zone name. e.g. "us-central1-f"
798
799        Returns:
800            A tuple, (done, failed, error_msgs)
801            done: A list of string, representing the names of instances that
802              have been executed.
803            failed: A list of string, representing the names of instances that
804              we failed to execute.
805            error_msgs: A list of string, representing the failure messages.
806        """
807        action = functools.partial(
808            self.service.instances().start, project=self._project, zone=zone)
809        return self._BatchExecuteOnInstances(instances, zone, action)
810
811    def StopInstance(self, instance, zone):
812        """Stop |instance| in |zone|.
813
814        Args:
815            instance: A string, representing instance name.
816            zone: A string, representing zone name. e.g. "us-central1-f"
817
818        Raises:
819            errors.GceOperationTimeoutError: Operation takes too long to finish.
820        """
821        api = self.service.instances().stop(
822            project=self._project, zone=zone, instance=instance)
823        operation = self.Execute(api)
824        try:
825            self.WaitOnOperation(
826                operation=operation,
827                operation_scope=OperationScope.ZONE,
828                scope_name=zone)
829        except errors.GceOperationTimeoutError:
830            logger.error("Stop instance failed: %s", instance)
831            raise
832        logger.info("Instance %s has been terminated.", instance)
833
834    def StopInstances(self, instances, zone):
835        """Stop |instances| in |zone|.
836
837        Args:
838            instances: A list of strings, representing instance names's list.
839            zone: A string, representing zone name. e.g. "us-central1-f"
840
841        Returns:
842            A tuple, (done, failed, error_msgs)
843            done: A list of string, representing the names of instances that
844                  have been executed.
845            failed: A list of string, representing the names of instances that
846                    we failed to execute.
847            error_msgs: A list of string, representing the failure messages.
848        """
849        action = functools.partial(
850            self.service.instances().stop, project=self._project, zone=zone)
851        return self._BatchExecuteOnInstances(instances, zone, action)
852
853    def SetScheduling(self,
854                      instance,
855                      zone,
856                      automatic_restart=True,
857                      on_host_maintenance="MIGRATE"):
858        """Update scheduling config |automatic_restart| and |on_host_maintenance|.
859
860        Args:
861            instance: A string, representing instance name.
862            zone: A string, representing zone name. e.g. "us-central1-f".
863            automatic_restart: Boolean, determine whether the instance will
864                               automatically restart if it crashes or not,
865                               default to True.
866            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
867                                 The instance's maintenance behavior, which
868                                 determines whether the instance is live
869                                 "MIGRATE" or "TERMINATE" when there is
870                                 a maintenance event.
871
872        Raises:
873            errors.GceOperationTimeoutError: Operation takes too long to finish.
874        """
875        body = {
876            "automaticRestart": automatic_restart,
877            "onHostMaintenance": on_host_maintenance
878        }
879        api = self.service.instances().setScheduling(
880            project=self._project, zone=zone, instance=instance, body=body)
881        operation = self.Execute(api)
882        try:
883            self.WaitOnOperation(
884                operation=operation,
885                operation_scope=OperationScope.ZONE,
886                scope_name=zone)
887        except errors.GceOperationTimeoutError:
888            logger.error("Set instance scheduling failed: %s", instance)
889            raise
890        logger.info(
891            "Instance scheduling changed:\n"
892            "    automaticRestart: %s\n"
893            "    onHostMaintenance: %s\n",
894            str(automatic_restart).lower(), on_host_maintenance)
895
896    def ListInstances(self, instance_filter=None):
897        """List instances cross all zones.
898
899        Gcompute response instance. For example:
900        {
901            'items':
902            {
903                'zones/europe-west3-b':
904                {
905                    'warning':
906                    {
907                        'message': "There are no results for scope
908                        'zones/europe-west3-b' on this page.",
909                        'code': 'NO_RESULTS_ON_PAGE',
910                        'data': [{'value': u'zones/europe-west3-b',
911                                  'key': u'scope'}]
912                    }
913                },
914                'zones/asia-east1-b':
915                {
916                    'instances': [
917                    {
918                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone'
919                        'status': 'RUNNING',
920                        'cpuPlatform': 'Intel Broadwell',
921                        'startRestricted': False,
922                        'labels': {u'created_by': u'herbertxue'},
923                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
924                        ...
925                    }]
926                }
927            }
928        }
929
930        Args:
931            instance_filter: A string representing a filter in format of
932                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
933                             e.g. "name ne example-instance"
934                             e.g. "name eq "example-instance-[0-9]+""
935
936        Returns:
937            A list of instances.
938        """
939        # aggregatedList will only return 500 results max, so if there are more,
940        # we need to send in the next page token to get the next 500 (and so on
941        # and so forth.
942        get_more_instances = True
943        page_token = None
944        instances_list = []
945        while get_more_instances:
946            api = self.service.instances().aggregatedList(
947                project=self._project,
948                filter=instance_filter,
949                pageToken=page_token)
950            response = self.Execute(api)
951            page_token = response.get("nextPageToken")
952            get_more_instances = page_token is not None
953            for instances_data in response["items"].values():
954                if "instances" in instances_data:
955                    for instance in instances_data.get("instances"):
956                        instances_list.append(instance)
957
958        return instances_list
959
960    def SetSchedulingInstances(self,
961                               instances,
962                               zone,
963                               automatic_restart=True,
964                               on_host_maintenance="MIGRATE"):
965        """Update scheduling config |automatic_restart| and |on_host_maintenance|.
966
967        See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.
968
969        Args:
970            instances: A list of string, representing instance names.
971            zone: A string, representing zone name. e.g. "us-central1-f".
972            automatic_restart: Boolean, determine whether the instance will
973                               automatically restart if it crashes or not,
974                               default to True.
975            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
976                                 The instance's maintenance behavior, which
977                                 determines whether the instance is live
978                                 migrated or terminated when there is
979                                 a maintenance event.
980
981        Returns:
982            A tuple, (done, failed, error_msgs)
983            done: A list of string, representing the names of instances that
984                  have been executed.
985            failed: A list of string, representing the names of instances that
986                    we failed to execute.
987            error_msgs: A list of string, representing the failure messages.
988        """
989        body = {
990            "automaticRestart": automatic_restart,
991            "OnHostMaintenance": on_host_maintenance
992        }
993        action = functools.partial(
994            self.service.instances().setScheduling,
995            project=self._project,
996            zone=zone,
997            body=body)
998        return self._BatchExecuteOnInstances(instances, zone, action)
999
1000    def _BatchExecuteOnInstances(self, instances, zone, action):
1001        """Batch processing operations requiring computing time.
1002
1003        Args:
1004            instances: A list of instance names.
1005            zone: A string, e.g. "us-central1-f".
1006            action: partial func, all kwargs for this gcloud action has been
1007                    defined in the caller function (e.g. See "StartInstances")
1008                    except 'instance' which will be defined by iterating the
1009                    |instances|.
1010
1011        Returns:
1012            A tuple, (done, failed, error_msgs)
1013            done: A list of string, representing the names of instances that
1014                  have been executed.
1015            failed: A list of string, representing the names of instances that
1016                    we failed to execute.
1017            error_msgs: A list of string, representing the failure messages.
1018        """
1019        if not instances:
1020            return [], [], []
1021        # Batch send requests.
1022        logger.info("Batch executing instances: %s", instances)
1023        requests = {}
1024        for instance_name in set(instances):
1025            requests[instance_name] = action(instance=instance_name)
1026        return self._BatchExecuteAndWait(
1027            requests, operation_scope=OperationScope.ZONE, scope_name=zone)
1028
1029    def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
1030        """Batch processing requests and wait on the operation.
1031
1032        Args:
1033            requests: A dictionary. The key is a string representing the resource
1034                      name. For example, an instance name, or an image name.
1035            operation_scope: A value from OperationScope, "zone", "region",
1036                             or "global".
1037            scope_name: If operation_scope is "zone" or "region", this should be
1038                        the name of the zone or region, e.g. "us-central1-f".
1039        Returns:
1040            A tuple, (done, failed, error_msgs)
1041            done: A list of string, representing the resource names that have
1042                  been executed.
1043            failed: A list of string, representing resource names that
1044                    we failed to execute.
1045            error_msgs: A list of string, representing the failure messages.
1046        """
1047        results = self.BatchExecute(requests)
1048        # Initialize return values
1049        failed = []
1050        error_msgs = []
1051        for resource_name, (_, error) in results.items():
1052            if error is not None:
1053                failed.append(resource_name)
1054                error_msgs.append(str(error))
1055        done = []
1056        # Wait for the executing operations to finish.
1057        logger.info("Waiting for executing operations")
1058        for resource_name in requests.keys():
1059            operation, _ = results[resource_name]
1060            if operation:
1061                try:
1062                    self.WaitOnOperation(operation, operation_scope,
1063                                         scope_name)
1064                    done.append(resource_name)
1065                except errors.DriverError as exc:
1066                    failed.append(resource_name)
1067                    error_msgs.append(str(exc))
1068        return done, failed, error_msgs
1069
1070    def ListZones(self):
1071        """List all zone instances in the project.
1072
1073        Returns:
1074            Gcompute response instance. For example:
1075            {
1076              "id": "projects/google.com%3Aandroid-build-staging/zones",
1077              "kind": "compute#zoneList",
1078              "selfLink": "https://www.googleapis.com/compute/v1/projects/"
1079                  "google.com:android-build-staging/zones"
1080              "items": [
1081                {
1082                  'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
1083                  'description': 'asia-east1-c',
1084                  'id': '2222',
1085                  'kind': 'compute#zone',
1086                  'name': 'asia-east1-c',
1087                  'region': 'https://www.googleapis.com/compute/v1/projects/'
1088                      'google.com:android-build-staging/regions/asia-east1',
1089                  'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
1090                      'google.com:android-build-staging/zones/asia-east1-c',
1091                  'status': 'UP'
1092                }, {
1093                  'creationTimestamp': '2014-05-30T18:35:16.575-07:00',
1094                  'description': 'asia-east1-b',
1095                  'id': '2221',
1096                  'kind': 'compute#zone',
1097                  'name': 'asia-east1-b',
1098                  'region': 'https://www.googleapis.com/compute/v1/projects/'
1099                      'google.com:android-build-staging/regions/asia-east1',
1100                  'selfLink': 'https://www.googleapis.com/compute/v1/projects'
1101                      '/google.com:android-build-staging/zones/asia-east1-b',
1102                  'status': 'UP'
1103                }]
1104            }
1105            See cloud cluster's api/mixer_zones.proto
1106        """
1107        api = self.service.zones().list(project=self._project)
1108        return self.Execute(api)
1109
1110    def ListRegions(self):
1111        """List all the regions for a project.
1112
1113        Returns:
1114            A dictionary containing all the zones and additional data. See this link
1115            for the detailed response:
1116            https://cloud.google.com/compute/docs/reference/latest/regions/list.
1117            Example:
1118            {
1119              'items': [{
1120                  'name':
1121                      'us-central1',
1122                  'quotas': [{
1123                      'usage': 2.0,
1124                      'limit': 24.0,
1125                      'metric': 'CPUS'
1126                  }, {
1127                      'usage': 1.0,
1128                      'limit': 23.0,
1129                      'metric': 'IN_USE_ADDRESSES'
1130                  }, {
1131                      'usage': 209.0,
1132                      'limit': 10240.0,
1133                      'metric': 'DISKS_TOTAL_GB'
1134                  }, {
1135                      'usage': 1000.0,
1136                      'limit': 20000.0,
1137                      'metric': 'INSTANCES'
1138                  }]
1139              },..]
1140            }
1141        """
1142        api = self.service.regions().list(project=self._project)
1143        return self.Execute(api)
1144
1145    def _GetNetworkArgs(self, network, zone, disable_external_ip):
1146        """Helper to generate network args that is used to create an instance.
1147
1148        See: https://cloud.google.com/compute/docs/reference/rest/v1/instances
1149        for more information on the specific fields.
1150
1151        Args:
1152            network: A string, e.g. "default".
1153            zone: String, representing zone name, e.g. "us-central1-f"
1154            disable_external_ip: Boolean, true if the external ip should be
1155                                 disabled.
1156
1157        Returns:
1158            A dictionary representing network args.
1159        """
1160        network_args = {
1161            "network": self.GetNetworkUrl(network),
1162            "accessConfigs": [] if disable_external_ip else [{
1163                "name": "External NAT",
1164                "type": "ONE_TO_ONE_NAT"
1165            }]
1166        }
1167        # default network can be blank or set to default, we don't need to
1168        # specify the subnetwork for that.
1169        if network and network != "default":
1170            network_args["subnetwork"] = self.GetSubnetworkUrl(network, zone)
1171        return network_args
1172
1173    def _GetDiskArgs(self,
1174                     disk_name,
1175                     image_name,
1176                     image_project=None,
1177                     disk_size_gb=None):
1178        """Helper to generate disk args that is used to create an instance.
1179
1180        Args:
1181            disk_name: A string
1182            image_name: A string
1183            image_project: A string
1184            disk_size_gb: An integer
1185
1186        Returns:
1187            List holding dict of disk args.
1188        """
1189        args = copy.deepcopy(BASE_DISK_ARGS)
1190        args["initializeParams"] = {
1191            "diskName": disk_name,
1192            "sourceImage": self.GetImage(image_name,
1193                                         image_project)["selfLink"],
1194        }
1195        # TODO: Remove this check once it's validated that we can either pass in
1196        # a None diskSizeGb or we find an appropriate default val.
1197        if disk_size_gb:
1198            args["diskSizeGb"] = disk_size_gb
1199        return [args]
1200
1201    def _GetExtraDiskArgs(self, extra_disk_name, zone):
1202        """Get extra disk arg for given disk.
1203
1204        Args:
1205            extra_disk_name: String, name of the disk.
1206            zone: String, representing zone name, e.g. "us-central1-f"
1207
1208        Returns:
1209            A dictionary of disk args.
1210        """
1211        return [{
1212            "type": "PERSISTENT",
1213            "mode": "READ_WRITE",
1214            "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1215                                                         extra_disk_name),
1216            "autoDelete": True,
1217            "boot": False,
1218            "interface": "SCSI",
1219            "deviceName": extra_disk_name,
1220        }]
1221
1222    # pylint: disable=too-many-locals
1223    def CreateInstance(self,
1224                       instance,
1225                       image_name,
1226                       machine_type,
1227                       metadata,
1228                       network,
1229                       zone,
1230                       disk_args=None,
1231                       image_project=None,
1232                       gpu=None,
1233                       disk_type=None,
1234                       extra_disk_name=None,
1235                       extra_scopes=None,
1236                       tags=None,
1237                       disable_external_ip=False):
1238        """Create a gce instance with a gce image.
1239
1240        Args:
1241            instance: String, instance name.
1242            image_name: String, source image used to create this disk.
1243            machine_type: String, representing machine_type,
1244                          e.g. "n1-standard-1"
1245            metadata: Dict, maps a metadata name to its value.
1246            network: String, representing network name, e.g. "default"
1247            zone: String, representing zone name, e.g. "us-central1-f"
1248            disk_args: A list of extra disk args (strings), see _GetDiskArgs
1249                       for example, if None, will create a disk using the given
1250                       image.
1251            image_project: String, name of the project where the image
1252                           belongs. Assume the default project if None.
1253            gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
1254                 None no gpus will be attached. For more details see:
1255                 https://cloud.google.com/compute/docs/gpus/add-gpus
1256            disk_type: String, type of GCE instance disk type.
1257            extra_disk_name: String,the name of the extra disk to attach.
1258            extra_scopes: A list of extra scopes to be provided to the instance.
1259            tags: A list of tags to associate with the instance. e.g.
1260                  ["http-server", "https-server"]
1261            disable_external_ip: Boolean, true if instance external ip should be
1262                                 disabled.
1263        """
1264        disk_args = (disk_args
1265                     or self._GetDiskArgs(instance, image_name, image_project))
1266        if extra_disk_name:
1267            disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
1268
1269        if disk_type:
1270            for disk_arg in disk_args:
1271                if "initializeParams" not in disk_arg:
1272                    disk_arg["initializeParams"] = {}
1273                disk_arg["initializeParams"][
1274                    "diskType"] = "projects/%s/zones/%s/diskTypes/%s" % (
1275                        self._project, zone, disk_type)
1276
1277        scopes = []
1278        scopes.extend(self.DEFAULT_INSTANCE_SCOPE)
1279        if extra_scopes:
1280            scopes.extend(extra_scopes)
1281
1282        # Add labels for giving the instances ability to be filter for
1283        # acloud list/delete cmds.
1284        body = {
1285            "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
1286            "name": instance,
1287            "networkInterfaces": [
1288                self._GetNetworkArgs(network, zone, disable_external_ip)
1289            ],
1290            "disks": disk_args,
1291            "labels": {constants.LABEL_CREATE_BY: getpass.getuser()},
1292            "serviceAccounts": [{
1293                "email": "default",
1294                "scopes": scopes,
1295            }],
1296            "enableVtpm": True,
1297        }
1298
1299        if tags:
1300            body["tags"] = {"items": tags}
1301        if gpu:
1302            body["guestAccelerators"] = [{
1303                "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
1304                "acceleratorCount": 1
1305            }]
1306            # Instances with GPUs cannot live migrate because they are assigned
1307            # to specific hardware devices.
1308            body["scheduling"] = {"onHostMaintenance": "terminate"}
1309        if metadata:
1310            metadata_list = [{
1311                _METADATA_KEY: key,
1312                _METADATA_KEY_VALUE: val
1313            } for key, val in metadata.items()]
1314            body[_METADATA] = {_ITEMS: metadata_list}
1315        logger.info("Creating instance: project %s, zone %s, body:%s",
1316                    self._project, zone, body)
1317        api = self.service.instances().insert(
1318            project=self._project, zone=zone, body=body)
1319        operation = self.Execute(api)
1320        self.WaitOnOperation(
1321            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1322        logger.info("Instance %s has been created.", instance)
1323
1324    def DeleteInstance(self, instance, zone):
1325        """Delete a gce instance.
1326
1327        Args:
1328            instance: A string, instance name.
1329            zone: A string, e.g. "us-central1-f"
1330        """
1331        logger.info("Deleting instance: %s", instance)
1332        api = self.service.instances().delete(
1333            project=self._project, zone=zone, instance=instance)
1334        operation = self.Execute(api)
1335        self.WaitOnOperation(
1336            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1337        logger.info("Deleted instance: %s", instance)
1338
1339    def DeleteInstances(self, instances, zone):
1340        """Delete multiple instances.
1341
1342        Args:
1343            instances: A list of instance names.
1344            zone: A string, e.g. "us-central1-f".
1345
1346        Returns:
1347            A tuple, (deleted, failed, error_msgs)
1348            deleted: A list of names of instances that have been deleted.
1349            failed: A list of names of instances that we fail to delete.
1350            error_msgs: A list of failure messages.
1351        """
1352        action = functools.partial(
1353            self.service.instances().delete, project=self._project, zone=zone)
1354        return self._BatchExecuteOnInstances(instances, zone, action)
1355
1356    def ResetInstance(self, instance, zone):
1357        """Reset the gce instance.
1358
1359        Args:
1360            instance: A string, instance name.
1361            zone: A string, e.g. "us-central1-f".
1362        """
1363        logger.info("Resetting instance: %s", instance)
1364        api = self.service.instances().reset(
1365            project=self._project, zone=zone, instance=instance)
1366        operation = self.Execute(api)
1367        self.WaitOnOperation(
1368            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1369        logger.info("Instance has been reset: %s", instance)
1370
1371    def GetMachineType(self, machine_type, zone):
1372        """Get URL for a given machine typle.
1373
1374        Args:
1375            machine_type: A string, name of the machine type.
1376            zone: A string, e.g. "us-central1-f"
1377
1378        Returns:
1379            A machine type resource in json.
1380            https://cloud.google.com/compute/docs/reference/latest/
1381            machineTypes#resource
1382        """
1383        api = self.service.machineTypes().get(
1384            project=self._project, zone=zone, machineType=machine_type)
1385        return self.Execute(api)
1386
1387    def GetAcceleratorUrl(self, accelerator_type, zone):
1388        """Get URL for a given type of accelator.
1389
1390        Args:
1391            accelerator_type: A string, representing the accelerator, e.g
1392              "nvidia-tesla-k80"
1393            zone: A string representing a zone, e.g. "us-west1-b"
1394
1395        Returns:
1396            A URL that points to the accelerator resource, e.g.
1397            https://www.googleapis.com/compute/v1/projects/<project id>/zones/
1398            us-west1-b/acceleratorTypes/nvidia-tesla-k80
1399        """
1400        api = self.service.acceleratorTypes().get(
1401            project=self._project, zone=zone, acceleratorType=accelerator_type)
1402        result = self.Execute(api)
1403        return result["selfLink"]
1404
1405    def GetNetworkUrl(self, network):
1406        """Get URL for a given network.
1407
1408        Args:
1409            network: A string, representing network name, e.g "default"
1410
1411        Returns:
1412            A URL that points to the network resource, e.g.
1413            https://www.googleapis.com/compute/v1/projects/<project id>/
1414            global/networks/default
1415        """
1416        api = self.service.networks().get(
1417            project=self._project, network=network)
1418        result = self.Execute(api)
1419        return result["selfLink"]
1420
1421    def GetSubnetworkUrl(self, network, zone):
1422        """Get URL for a given network and zone.
1423
1424        Return the subnetwork for the network in the specified region that the
1425        specified zone resides in. If there is no subnetwork for the specified
1426        zone, raise an exception.
1427
1428        Args:
1429            network: A string, representing network name, e.g "default"
1430            zone: String, representing zone name, e.g. "us-central1-f"
1431
1432        Returns:
1433            A URL that points to the network resource, e.g.
1434            https://www.googleapis.com/compute/v1/projects/<project id>/
1435            global/networks/default
1436
1437        Raises:
1438            errors.NoSubnetwork: When no subnetwork exists for the zone
1439            specified.
1440        """
1441        api = self.service.networks().get(
1442            project=self._project, network=network)
1443        result = self.Execute(api)
1444        region = zone.rsplit("-", 1)[0]
1445        for subnetwork in result.get("subnetworks", []):
1446            if region in subnetwork:
1447                return subnetwork
1448        raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
1449                                  (network, region))
1450
1451    def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
1452        """Compare the size of two machine types.
1453
1454        Args:
1455            machine_type_1: A string representing a machine type, e.g. n1-standard-1
1456            machine_type_2: A string representing a machine type, e.g. n1-standard-1
1457            zone: A string representing a zone, e.g. "us-central1-f"
1458
1459        Returns:
1460            -1 if any metric of machine size of the first type is smaller than
1461                the second type.
1462            0 if all metrics of machine size are equal.
1463            1 if at least one metric of machine size of the first type is
1464                greater than the second type and all metrics of first type are
1465                greater or equal to the second type.
1466
1467        Raises:
1468            errors.DriverError: For malformed response.
1469        """
1470        machine_info_1 = self.GetMachineType(machine_type_1, zone)
1471        machine_info_2 = self.GetMachineType(machine_type_2, zone)
1472        result = 0
1473        for metric in self.MACHINE_SIZE_METRICS:
1474            if metric not in machine_info_1 or metric not in machine_info_2:
1475                raise errors.DriverError(
1476                    "Malformed machine size record: Can't find '%s' in %s or %s"
1477                    % (metric, machine_info_1, machine_info_2))
1478            cmp_result = machine_info_1[metric] - machine_info_2[metric]
1479            if cmp_result < 0:
1480                return -1
1481            if cmp_result > 0:
1482                result = 1
1483        return result
1484
1485    def GetSerialPortOutput(self, instance, zone, port=1):
1486        """Get serial port output.
1487
1488        Args:
1489            instance: string, instance name.
1490            zone: string, zone name.
1491            port: int, which COM port to read from, 1-4, default to 1.
1492
1493        Returns:
1494            String, contents of the output.
1495
1496        Raises:
1497            errors.DriverError: For malformed response.
1498        """
1499        api = self.service.instances().getSerialPortOutput(
1500            project=self._project, zone=zone, instance=instance, port=port)
1501        result = self.Execute(api)
1502        if "contents" not in result:
1503            raise errors.DriverError(
1504                "Malformed response for GetSerialPortOutput: %s" % result)
1505        return result["contents"]
1506
1507    def GetInstanceNamesByIPs(self, ips):
1508        """Get Instance names by IPs.
1509
1510        This function will go through all instances, which
1511        could be slow if there are too many instances.  However, currently
1512        GCE doesn't support search for instance by IP.
1513
1514        Args:
1515            ips: A set of IPs.
1516
1517        Returns:
1518            A dictionary where key is IP and value is instance name or None
1519            if instance is not found for the given IP.
1520        """
1521        ip_name_map = dict.fromkeys(ips)
1522        for instance in self.ListInstances():
1523            try:
1524                instance_ip = GetInstanceIP(instance)
1525                ip = instance_ip.external or instance_ip.internal
1526                if ip in ips:
1527                    ip_name_map[ip] = instance["name"]
1528            except (IndexError, KeyError) as e:
1529                logger.error("Could not get instance names by ips: %s", str(e))
1530        return ip_name_map
1531
1532    def GetInstanceIP(self, instance, zone):
1533        """Get Instance IP given instance name.
1534
1535        Args:
1536            instance: String, representing instance name.
1537            zone: String, name of the zone.
1538
1539        Returns:
1540            ssh.IP object, that stores internal and external ip of the instance.
1541        """
1542        instance = self.GetInstance(instance, zone)
1543        return GetInstanceIP(instance)
1544
1545    @utils.TimeExecute(function_description="Updating instance metadata: ")
1546    def SetInstanceMetadata(self, zone, instance, body):
1547        """Set instance metadata.
1548
1549        Args:
1550            zone: String, name of zone.
1551            instance: String, representing instance name.
1552            body: Dict, Metadata body.
1553                  metdata is in the following format.
1554                  {
1555                    "kind": "compute#metadata",
1556                    "fingerprint": "a-23icsyx4E=",
1557                    "items": [
1558                      {
1559                        "key": "sshKeys",
1560                        "value": "key"
1561                      }, ...
1562                    ]
1563                  }
1564        """
1565        api = self.service.instances().setMetadata(
1566            project=self._project, zone=zone, instance=instance, body=body)
1567        operation = self.Execute(api)
1568        self.WaitOnOperation(
1569            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1570
1571    def AddSshRsaInstanceMetadata(self, user, ssh_rsa_path, instance):
1572        """Add the public rsa key to the instance's metadata.
1573
1574        Confirm that the instance has this public key in the instance's
1575        metadata, if not we will add this public key.
1576
1577        Args:
1578            user: String, name of the user which the key belongs to.
1579            ssh_rsa_path: String, The absolute path to public rsa key.
1580            instance: String, representing instance name.
1581        """
1582        ssh_rsa_path = os.path.expanduser(ssh_rsa_path)
1583        rsa = GetRsaKey(ssh_rsa_path)
1584        entry = "%s:%s" % (user, rsa)
1585        logger.debug("New RSA entry: %s", entry)
1586
1587        zone = self.GetZoneByInstance(instance)
1588        gce_instance = self.GetInstance(instance, zone)
1589        metadata = gce_instance.get(_METADATA)
1590        if RsaNotInMetadata(metadata, entry):
1591            self.UpdateRsaInMetadata(zone, instance, metadata, entry)
1592
1593    def GetZoneByInstance(self, instance):
1594        """Get the zone from instance name.
1595
1596        Gcompute response instance. For example:
1597        {
1598            'items':
1599            {
1600                'zones/europe-west3-b':
1601                {
1602                    'warning':
1603                    {
1604                        'message': "There are no results for scope
1605                        'zones/europe-west3-b' on this page.",
1606                        'code': 'NO_RESULTS_ON_PAGE',
1607                        'data': [{'value': u'zones/europe-west3-b',
1608                                  'key': u'scope'}]
1609                    }
1610                },
1611                'zones/asia-east1-b':
1612                {
1613                    'instances': [
1614                    {
1615                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone'
1616                        'status': 'RUNNING',
1617                        'cpuPlatform': 'Intel Broadwell',
1618                        'startRestricted': False,
1619                        'labels': {u'created_by': u'herbertxue'},
1620                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
1621                        ...
1622                    }]
1623                }
1624            }
1625        }
1626
1627        Args:
1628            instance: String, representing instance name.
1629
1630        Raises:
1631            errors.GetGceZoneError: Can't get zone from instance name.
1632
1633        Returns:
1634            String of zone name.
1635        """
1636        api = self.service.instances().aggregatedList(
1637            project=self._project,
1638            filter="name=%s" % instance)
1639        response = self.Execute(api)
1640        for zone, instance_data in response["items"].items():
1641            if "instances" in instance_data:
1642                zone_match = _ZONE_RE.match(zone)
1643                if zone_match:
1644                    return zone_match.group("zone")
1645        raise errors.GetGceZoneError("Can't get zone from the instance name %s"
1646                                     % instance)
1647
1648    def GetZonesByInstances(self, instances):
1649        """Get the zone from instance name.
1650
1651        Args:
1652            instances: List of strings, representing instance names.
1653
1654        Returns:
1655            A dictionary that contains the name of all instances in the zone.
1656            The key is the name of the zone, and the value is a list contains
1657            the name of the instances.
1658        """
1659        zone_instances = {}
1660        for instance in instances:
1661            zone = self.GetZoneByInstance(instance)
1662            if zone in zone_instances:
1663                zone_instances[zone].append(instance)
1664            else:
1665                zone_instances[zone] = [instance]
1666        return zone_instances
1667
1668    def CheckAccess(self):
1669        """Check if the user has read access to the cloud project.
1670
1671        Returns:
1672            True if the user has at least read access to the project.
1673            False otherwise.
1674
1675        Raises:
1676            errors.HttpError if other unexpected error happens when
1677            accessing the project.
1678        """
1679        api = self.service.zones().list(project=self._project)
1680        retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
1681        retry_http_codes.remove(self.ACCESS_DENIED_CODE)
1682        try:
1683            self.Execute(api, retry_http_codes=retry_http_codes)
1684        except errors.HttpError as e:
1685            if e.code == self.ACCESS_DENIED_CODE:
1686                return False
1687            raise
1688        return True
1689
1690    def UpdateRsaInMetadata(self, zone, instance, metadata, entry):
1691        """Update ssh public key to sshKeys's value in this metadata.
1692
1693        Args:
1694            zone: String, name of zone.
1695            instance: String, representing instance name.
1696            metadata: Dict, maps a metadata name to its value.
1697            entry: String, ssh public key.
1698        """
1699        ssh_key_item = GetSshKeyFromMetadata(metadata)
1700        if ssh_key_item:
1701            # The ssh key exists in the metadata so update the reference to it
1702            # in the metadata. There may not be an actual ssh key value so
1703            # that's why we filter for None to avoid an empty line in front.
1704            ssh_key_item[_METADATA_KEY_VALUE] = "\n".join(
1705                list(filter(None, [ssh_key_item[_METADATA_KEY_VALUE], entry])))
1706        else:
1707            # Since there is no ssh key item in the metadata, we need to add it in.
1708            ssh_key_item = {_METADATA_KEY: _SSH_KEYS_NAME,
1709                            _METADATA_KEY_VALUE: entry}
1710            metadata[_ITEMS].append(ssh_key_item)
1711        utils.PrintColorString(
1712            "Ssh public key doesn't exist in the instance(%s), adding it."
1713            % instance, utils.TextColors.WARNING)
1714        self.SetInstanceMetadata(zone, instance, metadata)
1715
1716
def RsaNotInMetadata(metadata, entry):
    """Tell whether the ssh public key is missing from the sshKeys value.

    The check is a substring match of entry against the value of every
    sshKeys item. An empty "items" list is added to metadata if it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.
        entry: String, ssh public key.

    Returns:
        Boolean. True if ssh public key doesn't exist in metadata.
    """
    return not any(
        item[_METADATA_KEY] == _SSH_KEYS_NAME
        and entry in item[_METADATA_KEY_VALUE]
        for item in metadata.setdefault(_ITEMS, []))
1732
1733
def GetSshKeyFromMetadata(metadata):
    """Find the sshKeys item in the metadata.

    An empty "items" list is added to metadata if it has none.

    Args:
        metadata: Dict, maps a metadata name to its value.

    Returns:
        Dict of the first ssh key item in metadata, None if can't find the
        ssh key item in metadata.
    """
    ssh_key_items = (
        candidate for candidate in metadata.setdefault(_ITEMS, [])
        if candidate.get(_METADATA_KEY, '') == _SSH_KEYS_NAME)
    return next(ssh_key_items, None)
1748
1749
def GetRsaKey(ssh_rsa_path):
    """Read and verify the public rsa key stored at the given path.

    Args:
        ssh_rsa_path: String, The absolute path to public rsa key. A leading
                      "~" is expanded to the user's home directory.

    Returns:
        String, rsa key with surrounding whitespace removed.

    Raises:
        errors.DriverError: RSA file does not exist.
    """
    expanded_path = os.path.expanduser(ssh_rsa_path)
    if not os.path.exists(expanded_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % expanded_path)

    with open(expanded_path) as key_file:
        key_text = key_file.read()
    # Only strip when something was read; an empty result is passed to the
    # verifier unchanged.
    if key_text:
        key_text = key_text.strip()
    utils.VerifyRsaPubKey(key_text)
    return key_text
1774
def GetInstanceIP(instance):
    """Get the internal and external IP for a given instance.

    Only the first network interface, and the first access config of that
    interface, are consulted.

    Args:
        instance: A dict, representing a gce instance.

    Returns:
        A populated IP object. A missing address is reported as an empty
        string.

    Raises:
        KeyError: If instance has no "networkInterfaces" entry.
        IndexError: If the "networkInterfaces" list is empty.
    """
    network_interface = instance["networkInterfaces"][0]
    # "accessConfigs" may be absent, None or an empty list when the instance
    # has no external IP; all of these mean "no external address" rather
    # than an error.
    access_configs = network_interface.get("accessConfigs") or [{}]
    external_ip = access_configs[0].get("natIP", "")
    internal_ip = network_interface.get("networkIP", "")
    return IP(internal=internal_ip, external=external_ip)
1789
def GetGCEHostName(gce_project, instance, zone):
    """Build the internal GCE host name of an instance.

    A domain-scoped project name of the form "<domain>:<project>" is
    rewritten to "<project>.<domain>" before it is placed in the host name.

    Args:
        gce_project: String, GCE project name.
        instance: String, GCE instance name.
        zone: String, Instance zone name.

    Returns:
        One host name converted from instance name, project name, and zone.
    """
    project = gce_project
    if ":" in gce_project:
        # Only the first two colon-separated fields are used.
        domain, project_no_domain = gce_project.split(":")[:2]
        project = f"{project_no_domain}.{domain}"
    return f"nic0.{instance}.{zone}.c.{project}.internal.gcpnode.com"
1807