alpine-zdt-images/alpine-cloud-images/get-image-cache.py

#!/usr/bin/env python3
# vim: ts=4 et:

# NOTE: this is an experimental work-in-progress

# Ensure we're using the Python virtual env with our installed dependencies
import os
import sys
import textwrap

# Short description of this script: printed on a fatal startup error and
# reused as the command line parser's description.
NOTE = textwrap.dedent("""
    Experimental:  Outputs image cache YAML on STDOUT for use with prune-images.py
    """)

# keep bytecode caches under the work directory
sys.pycache_prefix = 'work/__pycache__'

# everything below depends on ./work (relative to the current directory)
if not os.path.exists('work'):
    print('FATAL: Work directory does not exist.', file=sys.stderr)
    print(NOTE, file=sys.stderr)
    # sys.exit() rather than the bare exit() helper, which is installed by the
    # site module for interactive use and is not guaranteed to exist
    sys.exit(1)

# Re-execute using the right virtual environment, if necessary.
venv_args = [os.path.join('work', 'bin', 'python3')] + sys.argv
if os.path.join(os.getcwd(), venv_args[0]) != sys.executable:
    print("Re-executing with work environment's Python...\n", file=sys.stderr)
    # replaces the current process; nothing after this line runs in this process
    os.execv(venv_args[0], venv_args)

# We're now in the right Python environment

import argparse
import logging
import re
import time
from collections import defaultdict
from ruamel.yaml import YAML

import clouds


### Constants & Variables

# cloud providers accepted by the --cloud command line option
CLOUDS = ['aws']
# layout of every log record written by the console handler
LOGFORMAT = '%(asctime)s - %(levelname)s - %(message)s'

# image names we consider start with "alpine-", optionally prefixed by "aws_"
RE_ALPINE = re.compile(r'^(?:aws_)?alpine-')
# NOTE(review): RE_RELEASE and RE_REVISION are not referenced anywhere in the
# visible code -- confirm nothing else relies on them before removing
RE_RELEASE = re.compile(r'-(edge|[\d\.]+)-')
RE_REVISION = re.compile(r'-r?(\d+)$')
# groups, in order: release ("edge" or a dotted number), optional "_rc" number,
# variant, and the trailing revision digits (an optional "r" prefix is dropped)
# NOTE(review): the "+" inside [\d+\.] is a literal plus sign in a character
# class -- presumably unintended; confirm before tightening the pattern
RE_STUFF = re.compile(r'(edge|[\d+\.]+)(?:_rc(\d+))?-(.+)-r?(\d+)$')


### Functions

# lets us assign to arbitrarily deep keys without creating each level first
def dictfactory():
    """Return a defaultdict whose missing keys become further dictfactory() dicts."""
    nested = defaultdict(dictfactory)
    return nested


# convert dictfactory() objects back into plain dicts
def undictfactory(o):
    """Return a plain-dict copy of a (nested) defaultdict.

    Values that are themselves defaultdicts are converted recursively.
    Anything that is not a defaultdict -- including a regular dict -- is
    returned unchanged and is not descended into.
    """
    if not isinstance(o, defaultdict):
        return o

    plain = {}
    for key, value in o.items():
        plain[key] = undictfactory(value)
    return plain


### Command Line & Logging

# command line: --cloud is mandatory, everything else is optional
parser = argparse.ArgumentParser(description=NOTE)
parser.add_argument(
    '--debug',
    action='store_true',
    help='enable debug output',
)
parser.add_argument(
    '--cloud',
    choices=CLOUDS,
    required=True,
    help='cloud provider',
)
parser.add_argument(
    '--region',
    help='specific region, instead of all regions',
)
parser.add_argument(
    '--use-broker',
    action='store_true',
    help='use the identity broker to get credentials',
)
args = parser.parse_args()

# root logger: DEBUG when --debug was given, INFO otherwise
log = logging.getLogger()
log.setLevel(logging.DEBUG if args.debug else logging.INFO)
console = logging.StreamHandler()
# Spell the timestamp out with standard strftime directives; the previous
# '%F' and '%T' shorthands are platform extensions that not every C library
# implements.  The rendered text is the same: YYYY-MM-DDTHH:MM:SSZ
logfmt = logging.Formatter(LOGFORMAT, datefmt='%Y-%m-%dT%H:%M:%SZ')
# render timestamps in UTC, which is what the literal "Z" suffix claims
logfmt.converter = time.gmtime
console.setFormatter(logfmt)
log.addHandler(console)
log.debug(args)

# set up credential provider, if we're going to use it
if args.use_broker:
    clouds.set_credential_provider(debug=args.debug)

# what region(s)?  default to every region the cloud adapter knows about,
# narrowed to a single one when --region is given
regions = clouds.ADAPTERS[args.cloud].regions
if args.region:
    if args.region not in regions:
        log.error('invalid region: %s', args.region)
        # sys.exit() rather than the bare exit() helper, which is installed by
        # the site module for interactive use and is not guaranteed to exist
        sys.exit(1)
    regions = [args.region]

# keyword arguments for the image query: only images we own that are available
state_filter = {'Name': 'state', 'Values': ['available']}
filters = {'Owners': ['self'], 'Filters': [state_filter]}

# reference time (UTC) for deciding whether a deprecation time has passed
now = time.gmtime()
# collected results, keyed by region
data = dictfactory()

def _release_order(release, revision):
    """Return a sortable key for a (release, revision) pair.

    Both values are strings parsed out of an image name, so comparing them
    directly is lexicographic ('3.18.4' > '3.18.10', '9' > '10').  The key
    compares the dotted release numerically first, then the revision as an
    integer.  Release parts that are not plain digits (e.g. 'edge') are
    skipped, so such releases are ordered by revision alone.
    """
    numbers = tuple(int(part) for part in release.split('.') if part.isdecimal())
    return numbers, int(revision)


for region in sorted(regions):
    # TODO: make more generic if we need to do this for other clouds someday
    ec2r = clouds.ADAPTERS[args.cloud].session(region).resource('ec2')
    # process images ordered by their creation_date
    images = sorted(ec2r.images.filter(**filters), key=lambda k: k.creation_date)
    log.info(f'--- {region} : {len(images)} ---')

    for image in images:
        # shortcut; looked up per image so a region without images never gets
        # an entry in the output
        latest = data[region]['latest']

        # information about the image
        image_id = image.id
        name = image.name

        # only consider images named /^alpine-/
        if not RE_ALPINE.search(name):
            log.warning(f'IGNORING {region}\t{image_id}\t{name}')
            continue

        # parse image name for more information
        # NOTE: we can't rely on tags, because they may not have been set successfully
        # TODO: but we should use them if we got them!
        m = RE_STUFF.search(name)
        if not m:
            log.error(f'!PARSE\t{region}\t{image_id}\t{name}')
            continue

        release, rc, variant, revision = m.groups()
        # version is the first two components of the release ("edge" stays as is)
        version = '.'.join(release.split('.')[0:2])
        variant_key = '-'.join([version, variant])
        release_key = revision if release == 'edge' else '-'.join([release, revision])

        last_launched_attr = image.describe_attribute(Attribute='lastLaunchedTime')['LastLaunchedTime']
        last_launched = last_launched_attr.get('Value', 'Never')

        eol = None  # we don't know for sure, unless we have a deprecation time
        if image.deprecation_time:
            eol = time.strptime(image.deprecation_time, '%Y-%m-%dT%H:%M:%S.%fZ') < now
            # TODO: when did we start setting deprecation time?

        # keep track of images
        data[region]['images'][image_id] = {
            'name': name,
            'release': release,
            'version': version,
            'variant': variant,
            'revision': revision,
            'variant_key': variant_key,
            'release_key': release_key,
            'created': image.creation_date,
            'launched': last_launched,
            'deprecated': image.deprecation_time,
            'rc': rc is not None,
            'eol': eol,
            'private': not image.public,
            'snapshot_id': image.block_device_mappings[0]['Ebs']['SnapshotId']
        }

        # keep track of the latest release_key per variant_key; .get() avoids
        # creating an empty entry in the defaultdict for an unseen variant_key
        current = latest.get(variant_key)
        if current is None or (
            _release_order(release, revision)
            > _release_order(current['release'], current['revision'])
        ):
            latest[variant_key] = {
                'release': release,
                'revision': revision,
                'release_key': release_key
            }

        log.info(f'{region}\t{not image.public}\t{eol}\t{last_launched.split("T")[0]}\t{name}')

# emit the collected cache as a YAML document with an explicit "---" start
yaml = YAML()
yaml.explicit_start = True

# TODO?  dump out to a file instead of STDOUT?
yaml.dump(undictfactory(data), sys.stdout)

# per-region image counts, followed by the grand total, go to the log
counts = {}
for region in sorted(data):
    counts[region] = len(data[region]['images'])
    log.info(f'{region} : {counts[region]} images')

total = sum(counts.values())
log.info(f'TOTAL : {total} images')
Squashed 'alpine-cloud-images/' changes from 064a526..ae2361d ae2361d Make Storage Authoritative for State 91082fb various fixes pre-3.18.4 git-subtree-dir: alpine-cloud-images git-subtree-split: ae2361d9becd3f0bf4c2d3510f4fb126fff3d8fa 2023-11-14 16:52:02 +00:00			`#!/usr/bin/env python3`
			`# vim: ts=4 et:`

			`# NOTE: this is an experimental work-in-progress`

			`# Ensure we're using the Python virtual env with our installed dependencies`
			`import os`
			`import sys`
			`import textwrap`

			`NOTE = textwrap.dedent("""`
			`Experimental: Outputs image cache YAML on STDOUT for use with prune-images.py`
			`""")`

			`sys.pycache_prefix = 'work/__pycache__'`

			`if not os.path.exists('work'):`
			`print('FATAL: Work directory does not exist.', file=sys.stderr)`
			`print(NOTE, file=sys.stderr)`
			`exit(1)`

			`# Re-execute using the right virtual environment, if necessary.`
			`venv_args = [os.path.join('work', 'bin', 'python3')] + sys.argv`
			`if os.path.join(os.getcwd(), venv_args[0]) != sys.executable:`
			`print("Re-executing with work environment's Python...\n", file=sys.stderr)`
			`os.execv(venv_args[0], venv_args)`

			`# We're now in the right Python environment`

			`import argparse`
			`import logging`
			`import re`
			`import time`
			`from collections import defaultdict`
			`from ruamel.yaml import YAML`

			`import clouds`


			`### Constants & Variables`

			`CLOUDS = ['aws']`
			`LOGFORMAT = '%(asctime)s - %(levelname)s - %(message)s'`

chore: sync upstream library 2024-03-11 13:31:30 +00:00			`RE_ALPINE = re.compile(r'^(?:aws_)?alpine-')`
Squashed 'alpine-cloud-images/' changes from 064a526..ae2361d ae2361d Make Storage Authoritative for State 91082fb various fixes pre-3.18.4 git-subtree-dir: alpine-cloud-images git-subtree-split: ae2361d9becd3f0bf4c2d3510f4fb126fff3d8fa 2023-11-14 16:52:02 +00:00			`RE_RELEASE = re.compile(r'-(edge\|[\d\.]+)-')`
			`RE_REVISION = re.compile(r'-r?(\d+)$')`
			`RE_STUFF = re.compile(r'(edge\|[\d+\.]+)(?:_rc(\d+))?-(.+)-r?(\d+)$')`


			`### Functions`

			`# allows us to set values deep within an object that might not be fully defined`
			`def dictfactory():`
			`return defaultdict(dictfactory)`


			`# undo dictfactory() objects to normal objects`
			`def undictfactory(o):`
			`if isinstance(o, defaultdict):`
			`o = {k: undictfactory(v) for k, v in o.items()}`
			`return o`


			`### Command Line & Logging`

			`parser = argparse.ArgumentParser(description=NOTE)`
			`parser.add_argument('--debug', action='store_true', help='enable debug output')`
			`parser.add_argument('--cloud', choices=CLOUDS, required=True, help='cloud provider')`
			`parser.add_argument('--region', help='specific region, instead of all regions')`
			`parser.add_argument(`
			`'--use-broker', action='store_true',`
			`help='use the identity broker to get credentials')`
			`args = parser.parse_args()`

			`log = logging.getLogger()`
			`log.setLevel(logging.DEBUG if args.debug else logging.INFO)`
			`console = logging.StreamHandler()`
			`logfmt = logging.Formatter(LOGFORMAT, datefmt='%FT%TZ')`
			`logfmt.converter = time.gmtime`
			`console.setFormatter(logfmt)`
			`log.addHandler(console)`
			`log.debug(args)`

			`# set up credential provider, if we're going to use it`
			`if args.use_broker:`
			`clouds.set_credential_provider(debug=args.debug)`

			`# what region(s)?`
			`regions = clouds.ADAPTERS[args.cloud].regions`
			`if args.region:`
			`if args.region not in regions:`
			`log.error('invalid region: %s', args.region)`
			`exit(1)`
			`else:`
			`regions = [args.region]`

			`filters = {`
			`'Owners': ['self'],`
			`'Filters': [`
			`{'Name': 'state', 'Values': ['available']},`
			`]`
			`}`

			`data = dictfactory()`
			`now = time.gmtime()`

			`for region in sorted(regions):`
			`# TODO: make more generic if we need to do this for other clouds someday`
			`ec2r = clouds.ADAPTERS[args.cloud].session(region).resource('ec2')`
			`images = sorted(ec2r.images.filter(**filters), key=lambda k: k.creation_date)`
			`log.info(f'--- {region} : {len(images)} ---')`
			`version = release = revision = None`

			`for image in images:`
			`latest = data[region]['latest'] # shortcut`

			`# information about the image`
			`id = image.id`
			`name = image.name`

			`# only consider images named /^alpine-/`
			`if not RE_ALPINE.search(image.name):`
			`log.warning(f'IGNORING {region}\t{id}\t{name}')`
			`continue`

			`# parse image name for more information`
			`# NOTE: we can't rely on tags, because they may not have been set successfully`
feat: First WIP of Alpine 3.20.1 minimal 2024-07-15 12:52:22 +00:00			`# TODO: but we should use them if we got them!`
Squashed 'alpine-cloud-images/' changes from 064a526..ae2361d ae2361d Make Storage Authoritative for State 91082fb various fixes pre-3.18.4 git-subtree-dir: alpine-cloud-images git-subtree-split: ae2361d9becd3f0bf4c2d3510f4fb126fff3d8fa 2023-11-14 16:52:02 +00:00			`m = RE_STUFF.search(name)`
			`if not m:`
			`log.error(f'!PARSE\t{region}\t{id}\t{name}')`
			`continue`

			`release = m.group(1)`
			`rc = m.group(2)`
			`version = '.'.join(release.split('.')[0:2])`
			`variant = m.group(3)`
			`revision = m.group(4)`
			`variant_key = '-'.join([version, variant])`
			`release_key = revision if release == 'edge' else '-'.join([release, revision])`

			`last_launched_attr = image.describe_attribute(Attribute='lastLaunchedTime')['LastLaunchedTime']`
			`last_launched = last_launched_attr.get('Value', 'Never')`

chore: sync upstream library 2024-03-11 13:31:30 +00:00			`eol = None # we don't know for sure, unless we have a deprecation time`
			`if image.deprecation_time:`
			`eol = time.strptime(image.deprecation_time, '%Y-%m-%dT%H:%M:%S.%fZ') < now`
feat: First WIP of Alpine 3.20.1 minimal 2024-07-15 12:52:22 +00:00			`# TODO: when did we start setting deprecation time?`
Squashed 'alpine-cloud-images/' changes from 064a526..ae2361d ae2361d Make Storage Authoritative for State 91082fb various fixes pre-3.18.4 git-subtree-dir: alpine-cloud-images git-subtree-split: ae2361d9becd3f0bf4c2d3510f4fb126fff3d8fa 2023-11-14 16:52:02 +00:00
			`# keep track of images`
			`data[region]['images'][id] = {`
			`'name': name,`
			`'release': release,`
			`'version': version,`
			`'variant': variant,`
			`'revision': revision,`
			`'variant_key': variant_key,`
			`'release_key': release_key,`
			`'created': image.creation_date,`
			`'launched': last_launched,`
			`'deprecated': image.deprecation_time,`
			`'rc': rc is not None,`
			`'eol': eol,`
			`'private': not image.public,`
			`'snapshot_id': image.block_device_mappings[0]['Ebs']['SnapshotId']`
			`}`

			`# keep track of the latest release_key per variant_key`
			`if variant_key not in latest or (release > latest[variant_key]['release']) or (release == latest[variant_key]['release'] and [revision > latest[variant_key]['revision']]):`
			`data[region]['latest'][variant_key] = {`
			`'release': release,`
			`'revision': revision,`
			`'release_key': release_key`
			`}`

			`log.info(f'{region}\t{not image.public}\t{eol}\t{last_launched.split("T")[0]}\t{name}')`

			`# instantiate YAML`
			`yaml = YAML()`
			`yaml.explicit_start = True`

			`# TODO? dump out to a file instead of STDOUT?`
			`yaml.dump(undictfactory(data), sys.stdout)`

			`total = 0`
			`for region, rdata in sorted(data.items()):`
			`count = len(rdata['images'])`
			`log.info(f'{region} : {count} images')`
			`total += count`

			`log.info(f'TOTAL : {total} images')`