# Source code for foundry_dev_tools.utils.caches.metadata_store
"""A Metadata store for cached datasets.
Primary use is resolution of dataset_path and dataset_Rid for
offline usage of Foundry DevTools
"""
from __future__ import annotations
import json
import os
from collections.abc import MutableMapping
from shutil import rmtree
from typing import TYPE_CHECKING
from foundry_dev_tools.utils import api_types
if TYPE_CHECKING:
from collections.abc import Iterator
from pathlib import Path
from foundry_dev_tools.config.context import FoundryContext
# TODO shelve? https://docs.python.org/3/library/shelve.html
class DatasetMetadataStore(MutableMapping[api_types.FoundryPath, api_types.DatasetIdentity]):
    """A Metadata store for cached datasets.

    Primary use is resolution of dataset_path and dataset_rid for
    offline usage of Foundry DevTools.

    The store is a flat JSON file (``metadata.json``) inside the cache
    directory, mapping each Foundry dataset path to its identity record.
    Every mapping operation reads and rewrites the whole file, which keeps
    the implementation simple and the on-disk state always consistent.
    """

    def __init__(self, ctx: FoundryContext):
        """Init meta data store.

        Args:
            ctx: the foundry context, used for the cache_dir
        """
        self.ctx = ctx
        self._db_path = self._cache_dir / "metadata.json"
        if not self._db_path.exists():
            # metadata.json is the index for everything in the cache; without
            # it the cached datasets cannot be resolved, so clear the whole
            # cache directory and start from an empty store.
            # Guard the rmtree: on a first run the cache dir may not exist
            # yet, and an unconditional rmtree would raise FileNotFoundError.
            if self._cache_dir.exists():
                rmtree(self._cache_dir)
            self._cache_dir.mkdir(parents=True, exist_ok=True)
            # create empty metadata.json
            with self._db_path.open(mode="w", encoding="UTF-8") as file:
                json.dump({}, file)

    @property
    def _cache_dir(self) -> Path:
        """The cache directory holding metadata.json, taken from the context config."""
        return self.ctx.config.cache_dir

    def __setitem__(self, dataset_path: api_types.FoundryPath, dataset_identity: api_types.DatasetIdentity) -> None:
        """Persist ``dataset_identity`` under the key ``dataset_path``.

        Fix: the key argument is now actually used. Previously the entry was
        stored under ``dataset_identity["dataset_path"]`` and the key
        parameter was ignored, which violates the MutableMapping contract:
        ``store[k] = v`` followed by ``store[k]`` could raise KeyError
        whenever the two paths differed.
        """
        db = self._read_db()
        db[dataset_path] = dataset_identity
        self._write_db(db)

    def __delitem__(self, dataset_path: api_types.FoundryPath) -> None:
        """Remove the entry for ``dataset_path``; raise KeyError if absent."""
        db = self._read_db()
        if dataset_path not in db:
            raise KeyError(dataset_path)
        del db[dataset_path]
        # only rewrite the file when something actually changed
        self._write_db(db)

    def __getitem__(self, dataset_path: api_types.FoundryPath) -> api_types.DatasetIdentity:
        """Return the identity stored for ``dataset_path``; raise KeyError if absent."""
        db = self._read_db()
        try:
            return db[dataset_path]
        except KeyError:
            raise KeyError(dataset_path) from None

    def __len__(self) -> int:
        """Number of cached dataset entries."""
        return len(self._read_db())

    def __iter__(self) -> Iterator[api_types.FoundryPath]:
        """Iterate over the cached dataset paths."""
        yield from self._read_db()

    def _read_db(self) -> dict[api_types.FoundryPath, api_types.DatasetIdentity]:
        """Load the full path -> identity mapping from metadata.json.

        Note: the previous annotation claimed this returned a
        ``DatasetMetadataStore``; it has always returned the plain dict
        decoded from JSON.
        """
        with self._db_path.open(encoding="UTF-8") as file:
            return json.load(file)

    def _write_db(self, db: dict[api_types.FoundryPath, api_types.DatasetIdentity]) -> None:
        """Atomically-ish persist ``db`` to metadata.json.

        flush + fsync forces the data to disk so the store survives an
        abrupt process exit right after a write.
        """
        with self._db_path.open(mode="w", encoding="UTF-8") as file:
            json.dump(db, file, indent=4)
            file.flush()
            os.fsync(file.fileno())