MDFactory

cli

Command-line interface for mdfactory using cyclopts.

attribute app = App(name='MDFactory', version=__version__)
attribute sync_app = App(help='Synchronize system and analysis metadata with configured backend.')
attribute sync_push_app = App(help='Push local systems or analyses into the database.')
attribute sync_pull_app = App(help='Pull systems or analyses from the database.')
attribute sync_init_app = App(help='Initialize databases or datasets for systems or analyses.')
attribute sync_clear_app = App(help='Clear sync datasets (destructive).')
attribute analysis_app = App()
attribute analysis_artifacts_app = App()
attribute config_app = App(help='Manage mdfactory configuration.')
funcprepare_build(input, output=Path('.'))

Prepare YAML files for system build from a CSV data frame.

paraminputPath

Path to the CSV file with the compositions

paramoutputPath
= Path('.')

Output directory, by default Path(".")

Returns

None
funcdf_models_from_input_csv(input)
paraminput

Returns

None
funcbuild_system(input, output=Path('.'))

Build MD system from YAML input file.

paraminputPath

Path to the YAML file specifying the BuildInput

paramoutputPath
= Path('.')

Output directory, by default Path(".")

Returns

None
funcclean(parameters=True, database=True)
param parameters: bool = True
param database: bool = True

Returns

None
funcshow_db(name='run')

Show information about the specified database.

param name: Literal['run'] = 'run'

Returns

None
funcinfo()

Returns

None
funccheck_csv(input)

Check if CSV is valid by building BuildInput models from each row.

paraminputPath

Input CSV file to check.

Returns

None
func _validate_sync_push_inputs(source, csv, csv_root, force, diff) -> None

Validate sync push command inputs.

paramsourcePath | None

Source directory or glob pattern

paramcsvPath | None

CSV input file

paramcsv_rootPath | None

Root for CSV hash folder search

paramforcebool

Overwrite mode flag

paramdiffbool

Skip-existing mode flag

Returns

None
func _ensure_sync_target_initialized(table_name, init_command) -> None

Ensure local backend database/file exists and is initialized.

paramtable_namestr

Table name to check (e.g., "RUN_DATABASE")

paraminit_commandstr

CLI command to show in error message if not initialized

Returns

None
func _exit_sync_push_error(exc) -> None

Log push errors and exit without Python traceback.

paramexcException

The exception that caused the push failure

Returns

None
funcsync_push_systems(source=None, csv=None, csv_root=None, force=False, diff=False)

Push simulation system metadata to database.

Discovers simulation folders and uploads their metadata (hash, status, directory, build input) to the runs database.

Exactly one of --source or --csv must be provided.

paramsourcePath
= None

Directory, glob pattern (e.g., "systems/*/"), or summary YAML file

paramcsvPath
= None

Input CSV file (hashes will be extracted and folders searched)

paramcsv_rootPath
= None

Root directory to search for hash folders when using --csv mode

paramforcebool
= False

Delete existing records and re-insert (overwrite duplicates)

paramdiffbool
= False

Only upload new hashes that don't exist in database yet

Returns

None
funcsync_pull_systems(status=None, simulation_type=None, parametrization=None, engine=None, output=None, full=False)

Pull simulation system metadata from database.

Retrieves simulation records from the runs database and outputs them to CLI or file.

paramstatusstr
= None

Filter by status (build, equilibrated, production, completed)

paramsimulation_typestr
= None

Filter by simulation type (mixedbox, bilayer)

paramparametrizationstr
= None

Filter by parametrization (cgenff, smirnoff)

paramenginestr
= None

Filter by engine (gromacs)

paramoutputPath
= None

Output file path (.csv or .json). If not provided, prints to CLI. File output always includes all columns.

paramfullbool
= False

Show all columns in CLI output (excluding only JSON blob). Default shows summary columns: hash, simulation_type, parametrization, status, directory

Returns

None
funcsync_init_systems(reset=False)

Initialize the systems database (RUN_DATABASE).

Creates the database or Foundry dataset if it doesn't exist. For Foundry, validates schema compatibility if dataset exists.

The backend type (sqlite or foundry) is determined by config.

paramresetbool
= False

Reset and recreate existing dataset/table. --force is an alias for --reset.

Returns

None
func _log_init_results(results, resource_type) -> None

Log initialization results for tables or datasets.

paramresultsdict[str, bool]

Mapping of table name to whether it was created

paramresource_typestr

Label for log messages (e.g., "Systems", "Analysis")

Returns

None
funcsync_init_analysis(reset=False)

Initialize the analysis database.

Creates the database file/tables (SQLite), CSV files, or datasets (Foundry) for storing analysis results.

paramresetbool
= False

Reset and recreate existing datasets/tables. --force is an alias for --reset.

Returns

None
funcsync_init_artifacts(reset=False)

Initialize the artifact database tables.

Creates the database file/tables (SQLite), CSV files, or datasets (Foundry) for storing artifact metadata.

paramresetbool
= False

Reset and recreate existing datasets/tables. --force is an alias for --reset.

Returns

None
funcsync_init_check()

Validate Foundry connection and configured paths.

Checks that:

  1. Foundry connection can be established
  2. All configured directories (BASE_PATH, analysis, artifacts, runs) exist

Returns

None
func _clear_tables_with_confirmation(tables) -> None

Prompt for confirmation and delete all rows from the given tables.

paramtableslist[str]

Table names to clear (e.g., ["RUN_DATABASE", "ANALYSIS_OVERVIEW"])

Returns

None
funcsync_clear_systems()

Clear all records from systems database (RUN_DATABASE).

Returns

None
funcsync_clear_analysis(analysis_name=None, artifact_name=None, overview=False, analyses=False, artifacts=False, all=False)

Clear analysis/artifact datasets with confirmation.

param analysis_name: Annotated[str | None, Parameter(help='Specific analysis table (e.g., area_per_lipid).')] = None
param artifact_name: Annotated[str | None, Parameter(help='Specific artifact table (e.g., bilayer_snapshot).')] = None
param overview: Annotated[bool, Parameter(help='Clear overview table.')] = False
param analyses: Annotated[bool, Parameter(help='Clear all analysis tables.')] = False
param artifacts: Annotated[bool, Parameter(help='Clear all artifact tables.')] = False
param all: Annotated[bool, Parameter(help='Clear all analysis/artifact tables and overview.')] = False

Returns

None
funcsync_clear_all()

Clear all sync datasets (runs, analyses, artifacts, overview).

Returns

None
funcsync_push_analysis(source=None, csv=None, csv_root=None, analysis_name=None, force=False, diff=False)

Push analysis results to database.

Discovers simulation folders, loads their analysis data (parquet files from .analysis/), and uploads to the analysis database.

Exactly one of --source or --csv must be provided.

paramsourcePath
= None

Directory, glob pattern (e.g., "systems/*/"), or summary YAML file

paramcsvPath
= None

Input CSV file (hashes will be extracted and folders searched)

paramcsv_rootPath
= None

Root directory to search for hash folders when using --csv mode

paramanalysis_namestr
= None

Push only this specific analysis (e.g., "area_per_lipid"). If not provided, pushes all available analyses.

paramforcebool
= False

Delete existing records and re-insert (overwrite duplicates)

paramdiffbool
= False

Only upload new hashes that don't exist in database yet

Returns

None
funcsync_push_artifacts(source=None, csv=None, csv_root=None, artifact_name=None, force=False, diff=False)

Push artifact metadata to database.

Discovers simulation folders, loads their artifact metadata (file paths and checksums from .analysis/artifacts/), and uploads to the artifact database.

Exactly one of --source or --csv must be provided.

paramsourcePath
= None

Directory, glob pattern (e.g., "systems/*/"), or summary YAML file

paramcsvPath
= None

Input CSV file (hashes will be extracted and folders searched)

paramcsv_rootPath
= None

Root directory to search for hash folders when using --csv mode

paramartifact_namestr
= None

Push only this specific artifact (e.g., "bilayer_snapshot"). If not provided, pushes all available artifacts.

paramforcebool
= False

Delete existing records and re-insert (overwrite duplicates)

paramdiffbool
= False

Only upload new hashes that don't exist in database yet

Returns

None
funcsync_pull_analysis(analysis_name=None, hash=None, simulation_type=None, output=None, full=False, overview=False)

Pull analysis results from database.

Retrieves analysis records from the analysis database and outputs them to CLI or file.

paramanalysis_namestr
= None

Pull from specific analysis table (e.g., "area_per_lipid"). Required unless --overview is specified.

paramhashstr
= None

Filter by simulation hash

paramsimulation_typestr
= None

Filter by simulation type (mixedbox, bilayer)

paramoutputPath
= None

Output file path (.csv or .json). If not provided, prints to CLI.

paramfullbool
= False

Show all columns in CLI output including data_csv (can be very long)

paramoverviewbool
= False

Pull from the overview table instead of a specific analysis table

Returns

None
funcsync_pull_artifacts(artifact_name=None, hash=None, simulation_type=None, output=None, full=False, overview=False)

Pull artifact metadata from database.

Retrieves artifact records from the artifact database and outputs them to CLI or file.

paramartifact_namestr
= None

Pull from specific artifact table (e.g., "bilayer_snapshot"). Required unless --overview is specified.

paramhashstr
= None

Filter by simulation hash

paramsimulation_typestr
= None

Filter by simulation type (mixedbox, bilayer)

paramoutputPath
= None

Output file path (.csv or .json). If not provided, prints to CLI.

paramfullbool
= False

Show all columns in CLI output

paramoverviewbool
= False

Pull from the overview table instead of a specific artifact table

Returns

None
func _resolve_sim_paths(source, *, trajectory_file='prod.xtc', structure_file='system.pdb', simulation_type=None, hashes=None) -> list[Path]

Resolve source to simulation paths, applying optional filters.

paramsourcePath
paramtrajectory_filestr
= 'prod.xtc'
paramstructure_filestr
= 'system.pdb'
paramsimulation_typestr | None
= None
paramhasheslist[str] | None
= None

Returns

list[pathlib.Path]
funcanalysis_run(source=None, analysis=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc', skip_existing=True, slurm=False, account=None, partition='cpu', time='2h', cpus=4, mem_gb=8, analysis_backend='multiprocessing', analysis_workers=None, qos=None, constraint=None, log_dir=None, job_name_prefix='mdfactory-analysis', start_ns=None, end_ns=None, last_ns=None, stride=None, max_residues=None)

Run analyses locally or via submitit/SLURM.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

Notes

For local runs (without --slurm), analysis_backend and analysis_workers are passed through to analysis execution.

Analysis parameters (--start-ns, --end-ns, --last-ns, --stride, --max-residues) are forwarded to each analysis function. Analyses that do not accept a given parameter will ignore it.

param source: Path | None = None
param analysis: list[str] | None = None
param simulation_type: str | None = None
param hash: list[str] | None = None
param structure_file: str = 'system.pdb'
param trajectory_file: str = 'prod.xtc'
param skip_existing: bool = True
param slurm: bool = False
param account: str | None = None
param partition: str = 'cpu'
param time: str = '2h'
param cpus: int = 4
param mem_gb: int = 8
param analysis_backend: str = 'multiprocessing'
param analysis_workers: int | None = None
param qos: str | None = None
param constraint: str | None = None
param log_dir: Path | None = None
param job_name_prefix: str = 'mdfactory-analysis'
param start_ns: float | None = None
param end_ns: float | None = None
param last_ns: float | None = None
param stride: int | None = None
param max_residues: int | None = None

Returns

None
funcanalysis_info(source=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc', output=None, chemistry_output=None, chemistry_mode='all')

Show analysis status for simulations.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

Options:

  - --chemistry-output: Path to write the chemistry CSV.
  - --chemistry-mode: "all" extracts all species from YAML; "lnp" uses LNP-specific grouping (HL, CHL, IL with ILN+ILP merged).

paramsourcePath | None
= None
paramsimulation_typestr | None
= None
paramhashlist[str] | None
= None
paramstructure_filestr
= 'system.pdb'
paramtrajectory_filestr
= 'prod.xtc'
paramoutputPath | None
= None
paramchemistry_outputPath | None
= None
paramchemistry_modeLiteral['all', 'lnp']
= 'all'

Returns

None
funcanalysis_preprocess(source=None, script=None, output=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc', dry_run=False)

Run a preprocessing script across simulations.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

paramsourcePath | None
= None
paramscriptPath | None
= None
paramoutputstr | None
= None
paramsimulation_typestr | None
= None
paramhashlist[str] | None
= None
paramstructure_filestr
= 'system.pdb'
paramtrajectory_filestr
= 'prod.xtc'
paramdry_runbool
= False

Returns

None
funcanalysis_artifacts_run(source=None, artifact=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc', output_prefix=None, vmd_path=None, ffmpeg_path=None, skip_existing=True, slurm=False, account=None, partition='cpu', time='2h', cpus=4, mem_gb=8, qos=None, constraint=None, log_dir=None, job_name_prefix='mdfactory-artifacts')

Run artifacts locally or via submitit/SLURM.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

param source: Path | None = None
param artifact: list[str] | None = None
param simulation_type: str | None = None
param hash: list[str] | None = None
param structure_file: str = 'system.pdb'
param trajectory_file: str = 'prod.xtc'
param output_prefix: str | None = None
param vmd_path: str | None = None
param ffmpeg_path: str | None = None
param skip_existing: bool = True
param slurm: bool = False
param account: str | None = None
param partition: str = 'cpu'
param time: str = '2h'
param cpus: int = 4
param mem_gb: int = 8
param qos: str | None = None
param constraint: str | None = None
param log_dir: Path | None = None
param job_name_prefix: str = 'mdfactory-artifacts'

Returns

None
funcanalysis_artifacts_info(source=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc')

Show artifact status for simulations.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

paramsourcePath | None
= None
paramsimulation_typestr | None
= None
paramhashlist[str] | None
= None
paramstructure_filestr
= 'system.pdb'
paramtrajectory_filestr
= 'prod.xtc'

Returns

None
funcanalysis_artifacts_remove(source=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc')

Remove artifacts for simulations.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

paramsourcePath | None
= None
paramsimulation_typestr | None
= None
paramhashlist[str] | None
= None
paramstructure_filestr
= 'system.pdb'
paramtrajectory_filestr
= 'prod.xtc'

Returns

None
funcanalysis_remove(source=None, simulation_type=None, hash=None, structure_file='system.pdb', trajectory_file='prod.xtc')

Remove all analyses for simulations.

Provide SOURCE as either a simulation directory or a build summary YAML. Use --hash to filter to specific simulations (comma-separated, prefix matching supported).

paramsourcePath | None
= None
paramsimulation_typestr | None
= None
paramhashlist[str] | None
= None
paramstructure_filestr
= 'system.pdb'
paramtrajectory_filestr
= 'prod.xtc'

Returns

None
funcconfig_init()

Interactive wizard to set up mdfactory configuration.

Returns

None
funcconfig_show()

Display current active configuration.

Returns

None
funcconfig_path()

Print the configuration file path.

Returns

None
funcconfig_edit()

Open the configuration file in a terminal text editor.

Uses $EDITOR if set, otherwise falls back to vi. Creates the config file with defaults if it does not exist.

Returns

None
funcmain()

Returns

None

On this page