Welcome to MOF DB! This guide will help you get started with accessing and using our MOF datasets.
You can download datasets in two ways:
Use the following Python script to fetch the JSON directly from Hugging Face into your current working directory. Choose the appropriate script based on which dataset you need:
# Download H2MOF-ML Dataset
from pathlib import Path

from huggingface_hub import hf_hub_download

# Download the dataset JSON from the Hub; `local_dir` is set to the directory
# this script is run from (the current working directory).
file_path = hf_hub_download(
    repo_type="dataset",
    repo_id="ytbai/H2MOF-ML",
    filename="H2MOF-ML.json",
    local_dir=str(Path.cwd()),
    # NOTE(review): recent huggingface_hub releases appear to deprecate this
    # argument -- confirm against the installed version before relying on it.
    local_dir_use_symlinks=False,
)
print("Saved at:", file_path)
# Download hMOF-ML Dataset
from pathlib import Path

from huggingface_hub import hf_hub_download

# Download the dataset JSON from the Hub; `local_dir` is set to the directory
# this script is run from (the current working directory).
file_path = hf_hub_download(
    repo_type="dataset",
    repo_id="ytbai/hMOF-ML",
    filename="hMOF-ML.json",
    local_dir=str(Path.cwd()),
    # NOTE(review): recent huggingface_hub releases appear to deprecate this
    # argument -- confirm against the installed version before relying on it.
    local_dir_use_symlinks=False,
)
print("Saved at:", file_path)
Save the script you need to a file and run it (e.g., python download_h2mof.py or python download_hmof.py); the dataset will be saved in the directory you run the script from (your current working directory).
Use Python to load the JSON file and access samples with convenient dot notation, similar to the explore.py helper shown below:
import json
import logging
import os
from types import SimpleNamespace
# Configure root logging at DEBUG level with timestamped records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)

# This module's logger is switched off by default; set
# `logger.disabled = False` to see its messages.
logger = logging.getLogger(__name__)
logger.disabled = True
def dict_to_namespace(d):
    """Recursively convert parsed JSON data into dot-notation objects.

    Args:
        d: A value as produced by ``json.load``: a dict, a list, or a scalar.

    Returns:
        A ``SimpleNamespace`` when ``d`` is a dict (every value converted
        recursively), a new list of converted items when ``d`` is a list,
        and ``d`` itself for anything else.

    Raises:
        TypeError: If a dict has non-string keys, since keys are passed to
            ``SimpleNamespace`` as keyword arguments.
    """
    if isinstance(d, dict):
        # Keys become attribute names; a key that is not a valid Python
        # identifier is still stored but must be read with getattr().
        return SimpleNamespace(
            **{key: dict_to_namespace(value) for key, value in d.items()}
        )
    if isinstance(d, list):
        return [dict_to_namespace(item) for item in d]
    return d
def load_json_samples(json_file):
    """Load a dataset JSON file and return its samples as dot-notation objects.

    Args:
        json_file: Path to a JSON file whose top level is an object mapping
            sample names to sample data.

    Returns:
        A dict mapping each sample name to its data converted with
        ``dict_to_namespace``.

    Raises:
        FileNotFoundError: If ``json_file`` does not exist.
    """
    # Open directly instead of checking os.path.exists() first: this avoids a
    # race between the check and the open, and keeps the same error message.
    try:
        with open(json_file, "r", encoding="utf-8") as f:
            json_data = json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"JSON file not found: {json_file}") from err

    return {
        sample_name: dict_to_namespace(sample_data)
        for sample_name, sample_data in json_data.items()
    }
# Load the dataset and index it by sample name.
json_file = "H2MOF-ML.json"
samples = load_json_samples(json_file)

# Pick one sample and read nested fields with dot notation.
sample_name = "ABEXOW"
sample = samples[sample_name]
print(sample.lattice.abc)
print(sample.properties.metal_composition.total_metal_atoms)
print(sample.properties.UV_at_TPS)

# Example output
# [12.002, 11.008, 19.473]
# 7
# 23.96
The loading method is the same for both datasets: simply set json_file to the filename of the dataset you downloaded (H2MOF-ML.json or hMOF-ML.json). For detailed attribute descriptions and available properties, please refer to the Data Format documentation.