Source code for T-reX.ExplodeDatabase

"""
ExplodeDatabase Module
======================

This module is responsible for exploding a Brightway2 database into a single-level list of all exchanges.
It utilizes the wurst package to unpack the database, explode it to a list of all exchanges, and save this data 
in a DataFrame as a .pickle binary file.

"""

# Imports
import os
from datetime import datetime

import bw2data as bd
import pandas as pd
import wurst as w


[docs] def ExplodeDatabase(db_name): """ Explode a Brightway2 database into a single-level list of all exchanges using wurst. :param str db_name: Name of the Brightway2 database to be exploded. :returns: None The function saves the output to a file and logs the operation, but does not return any value. :rtype: None """ from config.user_settings import dir_logs, dir_tmp if not os.path.isdir(dir_tmp): os.makedirs(dir_tmp) if not os.path.isdir(dir_logs): os.makedirs(dir_logs) print("\n*** Starting ExplodeDatabase ***") print( "ExplodeDatabase uses wurst to open a bw2 database, explodes the exchanges for each process, and then returns a pickle file with a DataFrame list of all activities" ) # Set the path to save the pickle file pickle_path = dir_tmp / f"{db_name}_exploded.pickle" # Extract information from the specified database db = bd.Database(db_name) print(f"\n** db: {db.name}, in project: {bd.projects.current} will be processed") # Unpack the database using wurst print("\n** Opening the sausage...") guts = w.extract_brightway2_databases(db_name) # Create a DataFrame from the extracted data print("\n*** Extracting activities from db...") df = pd.DataFrame( guts, columns=[ "code", "name", "location", "reference product", "categories", "classifications", "exchanges", ], ) # Expand the exchanges column into a new DataFrame and join it with the original data print("\n*** Exploding exchanges from activities...") df = df.explode("exchanges", ignore_index=True) df_ex = pd.json_normalize(df.exchanges, max_level=0) df_ex = df_ex[ ["name", "amount", "unit", "product", "production volume", "type", "location"] ] df_ex = df_ex.add_prefix("ex_") df = df.join(df_ex) # Finalize the DataFrame by setting the index and removing the now redundant exchanges column df = df.drop("exchanges", axis=1) df.set_index("code", inplace=True) # Save the DataFrame as a pickle file print("\n*** Pickling...") df.to_pickle(pickle_path) print("\n Pickle is:", "%1.0f" % (os.path.getsize(pickle_path) / 1024**2), "MB") # Log the operation with a timestamp, database name, and project name print("\n*** The sausage <" + db.name + "> was exploded and pickled. Rejoice!") log_entry = ( datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "," + db.name + "," + bd.projects.current ) log_file = dir_logs / f'{datetime.now().strftime("%Y-%m-%d")}_ExplodeDatabase.log' with open(log_file, "a") as l: l.write(str(log_entry) + "\n") return None