simulation_handler.preprocess

This module generates ship, truck, and train data for a maritime simulation. It includes functions to initialize random number generators, generate truncated exponential distributions, select terminals based on ship type and size, and create ship and truck data based on predefined probabilities. It also provides functions to fill ship details, generate random sizes, and create ship and truck data dictionaries.

  1"""
  2This module generates ship, truck, and train data for a maritime simulation.
  3It includes functions to initialize random number generators, generate truncated exponential distributions,
  4select terminals based on ship type and size, and create ship and truck data based on predefined probabilities.
  5It also provides functions to fill ship details, generate random sizes, and create ship and truck data dictionaries.
  6"""
  7import random
  8import time
  9from concurrent.futures import ThreadPoolExecutor
 10
 11import pandas as pd
 12import numpy as np
 13import matplotlib.pyplot as plt
 14from scipy.stats import kstest, expon
 15from tqdm import tqdm
 16
 17from constants import *
 18from simulation_handler.helpers import normal_random_with_sd, normal_random_with_limit, get_value_by_terminal, get_values_by_terminal_random_sample, save_warning
 19from simulation_handler.helpers import is_daytime
 20
 21global NUM_TRUNCATION
 22NUM_TRUNCATION = {"Container": 0, "Liquid": 0, "DryBulk": 0}
 23
 24def initialize_rng(seed):
 25    """
 26    Initialize random state objects for repeatable randomness across runs.
 27    This function sets up two random number generators: one using Python's built-in `random` module
 28    and another using NumPy's random number generator. This allows for consistent random behavior
 29    across different runs of the simulation, which is crucial for debugging and testing.
 30    Args:
 31        seed (int): The seed value to initialize the random number generators.
 32    Returns:
 33        None
 34    """
 35    global rng_random, rng_numpy
 36    rng_random = random.Random(seed)
 37    rng_numpy = np.random.default_rng(seed)
 38
 39def truncated_exponential(a, b, scale, ship_type, ship_terminal):
 40    """ 
 41    Generate a truncated exponential random variable.   
 42    This function generates a random variable from an exponential distribution
 43    truncated to the interval [a, b]. If the generated value exceeds b, it is set to b.
 44    Args:
 45        a (float): The lower bound of the truncation interval.
 46        b (float): The upper bound of the truncation interval.
 47        scale (float): The scale parameter of the exponential distribution.
 48        ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
 49        ship_terminal (int): Terminal number where the ship is located.
 50    Returns:
 51        float: A random variable from the truncated exponential distribution.
 52    """
 53    global NUM_TRUNCATION
 54    candidate = rng_numpy.exponential(scale)
 55    if candidate >= b:
 56        if ship_type == 'Container':
 57                NUM_TRUNCATION[ship_type] += 1
 58        elif ship_type == 'Liquid':
 59                NUM_TRUNCATION[ship_type] += 1
 60        elif ship_type == 'DryBulk':
 61                NUM_TRUNCATION[ship_type] += 1
 62        candidate = b
 63
 64    return candidate
 65
 66def truncated_exponential_advanced(a, b, scale, ship_type, ship_terminal):
 67    """ 
 68    Generate a truncated exponential random variable with advanced truncation.
 69    This function generates a random variable from an exponential distribution
 70    truncated to the interval [a, b]. If the generated value exceeds b, it is set to b.
 71    Args:
 72        a (float): The lower bound of the truncation interval.
 73        b (float): The upper bound of the truncation interval.
 74        scale (float): The scale parameter of the exponential distribution.
 75        ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
 76        ship_terminal (int): Terminal number where the ship is located.
 77    Returns:
 78        float: A random variable from the truncated exponential distribution.
 79    """
 80
 81    cdf_a = 1 - np.exp(-a / scale)
 82    cdf_b = 1 - np.exp(-b / scale)
 83    u = rng_numpy.uniform(cdf_a, cdf_b)
 84    samples = -scale * np.log(1 - u) 
 85    return samples
 86
 87def select_terminal(ship_type, ship_beam, num_terminals, seed):
 88    """
 89    Select terminal based on probability based on number of berths in that terminal to the total number of berths in all terminal of that type.
 90    This function selects a terminal for a ship based on its type and beam size.
 91    If the ship is larger than a certain beam size, it selects from a limited set of terminals.
 92    Args:
 93        ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
 94        ship_beam (float): Beam size of the ship.
 95        num_terminals (list): List containing the number of terminals for each type of ship [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
 96        seed (int): Random seed for reproducibility.
 97    Returns:
 98        int: Selected terminal number based on the ship type and beam size.
 99    """
100    random.seed(seed)
101    num_container_terminals, num_liquid_terminals, num_drybulk_terminals = num_terminals
102    if ship_type == "Container":
103        if ship_beam > MAX_BEAM_SMALL_SHIP:
104            total_berths = sum(BERTHS_CTR_TERMINAL)
105            berth_probs = [berth/total_berths for berth in BERTHS_CTR_TERMINAL][:NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_CTR]
106            berth_probs = [x/sum(berth_probs) for x in berth_probs]
107            return random.choices(range(1, NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_CTR+1), berth_probs)[0]
108        else:
109            total_berths = sum(BERTHS_CTR_TERMINAL)
110            berth_probs = [berth/total_berths for berth in BERTHS_CTR_TERMINAL]
111            return random.choices(range(1, num_container_terminals+1), berth_probs)[0]
112    elif ship_type == "Liquid":
113        if ship_beam > MAX_BEAM_SMALL_SHIP:
114            total_berths = sum(BERTHS_LIQ_TERMINAL)
115            berth_probs = [berth/total_berths for berth in BERTHS_LIQ_TERMINAL][:NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_LIQ]
116            berth_probs = [x/sum(berth_probs) for x in berth_probs]
117            return random.choices(range(1, NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_LIQ+1), berth_probs)[0]
118        else:
119            total_berths = sum(BERTHS_LIQ_TERMINAL)
120            berth_probs = [berth/total_berths for berth in BERTHS_LIQ_TERMINAL]
121            return random.choices(range(1, num_liquid_terminals+1), berth_probs)[0]
122    elif ship_type == "DryBulk":
123        if ship_beam > MAX_BEAM_SMALL_SHIP:
124            total_berths = sum(BERTH_DRYBULK_TERMINAL)
125            berth_probs = [berth/total_berths for berth in BERTH_DRYBULK_TERMINAL][:NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_DRYBULK]
126            berth_probs = [x/sum(berth_probs) for x in berth_probs]
127            return random.choices(range(1, NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_DRYBULK+1), berth_probs)[0]
128        else:
129            total_berths = sum(BERTH_DRYBULK_TERMINAL)
130            berth_probs = [berth/total_berths for berth in BERTH_DRYBULK_TERMINAL]
131            return random.choices(range(1, num_drybulk_terminals+1), berth_probs)[0]
132
def assign_random_size(ship_type, probabilities):
    """
    Draw a size class for a ship of the given type.
    The class is drawn with the module-level `rng_numpy` generator from
    ('Small', 'Medium', 'Large'), using the weights stored in `probabilities` under
    the lower-case key of the ship type ("container", "drybulk" or "liquid").
    Args:
        ship_type (str): Type of ship ('Container', 'DryBulk', 'Liquid').
        probabilities (dict): Size-class probabilities keyed by "container", "drybulk" and "liquid".
    Returns:
        str: Drawn size class ('Small', 'Medium', 'Large'), or NaN if the ship type is not recognised
        (in which case no random number is drawn).
    """
    probability_key = {
        'Container': "container",
        'DryBulk': "drybulk",
        'Liquid': "liquid",
    }.get(ship_type)

    if probability_key is None:
        return np.nan

    size_classes = ['Small', 'Medium', 'Large']
    return rng_numpy.choice(size_classes, p=probabilities[probability_key])
151
def generate_ship_data(ship_type, NUM_SHIPS, probabilities):
    """
    Build the initial records for all ships of one type.
    One record is created per ship index from 0 up to NUM_SHIPS[ship_type] - 1. Each record
    holds the ship id, the direction "in", the ship type and a size class drawn with
    assign_random_size (one draw per ship, in index order).
    Args:
        ship_type (str): Type of ship ('Container', 'DryBulk', 'Liquid').
        NUM_SHIPS (dict): Dictionary containing the number of ships for each ship type.
        probabilities (dict): Size-class probabilities, passed through to assign_random_size.
    Returns:
        dict: Ship records keyed by the ship index as a string.
    """
    ships = {}
    for index in range(NUM_SHIPS[ship_type]):
        ships[f"{index}"] = {
            "ship_id": index,
            "direction": "in",
            "ship_type": ship_type,
            "Size": assign_random_size(ship_type, probabilities),
        }
    return ships
170
def fill_ship_details(row, vessel_sizes, seed):
    """
    Generate dimensions, cargo amount, pilots and tugboats for one ship.
    The row of `vessel_sizes` matching the ship's type and size class supplies the
    averages, standard deviations and pilot/tug ranges. Length, beam, draft and tonnage
    are drawn with normal_random_with_sd, each seeded by a fresh integer from the
    module-level `rng_random`. The tonnage is then reduced by the non-cargo dead-weight
    share of the ship type and converted with a randomly chosen factor: Liquid uses
    LIQUID_CONVERSION_FACTORS, Container uses CONTAINER_CONVERSION_FACTORS and is
    truncated to an int, DryBulk is left unchanged.
    Args:
        row (pd.Series): A row from the ship data DataFrame containing 'ship_type' and 'Size'.
        vessel_sizes (pd.DataFrame): Vessel size table with average/standard deviation columns
            and min/max pilot and tug columns.
        seed (int): Accepted for interface compatibility; not used by this function.
    Returns:
        pd.Series: [length, beam, draft, tonnage, pilots, tugboats], or six NaN values when
        no row of `vessel_sizes` matches the ship's type and size.
    Raises:
        ValueError: If a size row matches but the ship type is not Container, DryBulk or Liquid.
    """
    ship_type = row['ship_type']
    matches = vessel_sizes[(vessel_sizes['ship_type'] == ship_type) & (vessel_sizes['Size'] == row['Size'])]

    if matches.empty:
        return pd.Series([np.nan] * 6)

    def first(column):
        # Value of `column` in the first matching size row.
        return matches[column].values[0]

    def draw(avg_column, std_column, scale_factor):
        # One normal draw around the tabulated average, seeded from rng_random.
        return normal_random_with_sd(
            first(avg_column), first(std_column), rng_random.randint(0, 100000), scale_factor=scale_factor
        )

    # The draw order (length, beam, draft, tonnage) fixes how rng_random is consumed.
    length = draw('Avg_Length', 'Std_Length', 2)
    beam = draw('Avg_Beam', 'Std_Beam', 2)
    draft = draw('Avg_Draft', 'Std_Draft', 2)
    tonnage = draw('Avg_Tonnage', 'Std_Tonnage', 3)

    if ship_type == 'Container':
        dead_weight_share = NON_CARGO_DEAD_WEIGHT_PERCENT_CTR
    elif ship_type == 'DryBulk':
        dead_weight_share = NON_CARGO_DEAD_WEIGHT_PERCENT_DK
    elif ship_type == 'Liquid':
        dead_weight_share = NON_CARGO_DEAD_WEIGHT_PERCENT_LIQ
    else:
        raise ValueError("Invalid ship type")
    tonnage = (1 - dead_weight_share) * tonnage

    # A negative draw is only reported; the value is still used.
    if tonnage < 0:
        print(f"Negative tonnage: {tonnage}, ship type: {row['ship_type']}, size: {row['Size']}, length: {length}, beam: {beam}, draft: {draft}")

    if ship_type == 'Liquid':
        tonnage = tonnage * rng_random.choice(LIQUID_CONVERSION_FACTORS)
    elif ship_type != 'DryBulk':
        # Only Container can reach this branch; other types raised above.
        tonnage = int(tonnage * rng_random.choice(CONTAINER_CONVERSION_FACTORS))

    # The upper bound is max + 1 because int() truncates the uniform draw.
    pilots = int(rng_numpy.uniform(first('min_pilots'), first('max_pilots') + 1))
    tugboats = int(rng_numpy.uniform(first('min_tugs'), first('max_tugs') + 1))
    return pd.Series([length, beam, draft, tonnage, pilots, tugboats])
211
def generate_truck_data(truck_type, NUM_TRUCKS):
    """
    Build the initial records for all trucks of one type.
    One record is created per truck index from 0 up to NUM_TRUCKS[truck_type] - 1, holding
    the truck id, the direction "in" and the truck type.
    Args:
        truck_type (str): Type of truck ('Container', 'DryBulk', 'Liquid').
        NUM_TRUCKS (dict): Dictionary containing the number of trucks for each truck type.
    Returns:
        dict: Truck records keyed by the truck index as a string, or None when `truck_type`
        is not one of the three known types.
    """
    # The record layout is the same for every known type; unknown types yield None.
    if truck_type not in ('Container', 'DryBulk', 'Liquid'):
        return None

    return {
        f"{i}": {
            "truck_id": i,
            "direction": "in",
            "truck_type": truck_type,
        } for i in range(NUM_TRUCKS[truck_type])
    }
246
def generate_ships(run_id, NUM_TERMINALS_LIST, seed):
    """
    Generate the ship schedule for one simulation run and write it, with diagnostic plots, to disk.
    Steps:
      1. Read the vessel size classes from 'inputs/ship_sizes.csv' and re-initialise the
         module random generators with `seed`.
      2. Build candidate ships per type (three times the expected count), then draw their
         dimensions, cargo to load and unload, pilots, tugboats and terminal.
      3. Draw interarrival times per type (plain exponential for Container and DryBulk,
         truncated exponential for Liquid), accumulate them into arrival times, and keep
         the ships arriving no later than SIMULATION_TIME.
      4. Write '.{run_id}/logs/ship_data.csv', record interarrival statistics through
         save_warning, and save plots under '.{run_id}/plots/'.
    The output directories are not created here.
    Args:
        run_id (str): Unique identifier for the simulation run.
        NUM_TERMINALS_LIST (list): Number of terminals per ship type, as
            [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
        seed (int): Random seed for reproducibility.
    Returns:
        None
    """
    vessel_size_inputs = pd.read_csv('inputs/ship_sizes.csv')

    def class_fractions(ship_type):
        # 'Fraction' values of the size rows belonging to one ship type, in file order.
        return vessel_size_inputs[vessel_size_inputs['ship_type'] == ship_type]['Fraction'].values

    probabilities = {
        "container": class_fractions('Container'),
        "liquid": class_fractions('Liquid'),
        "drybulk": class_fractions('DryBulk'),
    }

    initialize_rng(seed)
    num_container_terminals, num_liquid_terminals, num_drybulk_terminals = NUM_TERMINALS_LIST

    # Number of candidate ships for each cargo type: three times the expected count.
    # Ships arriving after SIMULATION_TIME are dropped further down.
    NUM_SHIPS = {
        "Container": 3 * int(SIMULATION_TIME // mean_interarrival_time_container),
        "Liquid": 3 * int(SIMULATION_TIME // mean_interarrival_time_tanker),
        "DryBulk": 3 * int(SIMULATION_TIME // mean_interarrival_time_gencargo),
    }

    SCALE_VALS_TERMINALS = {
        "Container": mean_interarrival_time_container,
        "Liquid": mean_interarrival_time_tanker,
        "DryBulk": mean_interarrival_time_gencargo
    }

    # Size classes are drawn here; the Container, DryBulk, Liquid order fixes the random stream.
    CONTAINER_SHIP_DATA = generate_ship_data('Container', NUM_SHIPS, probabilities)
    CARGO_SHIP_DATA = generate_ship_data('DryBulk', NUM_SHIPS, probabilities)
    TANKER_SHIP_DATA = generate_ship_data('Liquid', NUM_SHIPS, probabilities)

    df_tanker = pd.DataFrame(TANKER_SHIP_DATA).T
    df_cargo = pd.DataFrame(CARGO_SHIP_DATA).T
    df_container = pd.DataFrame(CONTAINER_SHIP_DATA).T

    ship_data_components = []

    # Details are filled in the order Liquid, DryBulk, Container (again fixed for reproducibility).
    for df_ship in (df_tanker, df_cargo, df_container):
        # Two passes: the first supplies the cargo to load, the second the cargo to unload.
        # The second pass also overwrites length, width, draft, pilots and tugboats.
        for cargo_column in ('num_container_or_liq_tons_or_dry_tons_to_load',
                             'num_container_or_liq_tons_or_dry_tons_to_unload'):
            df_ship[['length', 'width', 'draft', cargo_column, 'pilots', 'tugboats']] = df_ship.apply(
                fill_ship_details, axis=1, vessel_sizes=vessel_size_inputs, seed=seed
            )
        df_ship['terminal'] = df_ship.apply(lambda row: select_terminal(row['ship_type'], row['width'], NUM_TERMINALS_LIST, seed = rng_random.randint(1, 100000000)), axis=1)
        ship_data_components.append(df_ship)

    ship_data = pd.concat(ship_data_components, ignore_index=True)
    ship_data_ctr = ship_data[ship_data['ship_type'] == 'Container'].copy()
    ship_data_liq = ship_data[ship_data['ship_type'] == 'Liquid'].copy()
    ship_data_drybulk = ship_data[ship_data['ship_type'] == 'DryBulk'].copy()

    # One interarrival draw per ship, row by row, in the order Container, Liquid, DryBulk.
    ship_data_ctr["interarrival"] = ship_data_ctr.apply(lambda row: (rng_numpy.exponential(scale= SCALE_VALS_TERMINALS['Container'])), axis=1)
    ship_data_ctr['interarrival'] = pd.to_numeric(ship_data_ctr['interarrival'], errors='coerce')
    ship_data_liq["interarrival"] = ship_data_liq.apply(lambda row: (truncated_exponential_advanced(a = min_interarrival_liquid, b = max_interaarival_liq, scale= SCALE_VALS_TERMINALS['Liquid'], ship_type = 'Liquid', ship_terminal = row['terminal'])), axis=1)
    ship_data_liq['interarrival'] = pd.to_numeric(ship_data_liq['interarrival'], errors='coerce')
    ship_data_drybulk["interarrival"] = ship_data_drybulk.apply(lambda row: (rng_numpy.exponential(scale= SCALE_VALS_TERMINALS['DryBulk'])), axis=1)
    ship_data_drybulk['interarrival'] = pd.to_numeric(ship_data_drybulk['interarrival'], errors='coerce')

    # Arrival time is the running sum of the interarrival times within each ship type.
    ship_data_ctr['arrival'] = ship_data_ctr['interarrival'].cumsum()
    ship_data_liq['arrival'] = ship_data_liq['interarrival'].cumsum()
    ship_data_drybulk['arrival'] = ship_data_drybulk['interarrival'].cumsum()

    ship_data_ctr = ship_data_ctr.drop(columns=['interarrival'])
    ship_data_liq = ship_data_liq.drop(columns=['interarrival'])
    ship_data_drybulk = ship_data_drybulk.drop(columns=['interarrival'])

    ship_data = pd.concat([ship_data_ctr, ship_data_liq, ship_data_drybulk], ignore_index=True)
    ship_data['arrival'] = pd.to_numeric(ship_data['arrival'], errors='coerce')
    ship_data.sort_values('arrival', inplace=True)

    # Keep only ships inside the simulation horizon; copy so the assignments below
    # operate on an independent frame rather than on a slice.
    ship_data = ship_data[ship_data['arrival'] <= SIMULATION_TIME].copy()
    ship_data['ship_id'] = range(1, len(ship_data) + 1)
    ship_data.reset_index(drop=True, inplace=True)
    ship_data['last_section'] = ship_data.apply(lambda row: LAST_SECTION_DICT[row['ship_type']][row['terminal']], axis=1)

    output_path = f'.{run_id}/logs/ship_data.csv'
    ship_data.to_csv(output_path, index=False)

    # For container terminals, create a box plot of the number of containers to
    # load + unload for each terminal in the same plot.
    container_ships = ship_data[ship_data['ship_type'] == 'Container'].copy()
    container_ships.loc[:, 'moves'] = (
        container_ships['num_container_or_liq_tons_or_dry_tons_to_load'] +
        container_ships['num_container_or_liq_tons_or_dry_tons_to_unload']
    )
    container_ships = container_ships[['terminal', 'moves']]
    # DataFrame.boxplot(by=...) creates its own figure, so no plt.figure() call is
    # made beforehand (it would leave an empty figure open after plt.close()).
    container_ships.boxplot(by='terminal', column='moves')
    plt.ylim(bottom=0)
    plt.ylabel("Number of container moves")
    plt.xlabel("Container terminal")
    plt.savefig(f".{run_id}/plots/moves.pdf")
    plt.close()

    # NOTE(review): these lengths are the candidate counts per type, i.e. before the
    # SIMULATION_TIME cut-off applied above - confirm that this is the intended figure.
    print("Total number of ships: Container {}, Liquid {}, DryBulk {}".format(len(ship_data_ctr), len(ship_data_liq), len(ship_data_drybulk)))

    # Realised interarrival times per ship type, from the kept ships' arrival times.
    liquid_data = ship_data[ship_data['ship_type'] == 'Liquid']['arrival'].diff().dropna()
    container_data = ship_data[ship_data['ship_type'] == 'Container']['arrival'].diff().dropna()
    drybulk_data = ship_data[ship_data['ship_type'] == 'DryBulk']['arrival'].diff().dropna()

    print("\n\n RUN ID:", run_id)
    save_warning(run_id, f"Container terminal, Generated mean interarrival time: {round(container_data.mean(), 1)} and expected mean interarrival time: {round(mean_interarrival_time_container, 2)}")
    save_warning(run_id, f"Liquid terminal, Generated mean interarrival time: {round(liquid_data.mean(), 1)} and expected mean interarrival time: {round(mean_interarrival_time_tanker, 2)}")
    save_warning(run_id, f"DryBulk terminal, Generated mean interarrival time: {round(drybulk_data.mean(), 1)} and expected mean interarrival time: {round(mean_interarrival_time_gencargo, 2)}\n")

    # Series.min() yields NaN for an empty series, where the built-in min() would raise.
    save_warning(run_id, f"Minimum interarrival times: Container {container_data.min():.2e}, Liquid {liquid_data.min():.2e}, DryBulk {drybulk_data.min():.2e}\n")

    # Create a stacked bar plot of the number of ships in each terminal, split by size
    for ship_type in ['Container', 'Liquid', 'DryBulk']:
        ship_type_data = ship_data[ship_data['ship_type'] == ship_type]
        size_counts = ship_type_data.groupby(['terminal', 'Size']).size().unstack(fill_value=0)
        size_counts.plot(kind='bar', stacked=True, edgecolor="black")
        plt.xlabel("Terminal")
        plt.ylabel("Number of Ships")
        plt.title(f"Stacked Bar Plot of {ship_type} Ship Distribution by Size")
        plt.xticks(rotation=0)  # Keeps terminal labels horizontal
        plt.legend(title="Size")
        plt.tight_layout()
        plt.savefig(f".{run_id}/plots/shipDistribution/{ship_type}_ship_distribution_stacked_bar.pdf")
        plt.close()

    # Plot the number of berths in each terminal; entry i of a berth list belongs to terminal i + 1.
    berth_layout = {
        'Container': (num_container_terminals, BERTHS_CTR_TERMINAL),
        'Liquid': (num_liquid_terminals, BERTHS_LIQ_TERMINAL),
        'DryBulk': (num_drybulk_terminals, BERTH_DRYBULK_TERMINAL),
    }
    for ship_type, (terminal_count, berths) in berth_layout.items():
        terminal_numbers = range(1, terminal_count + 1)
        plt.bar(terminal_numbers, berths)
        plt.xlabel("Terminal")
        plt.ylabel("Number of berths")
        plt.title(f"Number of berths in each {ship_type} terminal")
        plt.xticks(terminal_numbers)
        plt.tight_layout()
        plt.savefig(f".{run_id}/plots/shipDistribution/{ship_type}_terminal_berth_distribution.pdf")
        plt.close()
417
418
def generate_trucks(run_id, num_terminals, terminal_data_df, terminal_tuple_cache, seed):
    """
    Generate the truck schedule for one simulation run and write it, with plots, to disk.
    Arrivals are evenly spaced: the trucks of a terminal arrive every round(1 / rate, 8)
    time units, where `rate` is the terminal's 'truck arrival rate' in `terminal_data_df`.
    The per-terminal schedules are combined and sorted by arrival time. Trucks for terminals
    listed in TERMINALS_WITH_NO_TRUCKS are removed, as are trucks whose arrival time does
    not satisfy is_daytime.
    Outputs: '.{run_id}/logs/truck_data.pkl' and one bar chart per cargo type under
    '.{run_id}/plots/truckDistribution/'. The output directories are not created here.
    Args:
        run_id (str): Unique identifier for the simulation run.
        num_terminals (list): Number of terminals per truck type, as
            [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
        terminal_data_df (pd.DataFrame): Terminal data with 'Cargo' and 'truck arrival rate' columns.
        terminal_tuple_cache (dict): Accepted for interface compatibility; not used.
        seed (int): Accepted for interface compatibility; not used.
    Returns:
        None
    """
    num_container_terminals, num_liquid_terminals, num_drybulk_terminals = num_terminals

    def arrival_rates(cargo):
        # Truck arrival rates of all terminals handling `cargo`, in table order.
        return list(terminal_data_df[terminal_data_df['Cargo'] == cargo]['truck arrival rate'].values)

    mean_ctr_terminal_arrival_rate = arrival_rates('Container')
    mean_liq_terminal_arrival_rate = arrival_rates('Liquid')
    mean_drybulk_terminal_arrival_rate = arrival_rates('DryBulk')

    # Mean interarrival time of each cargo type across all of its terminals combined.
    mean_interarrival_time_container_trucks = 1 / sum(mean_ctr_terminal_arrival_rate)
    mean_interarrival_time_tanker_trucks = 1 / sum(mean_liq_terminal_arrival_rate)
    mean_interarrival_time_gencargo_trucks = 1 / sum(mean_drybulk_terminal_arrival_rate)

    # Number of candidate trucks per type; each terminal schedule is cut to SIMULATION_TIME below.
    NUM_TRUCKS = {
        "Container": int(SIMULATION_TIME // mean_interarrival_time_container_trucks),
        "Liquid": int(SIMULATION_TIME // mean_interarrival_time_tanker_trucks),
        "DryBulk": int(SIMULATION_TIME // mean_interarrival_time_gencargo_trucks),
    }

    CONTAINER_TRUCK_DATA = generate_truck_data('Container', NUM_TRUCKS)
    CARGO_TRUCK_DATA = generate_truck_data('DryBulk', NUM_TRUCKS)
    TANKER_TRUCK_DATA = generate_truck_data('Liquid', NUM_TRUCKS)

    df_tanker, df_cargo, df_container = [
        pd.DataFrame.from_dict(data, orient="index")
        for data in [TANKER_TRUCK_DATA, CARGO_TRUCK_DATA, CONTAINER_TRUCK_DATA]
    ]

    # (candidate trucks, number of terminals, per-terminal rates) for Liquid, DryBulk, Container.
    truck_type_inputs = [
        (df_tanker, num_liquid_terminals, mean_liq_terminal_arrival_rate),
        (df_cargo, num_drybulk_terminals, mean_drybulk_terminal_arrival_rate),
        (df_container, num_container_terminals, mean_ctr_terminal_arrival_rate),
    ]

    truck_data_components = []
    for df_type_trucks, terminal_count, rates in truck_type_inputs:
        per_terminal_frames = []
        for terminal in range(1, terminal_count + 1):
            frame = df_type_trucks.copy()
            frame['terminal'] = terminal
            frame['terminal_id'] = terminal - 1
            # Constant spacing for this terminal; `rates` is indexed from 0.
            frame['interarrival'] = round(1 / rates[terminal - 1], 8)
            frame['interarrival'] = frame['interarrival'].astype(float)
            frame['arrival'] = frame['interarrival'].cumsum()
            per_terminal_frames.append(frame[frame['arrival'] <= SIMULATION_TIME])
        df_truck_combined = pd.concat(per_terminal_frames, ignore_index=True)
        df_truck_combined = df_truck_combined.sort_values('arrival')
        truck_data_components.append(df_truck_combined)

    truck_data = pd.concat(truck_data_components, ignore_index=True)
    truck_data['arrival'] = pd.to_numeric(truck_data['arrival'], errors='coerce')
    truck_data.sort_values('arrival', inplace=True)
    truck_data['truck_id'] = range(1, len(truck_data) + 1)
    truck_data.reset_index(drop=True, inplace=True)

    # Drop trucks for every (cargo type, terminal) pair listed in TERMINALS_WITH_NO_TRUCKS.
    for cargo_type, list_of_terminals in TERMINALS_WITH_NO_TRUCKS.items():
        for terminal in list_of_terminals:
            truck_data = truck_data[~((truck_data['truck_type'] == cargo_type) & (truck_data['terminal'] == terminal))]

    # Remove trucks at night using is_daytime.
    truck_data = truck_data[truck_data['arrival'].apply(is_daytime)]
    truck_data.to_pickle(f'.{run_id}/logs/truck_data.pkl')

    for terminal_type in ["Container", "Liquid", "DryBulk"]:
        terminal_data = truck_data[truck_data['truck_type'] == terminal_type]
        terminal_counts = terminal_data['terminal'].value_counts().sort_index()
        plt.bar(terminal_counts.index, terminal_counts.values)
        plt.xlabel("Terminal")
        plt.ylabel("Number of Trucks")
        plt.title(f"Number of Trucks in Each {terminal_type} Terminal")
        plt.xticks(terminal_counts.index)
        plt.tight_layout()
        plt.savefig(f".{run_id}/plots/truckDistribution/{terminal_type}_truck_distribution.pdf")
        plt.close()

    print("Total number of trucks", len(truck_data))
523
def generate_trains(run_id, num_terminals, terminal_data, terminal_data_df, terminal_tuple_cache, seed):
    """
    Generate the train schedule for the simulation and write it to a CSV file.
    For every terminal of each cargo type (Container, Liquid, DryBulk) trains are placed at a fixed
    spacing of 1 / "train arrival rate" until SIMULATION_TIME is reached. Each train gets a car amount
    sampled through get_values_by_terminal_random_sample, a total cargo equal to car amount times the
    terminal's "train car payload size", and an import/export flag pair. Terminals that are import-only
    or export-only produce trains of that kind; otherwise each train is assigned import or export
    by a coin flip drawn from the seeded module generator so runs are repeatable for a given seed.
    Trains arriving at time 0 and trains of terminals listed in TERMINALS_WITH_NO_TRAINS are removed
    before train ids are assigned.
    Args:
        run_id (str): Unique identifier for the simulation run; used to build the output path.
        num_terminals (list): Number of terminals per cargo type [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
        terminal_data: Terminal data passed to get_value_by_terminal.
        terminal_data_df (pd.DataFrame): Terminal data passed to get_values_by_terminal_random_sample.
        terminal_tuple_cache (dict): Not used by this function; kept for interface compatibility.
        seed (int): Random seed for reproducibility.
    Returns:
        None. The schedule is written to '.{run_id}/logs/train_data.csv'.
    """
    initialize_rng(seed)

    column_names = [
        'train_id', 'terminal_id', 'cargo type', 'terminal',
        'arrival at', 'car amount', 'cargo transfer rate',
        'total transfer cargo', 'import', 'export'
    ]

    # Per-terminal frames are collected and concatenated once after the loop
    # instead of growing one DataFrame with repeated pd.concat calls.
    train_frames = []

    for type_index, terminal_type in enumerate(["Container", "Liquid", "DryBulk"]):
        for terminal_id in range(num_terminals[type_index]):
            terminal = terminal_id + 1
            arrival_rate = get_value_by_terminal(terminal_data, terminal_type, terminal, "train arrival rate")
            if arrival_rate <= 0:
                # A terminal without train traffic has no schedule; skipping it
                # also avoids dividing by zero below.
                continue
            interarrival_time = 1 / arrival_rate
            transfer_rate = get_value_by_terminal(terminal_data, terminal_type, terminal, "train cargo transfer rate")
            per_car_cargo = get_value_by_terminal(terminal_data, terminal_type, terminal, "train car payload size")
            import_terminal = get_value_by_terminal(terminal_data, terminal_type, terminal, "import")
            export_terminal = get_value_by_terminal(terminal_data, terminal_type, terminal, "export")

            num_trains = int(SIMULATION_TIME / interarrival_time)

            if import_terminal and not export_terminal:
                import_bool = [True] * num_trains
            elif export_terminal and not import_terminal:
                import_bool = [False] * num_trains
            else:
                # Use the generator seeded by initialize_rng rather than the
                # global random module, so the split depends only on `seed`.
                import_bool = [rng_random.choice([True, False]) for _ in range(num_trains)]
            # Every train is either an import or an export train, never both.
            export_bool = [not is_import for is_import in import_bool]

            car_amounts = get_values_by_terminal_random_sample(terminal_data_df, terminal_type, terminal, "train car amount", num_trains, seed)
            total_transfer_cargos = [per_car_cargo * car_amount for car_amount in car_amounts]
            arrivals = [j * interarrival_time for j in range(num_trains)]

            # 'train_id' is left out here; it is assigned after filtering and sorting.
            terminal_trains = pd.DataFrame({
                'terminal_id': [terminal_id] * num_trains,
                'terminal': [terminal] * num_trains,
                'cargo type': [terminal_type] * num_trains,
                'arrival at': arrivals,
                'car amount': car_amounts,
                'cargo transfer rate': [transfer_rate] * num_trains,
                'total transfer cargo': total_transfer_cargos,
                'import': import_bool,
                'export': export_bool
            })

            # Drop columns that hold no data at all before concatenation.
            terminal_trains = terminal_trains.dropna(axis=1, how='all')
            if not terminal_trains.empty:
                train_frames.append(terminal_trains)

    if train_frames:
        train_data = pd.concat(train_frames, ignore_index=True)
    else:
        train_data = pd.DataFrame(columns=column_names)

    # Convert data types
    train_data['arrival at'] = pd.to_numeric(train_data['arrival at'], errors='coerce')
    train_data['terminal_id'] = train_data['terminal_id'].astype(int)
    train_data['terminal'] = train_data['terminal'].astype(int)
    train_data['car amount'] = train_data['car amount'].astype(int)
    train_data['cargo transfer rate'] = train_data['cargo transfer rate'].astype(float)
    train_data['total transfer cargo'] = train_data['total transfer cargo'].astype(float)
    train_data['import'] = train_data['import'].astype(bool)
    train_data['export'] = train_data['export'].astype(bool)
    train_data = train_data.sort_values('arrival at')
    # The first train of every terminal is scheduled at time 0; those are removed.
    train_data = train_data[train_data['arrival at'] != 0]

    # filter using TERMINALS_WITH_NO_TRAINS
    for cargo_type, list_of_terminals in TERMINALS_WITH_NO_TRAINS.items():
        excluded = (train_data['cargo type'] == cargo_type) & train_data['terminal'].isin(list_of_terminals)
        train_data = train_data[~excluded]

    train_data = train_data.reset_index(drop=True)
    train_data['train_id'] = train_data.index + 1
    print("Number of trains", len(train_data))
    output_path = f'.{run_id}/logs/train_data.csv'
    train_data.to_csv(output_path, index=False)
623
def get_piplines_import(num_terminals_list, terminal_data):
    """
    Collect the liquid terminals that are flagged as pipeline sources and pipeline sinks.
    Liquid terminals are numbered from 1; for each one the "pipeline source" and "pipeline sink"
    values are looked up through get_value_by_terminal and the terminal number is recorded
    in the matching list when the value is truthy.
    Args:
        num_terminals_list (tuple): Number of terminals per cargo type (num_container_terminals, num_liquid_terminals, num_drybulk_terminals).
        terminal_data: Terminal data passed to get_value_by_terminal.
    Returns:
        tuple: (terminals with a pipeline source, terminals with a pipeline sink), each a list of terminal numbers.
    """
    _container_count, liquid_count, _drybulk_count = num_terminals_list

    source_terminals = []
    sink_terminals = []

    for terminal_number in range(1, liquid_count + 1):
        has_source = get_value_by_terminal(terminal_data, "Liquid", terminal_number, "pipeline source")
        has_sink = get_value_by_terminal(terminal_data, "Liquid", terminal_number, "pipeline sink")
        if has_source:
            source_terminals.append(terminal_number)
        if has_sink:
            sink_terminals.append(terminal_number)

    return source_terminals, sink_terminals
651        
def initialize_rng(seed):
def initialize_rng(seed):
    """
    Create the module-level random generators used by the data generation functions.
    Two generators are (re)built from the same seed and stored as the globals `rng_random`
    (a `random.Random` instance) and `rng_numpy` (a NumPy `Generator`), so that a run can be
    repeated exactly by passing the same seed.
    Args:
        seed (int): The seed value to initialize both random number generators.
    Returns:
        None
    """
    global rng_numpy, rng_random
    rng_numpy = np.random.default_rng(seed)
    rng_random = random.Random(seed)

Initialize random state objects for repeatable randomness across runs. This function sets up two random number generators: one using Python's built-in random module and another using NumPy's random number generator. This allows for consistent random behavior across different runs of the simulation, which is crucial for debugging and testing.

Arguments:
  • seed (int): The seed value to initialize the random number generators.
Returns:

None

def truncated_exponential(a, b, scale, ship_type, ship_terminal):
def truncated_exponential(a, b, scale, ship_type, ship_terminal):
    """
    Draw an exponential random value and cap it at the upper bound b.
    A value is drawn from the module NumPy generator with the given scale. When the draw is
    greater than or equal to b it is replaced by b, and the NUM_TRUNCATION counter of the
    ship type is incremented for 'Container', 'Liquid' and 'DryBulk' ships.
    Args:
        a (float): Lower bound of the interval; accepted but not used by this function.
        b (float): Upper bound; draws at or above it are replaced by b.
        scale (float): The scale parameter of the exponential distribution.
        ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
        ship_terminal (int): Terminal number of the ship; not used by this function.
    Returns:
        float: The drawn value, or b when the draw reached the upper bound.
    """
    draw = rng_numpy.exponential(scale)
    if draw >= b:
        # Count how often the cap was applied, per known ship type.
        if ship_type in ('Container', 'Liquid', 'DryBulk'):
            NUM_TRUNCATION[ship_type] += 1
        draw = b

    return draw

Generate a truncated exponential random variable.
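This function draws a random value from an exponential distribution with the given scale and caps it at the upper bound b: if the drawn value is greater than or equal to b, it is set to b and the truncation counter for the ship type is incremented. The lower bound a is accepted but not enforced.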

Arguments:
  • a (float): The lower bound of the truncation interval.
  • b (float): The upper bound of the truncation interval.
  • scale (float): The scale parameter of the exponential distribution.
  • ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
  • ship_terminal (int): Terminal number where the ship is located.
Returns:

float: A random variable from the truncated exponential distribution.

def truncated_exponential_advanced(a, b, scale, ship_type, ship_terminal):
def truncated_exponential_advanced(a, b, scale, ship_type, ship_terminal):
    """
    Sample an exponential random value restricted to the interval [a, b].
    The exponential CDF is evaluated at a and b, a uniform value is drawn between those two
    probabilities from the module NumPy generator, and the inverse CDF maps it back to a value.
    No clamping takes place: the sample is generated inside the interval directly.
    Args:
        a (float): The lower bound of the truncation interval.
        b (float): The upper bound of the truncation interval.
        scale (float): The scale parameter of the exponential distribution.
        ship_type (str): Type of ship; not used by this function.
        ship_terminal (int): Terminal number of the ship; not used by this function.
    Returns:
        float: A random value from the exponential distribution restricted to [a, b].
    """
    lower_probability = 1 - np.exp(-a / scale)
    upper_probability = 1 - np.exp(-b / scale)
    uniform_draw = rng_numpy.uniform(lower_probability, upper_probability)
    # Inverse of the exponential CDF F(x) = 1 - exp(-x / scale).
    return -scale * np.log(1 - uniform_draw)

Generate a truncated exponential random variable with advanced truncation. This function samples from an exponential distribution restricted to the interval [a, b] by inverse-CDF sampling: a uniform value is drawn between the CDF values at a and b and mapped back through the inverse CDF, so the result lies within the interval without any clamping.

Arguments:
  • a (float): The lower bound of the truncation interval.
  • b (float): The upper bound of the truncation interval.
  • scale (float): The scale parameter of the exponential distribution.
  • ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
  • ship_terminal (int): Terminal number where the ship is located.
Returns:

float: A random variable from the truncated exponential distribution.

def select_terminal(ship_type, ship_beam, num_terminals, seed):
 88def select_terminal(ship_type, ship_beam, num_terminals, seed):
 89    """
 90    Select terminal based on probability based on number of berths in that terminal to the total number of berths in all terminal of that type.
 91    This function selects a terminal for a ship based on its type and beam size.
 92    If the ship is larger than a certain beam size, it selects from a limited set of terminals.
 93    Args:
 94        ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
 95        ship_beam (float): Beam size of the ship.
 96        num_terminals (list): List containing the number of terminals for each type of ship [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
 97        seed (int): Random seed for reproducibility.
 98    Returns:
 99        int: Selected terminal number based on the ship type and beam size.
100    """
101    random.seed(seed)
102    num_container_terminals, num_liquid_terminals, num_drybulk_terminals = num_terminals
103    if ship_type == "Container":
104        if ship_beam > MAX_BEAM_SMALL_SHIP:
105            total_berths = sum(BERTHS_CTR_TERMINAL)
106            berth_probs = [berth/total_berths for berth in BERTHS_CTR_TERMINAL][:NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_CTR]
107            berth_probs = [x/sum(berth_probs) for x in berth_probs]
108            return random.choices(range(1, NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_CTR+1), berth_probs)[0]
109        else:
110            total_berths = sum(BERTHS_CTR_TERMINAL)
111            berth_probs = [berth/total_berths for berth in BERTHS_CTR_TERMINAL]
112            return random.choices(range(1, num_container_terminals+1), berth_probs)[0]
113    elif ship_type == "Liquid":
114        if ship_beam > MAX_BEAM_SMALL_SHIP:
115            total_berths = sum(BERTHS_LIQ_TERMINAL)
116            berth_probs = [berth/total_berths for berth in BERTHS_LIQ_TERMINAL][:NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_LIQ]
117            berth_probs = [x/sum(berth_probs) for x in berth_probs]
118            return random.choices(range(1, NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_LIQ+1), berth_probs)[0]
119        else:
120            total_berths = sum(BERTHS_LIQ_TERMINAL)
121            berth_probs = [berth/total_berths for berth in BERTHS_LIQ_TERMINAL]
122            return random.choices(range(1, num_liquid_terminals+1), berth_probs)[0]
123    elif ship_type == "DryBulk":
124        if ship_beam > MAX_BEAM_SMALL_SHIP:
125            total_berths = sum(BERTH_DRYBULK_TERMINAL)
126            berth_probs = [berth/total_berths for berth in BERTH_DRYBULK_TERMINAL][:NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_DRYBULK]
127            berth_probs = [x/sum(berth_probs) for x in berth_probs]
128            return random.choices(range(1, NO_LARGE_SHIP_BEYOND_THIS_TERMINAL_DRYBULK+1), berth_probs)[0]
129        else:
130            total_berths = sum(BERTH_DRYBULK_TERMINAL)
131            berth_probs = [berth/total_berths for berth in BERTH_DRYBULK_TERMINAL]
132            return random.choices(range(1, num_drybulk_terminals+1), berth_probs)[0]

Select a terminal with a probability proportional to its number of berths relative to the total number of berths across all terminals of that type. The terminal is chosen based on the ship's type and beam size: if the ship's beam exceeds the small-ship limit, the choice is restricted to a limited set of terminals.

Arguments:
  • ship_type (str): Type of ship ('Container', 'Liquid', 'DryBulk').
  • ship_beam (float): Beam size of the ship.
  • num_terminals (list): List containing the number of terminals for each type of ship [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
  • seed (int): Random seed for reproducibility.
Returns:

int: Selected terminal number based on the ship type and beam size.

def assign_random_size(ship_type, probabilities):
def assign_random_size(ship_type, probabilities):
    """
    Draw a size class for a ship of the given type.
    The size is chosen among 'Small', 'Medium' and 'Large' with the module NumPy generator,
    using the probability vector stored for the ship type in `probabilities`.
    Args:
        ship_type (str): Type of ship ('Container', 'DryBulk', 'Liquid').
        probabilities (dict): Size probabilities keyed by "container", "drybulk" and "liquid".
    Returns:
        str: Assigned size category ('Small', 'Medium', 'Large') or NaN if ship type is invalid.
    """
    # Maps the ship type to the key under which its probabilities are stored.
    if ship_type == 'Container':
        probability_key = "container"
    elif ship_type == 'DryBulk':
        probability_key = "drybulk"
    elif ship_type == 'Liquid':
        probability_key = "liquid"
    else:
        return np.nan

    size_classes = ['Small', 'Medium', 'Large']
    return rng_numpy.choice(size_classes, p=probabilities[probability_key])

Assign a random size to a ship based on its type. This function uses predefined probabilities to assign a size category ('Small', 'Medium', 'Large') to a ship.

Arguments:
  • ship_type (str): Type of ship ('Container', 'DryBulk', 'Liquid').
  • probabilities (dict): Dictionary containing size probabilities for each ship type.
Returns:

str: Assigned size category ('Small', 'Medium', 'Large') or NaN if ship type is invalid.

def generate_ship_data(ship_type, NUM_SHIPS, probabilities):
def generate_ship_data(ship_type, NUM_SHIPS, probabilities):
    """
    Build the initial records for all ships of one type.
    One record is created per ship, keyed by the ship index as a string. Each record holds the
    ship id, the direction "in", the ship type and a size class drawn with assign_random_size.
    Args:
        ship_type (str): Type of ship ('Container', 'DryBulk', 'Liquid').
        NUM_SHIPS (dict): Dictionary containing the number of ships for each ship type.
        probabilities (dict): Size probabilities passed on to assign_random_size.
    Returns:
        dict: Dictionary containing ship data for the specified ship type.
    """
    ship_records = {}
    for ship_index in range(NUM_SHIPS[ship_type]):
        ship_records[f"{ship_index}"] = {
            "ship_id": ship_index,
            "direction": "in",
            "ship_type": ship_type,
            "Size": assign_random_size(ship_type, probabilities),
        }
    return ship_records

Generate ship data for a specific ship type.
This function generates a dictionary of ship data for a given ship type, including ship ID, direction, ship type, and size.

Arguments:
  • ship_type (str): Type of ship ('Container', 'DryBulk', 'Liquid').
  • NUM_SHIPS (dict): Dictionary containing the number of ships for each ship type.
Returns:

dict: Dictionary containing ship data for the specified ship type.

def fill_ship_details(row, vessel_sizes, seed):
def fill_ship_details(row, vessel_sizes, seed):
    """
    Generate length, beam, draft, cargo amount, pilots and tugboats for one ship.
    The vessel size table is filtered on the ship's type and size class. Length, beam, draft and
    tonnage are sampled with normal_random_with_sd from the table's average and standard deviation,
    each with a fresh seed drawn from the module generator. The tonnage is reduced by the non-cargo
    dead weight share of the ship type and then converted: multiplied by a randomly chosen liquid
    conversion factor for Liquid ships, left unchanged for DryBulk ships, and multiplied by a randomly
    chosen container conversion factor and truncated to an integer otherwise. Pilots and tugboats
    are drawn uniformly between the table's minimum and maximum (inclusive) and truncated to integers.
    Args:
        row (pd.Series): A row from the ship data DataFrame containing 'ship_type' and 'Size'.
        vessel_sizes (pd.DataFrame): Vessel size table with average, standard deviation and min/max columns.
        seed (int): Not used by this function; kept for interface compatibility.
    Returns:
        pd.Series: length, beam, draft, tonnage, pilots and tugboats, or six NaN values when the
        table has no entry for the ship's type and size.
    Raises:
        ValueError: If a table entry exists but the ship type is not Container, DryBulk or Liquid.
    """
    ship_type = row['ship_type']
    matching_sizes = vessel_sizes[(vessel_sizes['ship_type'] == ship_type) & (vessel_sizes['Size'] == row['Size'])]

    if matching_sizes.empty:
        return pd.Series([np.nan] * 6)

    def first_value(column):
        # Value of `column` in the first matching table entry.
        return matching_sizes[column].values[0]

    def sample_dimension(avg_column, std_column, scale_factor):
        # Each sample gets its own seed drawn from the module generator.
        return normal_random_with_sd(first_value(avg_column), first_value(std_column), rng_random.randint(0, 100000), scale_factor = scale_factor)

    # The sampling order below fixes the order of draws from the generator.
    length = sample_dimension('Avg_Length', 'Std_Length', 2)
    beam = sample_dimension('Avg_Beam', 'Std_Beam', 2)
    draft = sample_dimension('Avg_Draft', 'Std_Draft', 2)
    tonnage = sample_dimension('Avg_Tonnage', 'Std_Tonnage', 3)

    # Remove the share of the dead weight that is not cargo.
    if ship_type == 'Container':
        tonnage = (1 - NON_CARGO_DEAD_WEIGHT_PERCENT_CTR) * tonnage
    elif ship_type == 'DryBulk':
        tonnage = (1 - NON_CARGO_DEAD_WEIGHT_PERCENT_DK) * tonnage
    elif ship_type == 'Liquid':
        tonnage = (1 - NON_CARGO_DEAD_WEIGHT_PERCENT_LIQ) * tonnage
    else:
        raise ValueError("Invalid ship type")

    if tonnage < 0:
        print(f"Negative tonnage: {tonnage}, ship type: {row['ship_type']}, size: {row['Size']}, length: {length}, beam: {beam}, draft: {draft}")

    # Convert the tonnage into the cargo unit of the ship type; DryBulk stays as is.
    if ship_type == 'Liquid':
        tonnage = tonnage * rng_random.choice(LIQUID_CONVERSION_FACTORS)
    elif ship_type != 'DryBulk':
        tonnage = int(tonnage * rng_random.choice(CONTAINER_CONVERSION_FACTORS))

    # Adding 1 to the upper bound before truncation makes the maximum reachable.
    pilots = int(rng_numpy.uniform(first_value('min_pilots'), first_value('max_pilots')+1))
    tugboats = int(rng_numpy.uniform(first_value('min_tugs'), first_value('max_tugs')+1))
    return pd.Series([length, beam, draft, tonnage, pilots, tugboats])

Fill ship details such as length, beam, draft, tonnage, pilots, and tugboats based on the ship type and size. This function retrieves the average and standard deviation values for ship dimensions and tonnage from a DataFrame containing vessel size data. It then samples from a normal distribution to generate realistic ship dimensions and tonnage.

Arguments:
  • row (pd.Series): A row from the ship data DataFrame containing ship type and size.
  • vessel_sizes (pd.DataFrame): DataFrame containing vessel size information with average and standard deviation values.
  • seed (int): Random seed for reproducibility.
Returns:

pd.Series: A Series containing the generated ship details: length, beam, draft, tonnage, pilots, and tugboats.

def generate_truck_data(truck_type, NUM_TRUCKS):
def generate_truck_data(truck_type, NUM_TRUCKS):
    """
    Build the initial records for all trucks of one type.
    One record is created per truck, keyed by the truck index as a string. Each record holds
    the truck id, the direction "in" and the truck type.
    Args:
        truck_type (str): Type of truck ('Container', 'DryBulk', 'Liquid').
        NUM_TRUCKS (dict): Dictionary containing the number of trucks for each truck type.
    Returns:
        dict: Dictionary containing truck data for the specified truck type, or None when the
        truck type is not one of the three known types.
    """
    if truck_type not in ('Container', 'DryBulk', 'Liquid'):
        return None

    truck_records = {}
    for truck_index in range(NUM_TRUCKS[truck_type]):
        truck_records[f"{truck_index}"] = {
            "truck_id": truck_index,
            "direction": "in",
            "truck_type": truck_type,
        }
    return truck_records

Generate truck data for a specific truck type. This function generates a dictionary of truck data for a given truck type, including truck ID, direction, and truck type.

Arguments:
  • truck_type (str): Type of truck ('Container', 'DryBulk', 'Liquid').
  • NUM_TRUCKS (dict): Dictionary containing the number of trucks for each truck type.
Returns:

dict: Dictionary containing truck data for the specified truck type.

def generate_ships(run_id, NUM_TERMINALS_LIST, seed):
def generate_ships(run_id, NUM_TERMINALS_LIST, seed):
    """
    Generate ship data for the simulation, write it to a CSV file and save diagnostic plots.
    Size class probabilities are read from 'inputs/ship_sizes.csv'. For each cargo type three times
    the expected number of ships is generated; every ship receives dimensions, cargo amounts to load
    and unload, pilots, tugboats and a terminal. Interarrival times are drawn per cargo type
    (exponential for Container and DryBulk, exponential restricted to a minimum/maximum for Liquid),
    accumulated into arrival times, and ships arriving after SIMULATION_TIME are discarded.
    The remaining ships are numbered by arrival order and written to '.{run_id}/logs/ship_data.csv'.
    Generated versus expected mean interarrival times are recorded through save_warning, and plots of
    container moves, ship distribution and berth distribution are saved under '.{run_id}/plots'.
    Args:
        run_id (str): Unique identifier for the simulation run; used to build output paths.
        NUM_TERMINALS_LIST (list): Number of terminals per ship type [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
        seed (int): Random seed for reproducibility.
    Returns:
        None
    """
    vessel_size_inputs = pd.read_csv('inputs/ship_sizes.csv')

    container_class_probs = vessel_size_inputs[vessel_size_inputs['ship_type'] == 'Container']['Fraction'].values
    liquid_class_probs = vessel_size_inputs[vessel_size_inputs['ship_type'] == 'Liquid']['Fraction'].values
    drybulk_class_probs = vessel_size_inputs[vessel_size_inputs['ship_type'] == 'DryBulk']['Fraction'].values
    probabilities = {"container": container_class_probs, "liquid": liquid_class_probs, "drybulk": drybulk_class_probs}

    initialize_rng(seed)
    num_container_terminals, num_liquid_terminals, num_drybulk_terminals = NUM_TERMINALS_LIST

    # Number of ships for each cargo type: three times the expected count is
    # generated; ships arriving after SIMULATION_TIME are discarded further down.
    NUM_SHIPS = {
        "Container": 3 * int(SIMULATION_TIME // ((mean_interarrival_time_container)*1)),
        "Liquid": 3 * int(SIMULATION_TIME // ((mean_interarrival_time_tanker)*1)),
        "DryBulk": 3 * int(SIMULATION_TIME // ((mean_interarrival_time_gencargo)*1)),
    }

    SCALE_VALS_TERMINALS = {
        "Container": mean_interarrival_time_container,
        "Liquid": mean_interarrival_time_tanker,
        "DryBulk": mean_interarrival_time_gencargo
    }

    # The generation order (Container, DryBulk, Liquid) fixes the order of random draws.
    CONTAINER_SHIP_DATA = generate_ship_data('Container', NUM_SHIPS, probabilities)
    CARGO_SHIP_DATA = generate_ship_data('DryBulk', NUM_SHIPS, probabilities)
    TANKER_SHIP_DATA = generate_ship_data('Liquid', NUM_SHIPS, probabilities)

    df_tanker = pd.DataFrame(TANKER_SHIP_DATA).T
    df_cargo = pd.DataFrame(CARGO_SHIP_DATA).T
    df_container = pd.DataFrame(CONTAINER_SHIP_DATA).T

    ship_data_components = []

    # Processed in the order Liquid, DryBulk, Container.
    for df_ship in (df_tanker, df_cargo, df_container):
        # The first pass fills the amount to load; the second pass fills the amount to
        # unload and overwrites the dimensions, pilots and tugboats with a new sample.
        df_ship[['length', 'width', 'draft', 'num_container_or_liq_tons_or_dry_tons_to_load', 'pilots', 'tugboats']] = df_ship.apply(
            fill_ship_details, axis=1, vessel_sizes=vessel_size_inputs, seed=seed
        )
        df_ship[['length', 'width', 'draft', 'num_container_or_liq_tons_or_dry_tons_to_unload', 'pilots', 'tugboats']] = df_ship.apply(
            fill_ship_details, axis=1, vessel_sizes=vessel_size_inputs, seed=seed
        )
        df_ship['terminal'] = df_ship.apply(lambda row: select_terminal(row['ship_type'], row['width'], NUM_TERMINALS_LIST, seed = rng_random.randint(1, 100000000)), axis=1)
        ship_data_components.append(df_ship)

    ship_data = pd.concat(ship_data_components, ignore_index=True)
    ship_data_ctr = ship_data[ship_data['ship_type'] == 'Container'].copy()
    ship_data_liq = ship_data[ship_data['ship_type'] == 'Liquid'].copy()
    ship_data_drybulk = ship_data[ship_data['ship_type'] == 'DryBulk'].copy()

    # One interarrival draw per ship; arrival times are the running sum per cargo type.
    ship_data_ctr["interarrival"] = ship_data_ctr.apply(lambda row: (rng_numpy.exponential(scale= SCALE_VALS_TERMINALS['Container'])), axis=1)
    ship_data_ctr['interarrival'] = pd.to_numeric(ship_data_ctr['interarrival'], errors='coerce')
    ship_data_liq["interarrival"] = ship_data_liq.apply(lambda row: (truncated_exponential_advanced(a = min_interarrival_liquid, b = max_interaarival_liq, scale= SCALE_VALS_TERMINALS['Liquid'], ship_type = 'Liquid', ship_terminal = row['terminal'])), axis=1)
    ship_data_liq['interarrival'] = pd.to_numeric(ship_data_liq['interarrival'], errors='coerce')
    ship_data_drybulk["interarrival"] = ship_data_drybulk.apply(lambda row: (rng_numpy.exponential(scale= SCALE_VALS_TERMINALS['DryBulk'])), axis=1)
    ship_data_drybulk['interarrival'] = pd.to_numeric(ship_data_drybulk['interarrival'], errors='coerce')
    ship_data_ctr['arrival'] = ship_data_ctr['interarrival'].cumsum()
    ship_data_liq['arrival'] = ship_data_liq['interarrival'].cumsum()
    ship_data_drybulk['arrival'] = ship_data_drybulk['interarrival'].cumsum()

    ship_data_ctr = ship_data_ctr.drop(columns=['interarrival'])
    ship_data_liq = ship_data_liq.drop(columns=['interarrival'])
    ship_data_drybulk = ship_data_drybulk.drop(columns=['interarrival'])

    ship_data = pd.concat([ship_data_ctr, ship_data_liq, ship_data_drybulk], ignore_index=True)
    ship_data['arrival'] = pd.to_numeric(ship_data['arrival'], errors='coerce')
    ship_data.sort_values('arrival', inplace=True)

    # Copy the filtered frame so the column assignments below act on an
    # independent DataFrame rather than on a slice of the unfiltered one.
    ship_data = ship_data[ship_data['arrival'] <= SIMULATION_TIME].copy()
    ship_data['ship_id'] = range(1, len(ship_data) + 1)
    ship_data.reset_index(drop=True, inplace=True)
    ship_data['last_section'] = ship_data.apply(lambda row: LAST_SECTION_DICT[row['ship_type']][row['terminal']], axis=1)

    output_path = f'.{run_id}/logs/ship_data.csv'
    ship_data.to_csv(output_path, index=False)

    # For container terminals create a box plot of the number of containers
    # to load + unload for each terminal in the same plot.
    container_ships = ship_data[ship_data['ship_type'] == 'Container']
    container_ships = container_ships.copy()
    container_ships.loc[:, 'moves'] = (
        container_ships['num_container_or_liq_tons_or_dry_tons_to_load'] +
        container_ships['num_container_or_liq_tons_or_dry_tons_to_unload']
    )
    container_ships = container_ships[['terminal', 'moves']]
    plt.figure()
    container_ships.boxplot(by='terminal', column='moves')
    plt.ylim(bottom=0)
    plt.ylabel("Number of container moves")
    plt.xlabel("Container terminal")
    plt.savefig(f".{run_id}/plots/moves.pdf")
    plt.close()

    # Report the ships that remain after the SIMULATION_TIME cut-off, not the
    # oversampled number that was generated.
    ships_per_type = ship_data['ship_type'].value_counts()
    print("Total number of ships: Container {}, Liquid {}, DryBulk {}".format(
        ships_per_type.get('Container', 0), ships_per_type.get('Liquid', 0), ships_per_type.get('DryBulk', 0)))

    # compute mean interarrival times
    liquid_data = ship_data[ship_data['ship_type'] == 'Liquid']['arrival'].diff().dropna()
    container_data = ship_data[ship_data['ship_type'] == 'Container']['arrival'].diff().dropna()
    drybulk_data = ship_data[ship_data['ship_type'] == 'DryBulk']['arrival'].diff().dropna()

    print("\n\n RUN ID:", run_id)
    save_warning(run_id, f"Container terminal, Generated mean interarrival time: {round(container_data.mean(), 1)} and expected mean interarrival time: {round(mean_interarrival_time_container, 2)}")
    save_warning(run_id, f"Liquid terminal, Generated mean interarrival time: {round(liquid_data.mean(), 1)} and expected mean interarrival time: {round(mean_interarrival_time_tanker, 2)}")
    save_warning(run_id, f"DryBulk terminal, Generated mean interarrival time: {round(drybulk_data.mean(), 1)} and expected mean interarrival time: {round(mean_interarrival_time_gencargo, 2)}\n")

    # Series.min() yields NaN for a cargo type with fewer than two ships,
    # where the builtin min() would raise on the empty sequence.
    save_warning(run_id, f"Minimum interarrival times: Container {container_data.min():.2e}, Liquid {liquid_data.min():.2e}, DryBulk {drybulk_data.min():.2e}\n")

    # Create a stacked bar plot of the number of ships in each terminal, split by size
    for ship_type in ['Container', 'Liquid', 'DryBulk']:
        ship_type_data = ship_data[ship_data['ship_type'] == ship_type]
        size_counts = ship_type_data.groupby(['terminal', 'Size']).size().unstack(fill_value=0)
        size_counts.plot(kind='bar', stacked=True, edgecolor="black")
        plt.xlabel("Terminal")
        plt.ylabel("Number of Ships")
        plt.title(f"Stacked Bar Plot of {ship_type} Ship Distribution by Size")
        plt.xticks(rotation=0)  # Keeps terminal labels horizontal
        plt.legend(title="Size")
        plt.tight_layout()
        plt.savefig(f".{run_id}/plots/shipDistribution/{ship_type}_ship_distribution_stacked_bar.pdf")
        plt.close()

    # Plot the number of berths in each terminal; the berth list entry i
    # belongs to terminal i+1.
    berth_layouts = [
        ('Container', num_container_terminals, BERTHS_CTR_TERMINAL),
        ('Liquid', num_liquid_terminals, BERTHS_LIQ_TERMINAL),
        ('DryBulk', num_drybulk_terminals, BERTH_DRYBULK_TERMINAL),
    ]
    for ship_type, terminal_count, berths in berth_layouts:
        terminal_numbers = range(1, terminal_count+1)
        plt.bar(terminal_numbers, berths)
        plt.xlabel("Terminal")
        plt.ylabel("Number of berths")
        plt.title(f"Number of berths in each {ship_type} terminal")
        plt.xticks(terminal_numbers)
        plt.tight_layout()
        plt.savefig(f".{run_id}/plots/shipDistribution/{ship_type}_terminal_berth_distribution.pdf")
        plt.close()

Generate ship data for the simulation. This function creates a DataFrame containing ship data for different types of ships (Container, Liquid, DryBulk). It initializes the random number generator, generates ship data based on predefined probabilities, and assigns ship details such as length, beam, draft, tonnage, pilots, tugboats, and terminal.

Arguments:
  • run_id (str): Unique identifier for the simulation run.
  • NUM_TERMINALS_LIST (list): List containing the number of terminals for each ship type [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
  • seed (int): Random seed for reproducibility.
Returns:

None

def generate_trucks(run_id, num_terminals, terminal_data_df, terminal_tuple_cache, seed):
def generate_trucks(run_id, num_terminals, terminal_data_df, terminal_tuple_cache, seed):
    """
    Generate truck arrival data for the simulation and save it to disk.

    For each cargo type (Container, Liquid, DryBulk) the per-terminal truck
    arrival rates are read from ``terminal_data_df``. A base truck table is
    produced by ``generate_truck_data`` and copied once per terminal; every
    copy gets a constant interarrival time of ``1 / rate`` for that terminal,
    cumulative arrival times, and is cut off at ``SIMULATION_TIME``.
    Trucks for terminals listed in ``TERMINALS_WITH_NO_TRUCKS`` and trucks
    whose arrival time fails ``is_daytime`` are then removed. The result is
    pickled to ``.{run_id}/logs/truck_data.pkl`` and one bar chart per cargo
    type is saved under ``.{run_id}/plots/truckDistribution/``.

    Args:
        run_id (str): Unique identifier for the simulation run; used to build the output paths.
        num_terminals (list): Number of terminals per cargo type, in the order
            [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
        terminal_data_df (pd.DataFrame): Terminal data; the columns 'Cargo' and
            'truck arrival rate' are read here.
        terminal_tuple_cache (dict): Not used by this function; kept so existing callers keep working.
        seed (int): Not used by this function; kept so existing callers keep working.
            NOTE(review): unlike generate_trains, the random number generator is not
            re-seeded here - confirm whether generate_truck_data relies on a prior seed.
    Returns:
        None
    """
    num_container_terminals, num_liquid_terminals, num_drybulk_terminals = num_terminals
    terminal_count_by_cargo = {
        'Container': num_container_terminals,
        'Liquid': num_liquid_terminals,
        'DryBulk': num_drybulk_terminals,
    }

    # Per-terminal truck arrival rates, one list per cargo type.
    # The lists are later indexed with ``terminal - 1``, i.e. this assumes the
    # rows of terminal_data_df appear in terminal order - TODO confirm.
    arrival_rates = {
        cargo: list(terminal_data_df[terminal_data_df['Cargo'] == cargo]['truck arrival rate'].values)
        for cargo in ('Container', 'Liquid', 'DryBulk')
    }

    # The pooled rate of a cargo type gives its mean interarrival time, which
    # in turn fixes how many base trucks are generated for that type.
    num_trucks = {}
    for cargo, rates in arrival_rates.items():
        mean_interarrival_time = 1 / sum(rates)
        num_trucks[cargo] = int(SIMULATION_TIME // mean_interarrival_time)

    # Generation order (Container, DryBulk, Liquid) is kept as in the original
    # code in case generate_truck_data consumes shared random state.
    base_frames = {
        cargo: pd.DataFrame.from_dict(generate_truck_data(cargo, num_trucks), orient="index")
        for cargo in ('Container', 'DryBulk', 'Liquid')
    }

    truck_data_components = []
    for truck_type_name in ('Liquid', 'DryBulk', 'Container'):
        base_df = base_frames[truck_type_name]
        rates = arrival_rates[truck_type_name]

        per_terminal_frames = []
        for terminal in range(1, terminal_count_by_cargo[truck_type_name] + 1):
            df_terminal = base_df.copy()
            df_terminal['terminal'] = terminal
            df_terminal['terminal_id'] = terminal - 1
            # Constant spacing between consecutive trucks at this terminal.
            df_terminal['interarrival'] = round(1 / rates[terminal - 1], 8)
            df_terminal['interarrival'] = df_terminal['interarrival'].astype(float)
            df_terminal['arrival'] = df_terminal['interarrival'].cumsum()
            # Drop trucks that would arrive after the simulation has ended.
            per_terminal_frames.append(df_terminal[df_terminal['arrival'] <= SIMULATION_TIME])

        df_truck_combined = pd.concat(per_terminal_frames, ignore_index=True)
        truck_data_components.append(df_truck_combined.sort_values('arrival'))

    truck_data = pd.concat(truck_data_components, ignore_index=True)
    truck_data['arrival'] = pd.to_numeric(truck_data['arrival'], errors='coerce')
    truck_data.sort_values('arrival', inplace=True)
    # IDs are assigned before the filters below, so the saved IDs may have gaps.
    truck_data['truck_id'] = range(1, len(truck_data) + 1)
    truck_data.reset_index(drop=True, inplace=True)

    # Drop trucks for (cargo type, terminal) pairs listed in TERMINALS_WITH_NO_TRUCKS.
    for cargo_type, list_of_terminals in TERMINALS_WITH_NO_TRUCKS.items():
        for terminal in list_of_terminals:
            truck_data = truck_data[~((truck_data['truck_type'] == cargo_type) & (truck_data['terminal'] == terminal))]

    # Keep only the trucks whose arrival time passes is_daytime.
    truck_data = truck_data[truck_data['arrival'].apply(is_daytime)]
    truck_data.to_pickle(f'.{run_id}/logs/truck_data.pkl')

    # One bar chart per cargo type: number of trucks per terminal.
    for terminal_type in ["Container", "Liquid", "DryBulk"]:
        terminal_data = truck_data[truck_data['truck_type'] == terminal_type]
        terminal_counts = terminal_data['terminal'].value_counts().sort_index()
        plt.bar(terminal_counts.index, terminal_counts.values)
        plt.xlabel("Terminal")
        plt.ylabel("Number of Trucks")
        plt.title(f"Number of Trucks in Each {terminal_type} Terminal")
        plt.xticks(terminal_counts.index)
        plt.tight_layout()
        plt.savefig(f".{run_id}/plots/truckDistribution/{terminal_type}_truck_distribution.pdf")
        plt.close()

    print("Total number of trucks", len(truck_data))

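Generate truck data for the simulation. This function builds a DataFrame of truck arrivals for each cargo type (Container, Liquid, DryBulk): it reads the truck arrival rate of every terminal, derives the interarrival times from those rates, generates truck data, and assigns terminal information to each truck. It then removes trucks for terminals listed in TERMINALS_WITH_NO_TRUCKS and trucks that do not arrive in daytime, and saves the result as a pickle file plus one distribution plot per cargo type. Note that, although it accepts a seed, the function itself does not initialize the random number generator.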

Arguments:
  • run_id (str): Unique identifier for the simulation run.
  • num_terminals (list): List containing the number of terminals for each truck type [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
  • terminal_data_df (pd.DataFrame): DataFrame containing terminal data.
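  • terminal_tuple_cache (dict): Dictionary containing terminal tuple cache. Accepted for interface compatibility; not used by this function.
  • seed (int): Random seed for reproducibility. Accepted for interface compatibility; not used by this function.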
Returns:

None

def generate_trains( run_id, num_terminals, terminal_data, terminal_data_df, terminal_tuple_cache, seed):
def generate_trains(run_id, num_terminals, terminal_data, terminal_data_df, terminal_tuple_cache, seed):
    """
    Generate train arrival data for the simulation and save it as CSV.

    After seeding the random number generators, every terminal of every cargo
    type (Container, Liquid, DryBulk) gets evenly spaced train arrivals at an
    interarrival time of ``1 / train arrival rate``. Each train receives a
    sampled car amount, the resulting total cargo, the terminal's transfer
    rate, and an import/export flag pair. Trains arriving at time 0 and trains
    for terminals listed in ``TERMINALS_WITH_NO_TRAINS`` are removed, train IDs
    are assigned in arrival order, and the table is written to
    ``.{run_id}/logs/train_data.csv``.

    Args:
        run_id (str): Unique identifier for the simulation run; used to build the output path.
        num_terminals (list): Number of terminals per cargo type, in the order
            [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
        terminal_data: Terminal data in the form accepted by get_value_by_terminal
            (documented by the original author as a pd.DataFrame).
        terminal_data_df (pd.DataFrame): Terminal data passed to
            get_values_by_terminal_random_sample to sample car amounts.
        terminal_tuple_cache (dict): Not used by this function; kept so existing callers keep working.
        seed (int): Random seed; passed to initialize_rng and to the car-amount sampler.
    Returns:
        None
    """
    initialize_rng(seed)

    train_columns = [
        'train_id', 'terminal_id', 'cargo type', 'terminal',
        'arrival at', 'car amount', 'cargo transfer rate',
        'total transfer cargo', 'import', 'export'
    ]

    # Non-empty per-terminal frames; concatenated once after the loop instead
    # of growing a DataFrame on every iteration.
    terminal_frames = []
    for type_index, terminal_type in enumerate(["Container", "Liquid", "DryBulk"]):
        for terminal_id in range(num_terminals[type_index]):
            terminal = terminal_id + 1  # terminals are 1-based in the terminal data
            arrival_rate = get_value_by_terminal(terminal_data, terminal_type, terminal, "train arrival rate")
            transfer_rate = get_value_by_terminal(terminal_data, terminal_type, terminal, "train cargo transfer rate")
            per_car_cargo = get_value_by_terminal(terminal_data, terminal_type, terminal, "train car payload size")
            import_terminal = get_value_by_terminal(terminal_data, terminal_type, terminal, "import")
            export_terminal = get_value_by_terminal(terminal_data, terminal_type, terminal, "export")

            # A zero arrival rate means the terminal receives no trains: use an
            # infinite interarrival time (-> zero trains) instead of raising
            # ZeroDivisionError on a plain Python number.
            interarrival_time = float("inf") if arrival_rate == 0 else 1 / arrival_rate
            num_trains = int(SIMULATION_TIME / interarrival_time)

            if import_terminal and not export_terminal:
                import_bool = [True] * num_trains
            elif not import_terminal and export_terminal:
                import_bool = [False] * num_trains
            else:
                # Terminal handles both (or neither): pick a direction per train.
                import_bool = [random.choice([True, False]) for _ in range(num_trains)]
            # Every train is exactly one of import / export.
            export_bool = [not is_import for is_import in import_bool]

            car_amounts = get_values_by_terminal_random_sample(terminal_data_df, terminal_type, terminal, "train car amount", num_trains, seed)
            total_transfer_cargos = [per_car_cargo * car_amount for car_amount in car_amounts]
            # The first arrival is at time 0; those rows are removed further down.
            arrivals = [j * interarrival_time for j in range(num_trains)]

            # 'train_id' is left out here; it is assigned after sorting and filtering.
            subsequent_trains = pd.DataFrame({
                'terminal_id': [terminal_id] * num_trains,
                'terminal': [terminal] * num_trains,
                'cargo type': [terminal_type] * num_trains,
                'arrival at': arrivals,
                'car amount': car_amounts,
                'cargo transfer rate': [transfer_rate] * num_trains,
                'total transfer cargo': total_transfer_cargos,
                'import': import_bool,
                'export': export_bool
            })

            # Remove all-NA columns/rows so they do not influence the concatenation.
            subsequent_trains = subsequent_trains.dropna(axis=1, how='all')
            subsequent_trains = subsequent_trains.dropna(axis=0, how='all')
            if not subsequent_trains.empty:
                terminal_frames.append(subsequent_trains)

    if terminal_frames:
        train_data = pd.concat(terminal_frames, ignore_index=True)
    else:
        # No trains at all: keep an empty table with the expected columns.
        train_data = pd.DataFrame(columns=train_columns)

    # Convert data types
    train_data['arrival at'] = pd.to_numeric(train_data['arrival at'], errors='coerce')
    train_data['terminal_id'] = train_data['terminal_id'].astype(int)
    train_data['terminal'] = train_data['terminal'].astype(int)
    train_data['car amount'] = train_data['car amount'].astype(int)
    train_data['cargo transfer rate'] = train_data['cargo transfer rate'].astype(float)
    train_data['total transfer cargo'] = train_data['total transfer cargo'].astype(float)
    train_data['import'] = train_data['import'].astype(bool)
    train_data['export'] = train_data['export'].astype(bool)
    train_data = train_data.sort_values('arrival at')
    # Discard the trains generated at time 0.
    train_data = train_data[train_data['arrival at'] != 0]

    # Drop trains for (cargo type, terminal) pairs listed in TERMINALS_WITH_NO_TRAINS.
    for cargo_type, list_of_terminals in TERMINALS_WITH_NO_TRAINS.items():
        for terminal in list_of_terminals:
            train_data = train_data[~((train_data['cargo type'] == cargo_type) & (train_data['terminal'] == terminal))]

    # Number the remaining trains 1..N in arrival order.
    train_data.reset_index(drop=True, inplace=True)
    train_data['train_id'] = train_data.index + 1
    print("Number of trains", len(train_data))
    output_path = f'.{run_id}/logs/train_data.csv'
    train_data.to_csv(output_path, index=False)

Generate train data for the simulation. This function creates a DataFrame containing train data for different types of trains (Container, Liquid, DryBulk). It initializes the random number generator, calculates mean interarrival times for trains at each terminal, generates train data based on the mean interarrival times, and assigns terminal information to each train.

Arguments:
  • run_id (str): Unique identifier for the simulation run.
  • num_terminals (list): List containing the number of terminals for each train type [num_container_terminals, num_liquid_terminals, num_drybulk_terminals].
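  • terminal_data (pd.DataFrame): Terminal data passed to get_value_by_terminal to look up each terminal's train arrival rate, cargo transfer rate, car payload size, and import/export flags.
  • terminal_data_df (pd.DataFrame): Terminal data passed to get_values_by_terminal_random_sample to sample the number of cars for each train.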
  • terminal_tuple_cache (dict): Dictionary containing terminal tuple cache.
  • seed (int): Random seed for reproducibility.
Returns:

None

def get_piplines_import(num_terminals_list, terminal_data):
def get_piplines_import(num_terminals_list, terminal_data):
    """
    Collect the liquid terminals that are connected to a pipeline.

    Each liquid terminal (numbered from 1) is looked up twice with
    get_value_by_terminal: once for "pipeline source" and once for
    "pipeline sink". A terminal is added to the matching list whenever the
    looked-up value is truthy, so one terminal may appear in both lists.
    Args:
        num_terminals_list (tuple): Terminal counts in the order
            (num_container_terminals, num_liquid_terminals, num_drybulk_terminals);
            only the liquid count is used.
        terminal_data (pd.DataFrame): Terminal data handed to get_value_by_terminal.
    Returns:
        tuple: (terminals with a pipeline source, terminals with a pipeline sink),
        each a list of 1-based liquid terminal numbers in ascending order.
    """
    _, liquid_terminal_count, _ = num_terminals_list

    source_terminals = []
    sink_terminals = []

    # Terminal numbers are 1-based in the terminal data.
    for terminal_number in range(1, liquid_terminal_count + 1):
        has_source = get_value_by_terminal(terminal_data, "Liquid", terminal_number, "pipeline source")
        has_sink = get_value_by_terminal(terminal_data, "Liquid", terminal_number, "pipeline sink")
        if has_source:
            source_terminals.append(terminal_number)
        if has_sink:
            sink_terminals.append(terminal_number)

    return source_terminals, sink_terminals

Get the list of liquid terminals that have pipelines as source or sink. This function checks each liquid terminal to see if it has a pipeline source or sink.

Arguments:
  • num_terminals_list (tuple): A tuple containing the number of terminals for each type of ship (num_container_terminals, num_liquid_terminals, num_drybulk_terminals).
  • terminal_data (pd.DataFrame): DataFrame containing terminal data.
Returns:

tuple: Two lists containing the liquid terminals with pipeline sources and sinks.