"""
simulation_analysis.collate_results

This script collates results from multiple simulation runs, generates plots,
and calculates statistics.

TODO: Break down into smaller functions for better readability and maintainability.
"""
  5from runpy import run_path
  6import time
  7import sys
  8import os
  9
 10import pandas as pd
 11import numpy as np
 12import matplotlib.pyplot as plt
 13from tqdm import tqdm
 14
 15# #import constants
 16
 17# # Get the parent directory path
 18# parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 19
 20# # Add parent directory to sys.path
 21# sys.path.append(parent_dir)
 22
 23# # Now you can import your script/module
 24import constants
 25
 26from simulation_analysis.capacity import calculate_mean_wait_time, calculate_ultimate_capacity
 27from simulation_analysis.results import get_dists
 28import config 
 29from constants import WARMUP_ITERS, SCENARIO_NAME, base_arrival_rate, ARRIVAL_INCREASE_FACTOR
 30
 31
 32def collate_results(num_runs, total_time):
 33    """
 34    Collates results from multiple simulation runs and generates various plots and statistics.
 35    The function creates directories for storing results, processes data from each run, 
 36    and generates plots for wait times, queue lengths, and channel utilization.
 37    It also calculates mean wait times, standard deviations, and other statistics for different ship types.
 38    The results are saved in a folder structure under 'collatedResults'.
 39
 40    Args:
 41        num_runs (int): Number of simulation runs to collate.
 42        total_time (int): Total time for the simulation in hours.
 43    Returns:
 44
 45    """
 46    start_time = time.time()
 47
 48    try:
 49        log_num = constants.LOG_NUM
 50    except:
 51        log_num = "TEST"
 52
 53    delete_everything = constants.DELETE_EVERYTHING
 54
 55    logfileid = f"{int(constants.NUM_MONTHS)}_months_{num_runs}_runs_run_{log_num}"
 56
 57    if delete_everything:
 58        os.system('rm -rf collatedResults')
 59
 60    # remove previous results
 61    os.makedirs('collatedResults', exist_ok=True)
 62    os.makedirs(f'collatedResults/{logfileid}/', exist_ok=True)
 63    os.makedirs(f'collatedResults/{logfileid}/data', exist_ok=True)
 64    os.makedirs(f'collatedResults/{logfileid}/timePlots', exist_ok=True)
 65    os.makedirs(f'collatedResults/{logfileid}/queuePlots', exist_ok=True)
 66    os.makedirs(f'collatedResults/{logfileid}/distPlots', exist_ok=True)
 67
 68    logfilename = f'collatedResults/{logfileid}/run_details_{logfileid}.txt'
 69
 70    print("Run details:")
 71    print(f"Number of runs: {num_runs}")
 72    print(f"Simulation time: {constants.NUM_MONTHS} months")
 73    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'w') as f:
 74        f.write("Run details:\n")
 75
 76        f.write(f"Simulation time: {constants.NUM_MONTHS} months\n")
 77        f.write(f"Number of runs: {num_runs}\n")
 78        f.write(f"Start seed: {constants.START_SEED}\n\n")
 79        f.write(f"Expanded channel: {constants.EXPANDED_CHANNEL}\n")
 80        f.write(f"Hurricane scenario: {constants.MODEL_HURRICANE}\n")
 81        f.write(f"Fog scenario: {constants.MODEL_FOG}\n")
 82        f.write("\nConstants:\n")
 83        f.write(
 84            f"Arrival increase factor: {constants.ARRIVAL_INCREASE_FACTOR}\n")
 85        f.write(
 86            f"Anchorage waiting of container: {constants.ANCHORAGE_WAITING_CONTAINER}\n")
 87        f.write(
 88            f"Anchorage waiting of liquid: {constants.ANCHORAGE_WAITING_LIQUID}\n")
 89        f.write(
 90            f"Anchorage waiting of dry bulk: {constants.ANCHORAGE_WAITING_DRYBULK}\n")
 91        f.write(
 92            f"Efficiency of container terminal: {constants.CTR_TERMINAL_EFFICIENCY}\n")
 93        f.write(
 94            f"Efficiency of liquid terminal: {constants.LIQ_TERMINAL_EFFICIENCY}\n")
 95        f.write(
 96            f"Efficiency of dry bulk terminal: {constants.DRYBULK_TERMINAL_EFFICIENCY}\n")
 97        f.write(f"Channel safety two way: {constants.CHANNEL_SAFETWOWAY}\n")
 98        f.write("Arrival rates:\n")
 99        f.write(f"Container: {constants.mean_interarrival_time_container}\n")
100        f.write(f"Liquid: {constants.mean_interarrival_time_tanker}\n")
101        f.write(f"Dry Bulk: {constants.mean_interarrival_time_gencargo}\n")
102
103    # distribution plots for each terminal
104    for terminal in ['Container', 'Liquid', 'Dry Bulk']:
105
106        plt.figure(figsize=(10, 6))
107        colors = plt.cm.viridis(np.linspace(0, 1, num_runs))
108        means_for_all_seeds = []
109        stds_for_all_seeds = []
110
111        for seed in range(constants.START_SEED, constants.START_SEED + num_runs):
112
113            try:
114                run_id = f"Results_{seed}_{int(constants.NUM_MONTHS)}_months_{constants.ARRIVAL_INCREASE_FACTOR}"
115                mean_values, min_values, max_values, std_values = get_dists(
116                    run_id, plot=False)
117                error_bars = {'min': mean_values - min_values,
118                              'max': max_values - mean_values}
119                columns = [
120                    'Time to get Berth', 'Restriction In (T)', 'Channel In', 'Time to get Pilot In',
121                    'Time to get Tugs In', 'Terminal Ops', 'Terminal Other', 'Restriction Out (T)',
122                    'Time to get Pilot Out', 'Time to get Tugs Out', 'Channel Out'
123                ]
124                means = mean_values.loc[terminal]
125                min_err = error_bars['min'].loc[terminal]
126                max_err = error_bars['max'].loc[terminal]
127                stds = std_values.loc[terminal]
128
129                means_for_all_seeds.append(means)
130                stds_for_all_seeds.append(stds)
131
132                plt.errorbar(columns, means, yerr=[
133                             min_err, max_err], fmt='-o', capsize=5, label=f'Seed {seed}', color=colors[seed - constants.START_SEED])
134
135            except Exception as e:
136                print(f"Error in processing seed {seed} {e}")
137                print("Error:", e)
138                pass
139
140        plt.xticks(rotation=90)
141        plt.title(f'{terminal} Terminal: Mean with Min/Max and SD')
142        plt.xlabel('Processes')
143        plt.ylabel('Time (hr)')
144        plt.tight_layout()
145        plt.legend()
146        plt.savefig(
147            f'collatedResults/{logfileid}/distPlots/{terminal}_dists.pdf')
148        plt.close()
149
150        # plot the overall mean and std for each process
151        plt.figure(figsize=(10, 6))
152        means_for_all_seeds = np.array(means_for_all_seeds)
153        stds_for_all_seeds = np.array(stds_for_all_seeds)
154        mean = means_for_all_seeds.mean(axis=0)
155        std = stds_for_all_seeds.mean(axis=0)
156        # annotate
157        for i, (m, s) in enumerate(zip(mean, std)):
158            plt.annotate(
159                f'Mean: {m:.1f}\nSD: {s:.1f}',
160                (i, m),
161                textcoords="offset points",
162                xytext=(5, 15),
163                ha='center'
164            )
165        plt.errorbar(columns, mean, yerr=std, fmt='-o',
166                     capsize=5, label='Mean', color='black')
167        plt.xticks(rotation=90)
168        plt.title(f'Overall Mean with SD for {terminal} Terminal')
169        plt.xlabel('Processes')
170        plt.ylabel('Time (hr)')
171        plt.tight_layout()
172        plt.legend()
173        plt.savefig(
174            f'collatedResults/{logfileid}/distPlots/overall_dists_{terminal}.pdf')
175        plt.close()
176
177    # df to store results for different seeds
178    dwell = pd.DataFrame()
179    turn = pd.DataFrame()
180    anchorage = pd.DataFrame()
181    cargo = pd.DataFrame()
182
183    ctr_dwell = pd.DataFrame()
184    ctr_turn = pd.DataFrame()
185    ctr_anchorage = pd.DataFrame()
186    ctr_cargo = pd.DataFrame()
187
188    liq_dwell = pd.DataFrame()
189    liq_turn = pd.DataFrame()
190    liq_anchorage = pd.DataFrame()
191    liq_cargo = pd.DataFrame()
192
193    db_dwell = pd.DataFrame()
194    db_turn = pd.DataFrame()
195    db_anchorage = pd.DataFrame()
196    db_cargo = pd.DataFrame()
197
198    container_queue = pd.DataFrame()
199    liquid_queue = pd.DataFrame()
200    drybulk_queue = pd.DataFrame()
201    all_queue = pd.DataFrame()
202
203    channel = pd.DataFrame()
204
205    ships_entered = []
206    ships_exited = []
207    ships_entered_channel = []
208
209    ctr_ships_entered = []
210    ctr_ships_exited = []
211    ctr_ships_entered_channel = []
212
213    liq_ships_entered = []
214    liq_ships_exited = []
215    liq_ships_entered_channel = []
216
217    db_ships_entered = []
218    db_ships_exited = []
219    db_ships_entered_channel = []
220
221    total_throughput = []
222    ctr_throughput = []
223    liq_throughput = []
224    dblk_throughput = []
225
226    for seed in tqdm(range(constants.START_SEED, constants.START_SEED + num_runs)):
227
228        try:
229            # Wait times
230            run_id = f"Results_{seed}_{int(constants.NUM_MONTHS)}_months_{constants.ARRIVAL_INCREASE_FACTOR}"
231            df_seed = pd.read_excel(f'.{run_id}/logs/ship_logs.xlsx') 
232
233            ships_entered.append(len(df_seed['Start Time'].dropna()))
234            ships_exited.append(len(df_seed['End Time'].dropna()))
235            ships_entered_channel.append(
236                len(df_seed['Time to Common Channel In'].dropna()))
237
238            # Throughput is computed using rows for each ship the number of container/tons etc loaded + unload and sum for all rows (ships)
239
240            logs_fp = f'.{run_id}/logs/ship_logs.xlsx'
241            data_fp = f'.{run_id}/logs/ship_data.csv'
242
243            out = compute_throughput(
244            logs_fp,
245            data_fp,
246            type_map = {"DryBulk":"Dry Bulk"},
247            )
248
249            # Append totals
250            total_throughput.append(out["total"])
251            ctr_throughput.append(out["by_type"].get("Container", 0.0))
252            liq_throughput.append(out["by_type"].get("Liquid", 0.0))
253            dblk_throughput.append(out["by_type"].get("Dry Bulk", 0.0))
254
255            # print(f"Run {seed}: Total Throughput = {out['total']}")
256            # print(f"Run {seed}: Container Throughput = {out['by_type'].get('Container', 0.0)}")
257            # print(f"Run {seed}: Liquid Throughput = {out['by_type'].get('Liquid', 0.0)}")
258            # print(f"Run {seed}: Dry Bulk Throughput = {out['by_type'].get('Dry Bulk', 0.0)}")
259
260            ctr_ships_entered.append(
261                len(df_seed[df_seed['Terminal Type'] == 'Container']['Start Time'].dropna()))
262            ctr_ships_exited.append(
263                len(df_seed[df_seed['Terminal Type'] == 'Container']['End Time'].dropna()))
264            ctr_ships_entered_channel.append(len(
265                df_seed[df_seed['Terminal Type'] == 'Container']['Time to Common Channel In'].dropna()))
266
267            liq_ships_entered.append(
268                len(df_seed[df_seed['Terminal Type'] == 'Liquid']['Start Time'].dropna()))
269            liq_ships_exited.append(
270                len(df_seed[df_seed['Terminal Type'] == 'Liquid']['End Time'].dropna()))
271            liq_ships_entered_channel.append(len(
272                df_seed[df_seed['Terminal Type'] == 'Liquid']['Time to Common Channel In'].dropna()))
273
274            db_ships_entered.append(
275                len(df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Start Time'].dropna()))
276            db_ships_exited.append(
277                len(df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['End Time'].dropna()))
278            db_ships_entered_channel.append(len(
279                df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Time to Common Channel In'].dropna()))
280
281            df_seed = df_seed[WARMUP_ITERS:]
282
283            dwell[f'Dwell_{seed}'] = df_seed['Dwell Time']
284            turn[f'Turn_{seed}'] = df_seed['Turn Time']
285            anchorage[f'Anchorage_{seed}'] = df_seed['Anchorage Time']
286            cargo[f'Cargo_{seed}'] = df_seed['Unloading Time'] + \
287                df_seed['Loading Time']
288
289            ctr_dwell[f'Dwell_{seed}'] = df_seed[df_seed['Terminal Type']
290                                                 == 'Container']['Dwell Time']
291            ctr_turn[f'Turn_{seed}'] = df_seed[df_seed['Terminal Type']
292                                               == 'Container']['Turn Time']
293            ctr_anchorage[f'Anchorage_{seed}'] = df_seed[df_seed['Terminal Type']
294                                                         == 'Container']['Anchorage Time']
295            ctr_cargo[f'Cargo_{seed}'] = df_seed[df_seed['Terminal Type'] == 'Container']['Unloading Time'] + \
296                df_seed[df_seed['Terminal Type']
297                        == 'Container']['Loading Time']
298
299            liq_dwell[f'Dwell_{seed}'] = df_seed[df_seed['Terminal Type']
300                                                 == 'Liquid']['Dwell Time']
301            liq_turn[f'Turn_{seed}'] = df_seed[df_seed['Terminal Type']
302                                               == 'Liquid']['Turn Time']
303            liq_anchorage[f'Anchorage_{seed}'] = df_seed[df_seed['Terminal Type']
304                                                         == 'Liquid']['Anchorage Time']
305            liq_cargo[f'Cargo_{seed}'] = df_seed[df_seed['Terminal Type'] ==
306                                                 'Liquid']['Unloading Time'] + df_seed[df_seed['Terminal Type'] == 'Liquid']['Loading Time']
307
308            db_dwell[f'Dwell_{seed}'] = df_seed[df_seed['Terminal Type']
309                                                == 'Dry Bulk']['Dwell Time']
310            db_turn[f'Turn_{seed}'] = df_seed[df_seed['Terminal Type']
311                                              == 'Dry Bulk']['Turn Time']
312            db_anchorage[f'Anchorage_{seed}'] = df_seed[df_seed['Terminal Type']
313                                                        == 'Dry Bulk']['Anchorage Time']
314            db_cargo[f'Cargo_{seed}'] = df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Unloading Time'] + \
315                df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Loading Time']
316
317            # Anchorage queue
318            combined = pd.read_csv(
319                f'.{run_id}/logs/waiting_in_anchorage_Container.csv')
320            container_queue[f'ctr_{seed}'] = combined['waiting_in_anchorage']
321
322            combined = pd.read_csv(
323                f'.{run_id}/logs/waiting_in_anchorage_Liquid.csv')
324            liquid_queue[f'liq_{seed}'] = combined['waiting_in_anchorage']
325
326            combined = pd.read_csv(
327                f'.{run_id}/logs/waiting_in_anchorage_DryBulk.csv')
328            drybulk_queue[f'db_{seed}'] = combined['waiting_in_anchorage']
329
330            combined = pd.read_csv(
331                f'.{run_id}/logs/waiting_in_anchorage_all.csv')
332            all_queue[f'all_{seed}'] = combined['waiting_in_anchorage']
333
334            # Channel utilization
335            total_ships_in_channel = pd.read_csv(
336                f'./.{run_id}/logs/total_ships_in_channel.csv')
337            channel[f'channel_{seed}'] = total_ships_in_channel.iloc[:, 1]
338        except Exception as e:
339            print(f"Error in processing seed {seed} {e}")
340            with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
341                f.write(f"Error in processing seed {seed} {e})\n")
342
343    print("\n")
344    print("Mean ships entered:", sum(ships_entered)/len(ships_entered))
345    print("Mean ships exited:", sum(ships_exited)/len(ships_exited))
346    print("Mean ships entered channel:", sum(
347        ships_entered_channel)/len(ships_entered_channel))
348    print("\n")
349
350    print(f"Mean annualised throughput (million tons): {round(((((sum(ctr_throughput)/len(ctr_throughput)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS))) + ((sum(liq_throughput)/len(liq_throughput)) / (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS))) + (sum(dblk_throughput)/len(dblk_throughput))) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
351    print("\n")
352    print(f"Mean annualised container throughput (million FEU containers): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
353    print(f"Mean annualised container throughput (million tons): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS)) / 1e6, 2)}")
354    print("\n")
355    print(f"Mean annualised liquid throughput (million cbm): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
356    print(f"Mean annualised liquid throughput (million tons): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS)) / 1e6, 2)}")
357    print("\n")
358    print(f"Mean annualised dry bulk throughput (million tons): {round(((sum(dblk_throughput)/len(dblk_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
359
360    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
361        f.write("\n")
362        f.write(
363            f"Mean ships entered: {sum(ships_entered)/len(ships_entered)}\n")
364        f.write(f"Mean ships exited: {sum(ships_exited)/len(ships_exited)}\n")
365        f.write(
366            f"Mean ships entered channel: {sum(ships_entered_channel)/len(ships_entered_channel)}\n")
367        f.write("\n")
368        f.write(
369            f"Mean ships entered for container: {sum(ctr_ships_entered)/len(ctr_ships_entered)}\n")
370        f.write(
371            f"Mean ships exited for container: {sum(ctr_ships_exited)/len(ctr_ships_exited)}\n")
372        f.write(
373            f"Mean ships entered channel for container: {sum(ctr_ships_entered_channel)/len(ctr_ships_entered_channel)}\n")
374        f.write("\n")
375        f.write(
376            f"Mean ships entered for liquid: {sum(liq_ships_entered)/len(liq_ships_entered)}\n")
377        f.write(
378            f"Mean ships exited for liquid: {sum(liq_ships_exited)/len(liq_ships_exited)}\n")
379        f.write(
380            f"Mean ships entered channel for liquid: {sum(liq_ships_entered_channel)/len(liq_ships_entered_channel)}\n")
381        f.write("\n")
382        f.write(
383            f"Mean ships entered for dry bulk: {sum(db_ships_entered)/len(db_ships_entered)}\n")
384        f.write(
385            f"Mean ships exited for dry bulk: {sum(db_ships_exited)/len(db_ships_exited)}\n")
386        f.write(
387            f"Mean ships entered channel for dry bulk: {sum(db_ships_entered_channel)/len(db_ships_entered_channel)}\n")
388        f.write("\n")
389
390        f.write(f"Mean annualised throughput (million tons): {round(((((sum(ctr_throughput)/len(ctr_throughput)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS))) + ((sum(liq_throughput)/len(liq_throughput)) / (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS))) + (sum(dblk_throughput)/len(dblk_throughput))) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
391        f.write(f"Mean annualised container throughput (million FEU containers): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
392        f.write(f"Mean annualised container throughput (million tons): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS)) / 1e6, 2)}\n")
393        f.write(f"Mean annualised liquid throughput (million cbm): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
394        f.write(f"Mean annualised liquid throughput (million tons): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS)) / 1e6, 2)}\n")
395        f.write(f"Mean annualised dry bulk throughput (million tons): {round(((sum(dblk_throughput)/len(dblk_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
396
397
398        f.write("\n")
399
400    # Plot multirun combined anchorage queues
401    print("\nAnchorage queue lengths:\n")
402    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
403        f.write("\nAnchorage queue lengths:\n")
404    list_of_df = [container_queue, liquid_queue, drybulk_queue, all_queue]
405    df_name = ['container', 'liquid', 'drybulk', 'all']
406    for i, df in enumerate(list_of_df):
407        # remove last 5 rows
408        df = df[:-5].copy()
409        df_analysis = pd.DataFrame()
410        ship_type = df_name[i]
411        df_analysis['mean'] = df.mean(axis=1)
412        df_analysis['std'] = df.std(axis=1)
413        df_analysis['max'] = df.max(axis=1)
414        df_analysis['min'] = df.min(axis=1)
415        df_analysis['rolling_mean'] = df_analysis['mean'].rolling(
416            window=200).mean()
417        mean = df_analysis['mean'][WARMUP_ITERS:].mean()
418        plt.plot(df_analysis['mean'], label='Mean')
419        plt.plot(df_analysis['rolling_mean'], label='Rolling Mean')
420        plt.fill_between(
421            df_analysis.index, df_analysis['min'], df_analysis['max'], color='skyblue', alpha=0.4, label='Min-Max Range')
422        plt.fill_between(df_analysis.index, df_analysis['mean'] - df_analysis['std'], df_analysis['mean'] +
423                         df_analysis['std'], color='lightgreen', alpha=0.4, label='Mean ± Std Dev')
424        plt.axvspan(0, WARMUP_ITERS, color='lightgray',
425                    alpha=0.5, label='Warmup Period', zorder=-10)
426        plt.xticks(fontsize=12)
427        plt.yticks(fontsize=12)
428        plt.xlim(left=0)
429        plt.title(f'Anchorage queue of {ship_type} ships', fontsize=18)
430        plt.xlabel('Time (hr)', fontsize=15)
431        plt.ylabel('Number of ships', fontsize=15)
432        plt.grid(True)
433        plt.legend()
434        plt.tight_layout()
435        plt.savefig(
436            f'collatedResults/{logfileid}/queuePlots/Queue_{ship_type}.pdf')
437        # plt.show()
438        plt.close()
439        print(
440            f"Mean (+- std) for anchorage queue of {ship_type} ships: {df_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {df_analysis['std'][WARMUP_ITERS:].mean():.1f})")
441        with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
442            f.write(
443                f"Mean (+- std) for anchorage queue of {ship_type} ships: {df_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {df_analysis['std'][WARMUP_ITERS:].mean():.1f})\n")
444    print("\n")
445
446    mean_queue_length = df_analysis['mean'][WARMUP_ITERS:].mean()
447
448    # Plot wait and processing times with error bars for multirun output
449    print("\nTime taken for different ship types:")
450    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
451        f.write("\nTime taken for different ship types:\n")
452    list_of_df = [dwell, turn, anchorage, cargo, ctr_dwell, ctr_turn, ctr_anchorage, ctr_cargo,
453                  liq_dwell, liq_turn, liq_anchorage, liq_cargo, db_dwell, db_turn, db_anchorage, db_cargo]
454    what = ['dwell', 'turn', 'anchorage', 'cargo'] * 4
455    which = ['all'] * 4 + ['container'] * 4 + ['liquid'] * 4 + ['drybulk'] * 4
456    for i, df in enumerate(list_of_df):
457        ship_type = which[i]
458        time_type = what[i]
459        df['mean'] = df.mean(axis=1)
460        df['std'] = df.std(axis=1)
461
462        # Calculate histogram
463        num_bins = 10
464        bins = np.linspace(df['mean'].min(), df['mean'].max(), num_bins + 1)
465        counts, bin_edges = np.histogram(df['mean'], bins=bins)
466        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
467        std_in_bins = []
468        for i in range(len(bins) - 1):
469            std_in_bins.append(df[(df['mean'] > bins[i]) & (
470                df['mean'] <= bins[i + 1])]['std'].mean())
471
472        plt.bar(bin_centers, counts, width=(
473            bin_edges[1] - bin_edges[0]) * 0.8, align='center', alpha=0.7, label='Histogram')
474        plt.errorbar(bin_centers, counts, yerr=std_in_bins,
475                     fmt='o', color='red', label='Error bars')
476        plt.xlabel('Time (hr)')
477        plt.ylabel('Frequency')
478        plt.title(f'Histogram of {time_type} time for {ship_type} ships')
479        plt.legend()
480        plt.savefig(
481            f'collatedResults/{logfileid}/timePlots/{time_type}_{ship_type}.pdf')
482        plt.close()
483
484        print(
485            f"Mean (+- std) for {time_type} time for {ship_type} ships: {df['mean'].mean():.1f} (+- {df['std'].mean():.1f})")
486        with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
487            f.write(
488                f"Mean (+- std) for {time_type} time for {ship_type} ships: {df['mean'].mean():.1f} (+- {df['std'].mean():.1f})\n")
489
490    # Capacity calculation
491
492    # anchorage queue wait
493    mean_all_anc_wait = anchorage.mean(axis=1).mean()
494
495    # mean queue lengths
496    mean_ctr_queue = container_queue.mean(axis=1).mean()
497    mean_liq_queue = liquid_queue.mean(axis=1).mean()
498    mean_db_queue = drybulk_queue.mean(axis=1).mean()
499    queue_lengths = [mean_ctr_queue, mean_liq_queue, mean_db_queue]
500
501    # arrival rates
502    mean_ctr_arrival = constants.mean_interarrival_time_container
503    mean_liq_arrival = constants.mean_interarrival_time_tanker
504    mean_db_arrival = constants.mean_interarrival_time_gencargo
505    arrival_rates = [1/mean_ctr_arrival, 1/mean_liq_arrival, 1/mean_db_arrival]
506    current_mean_arrival_rate = sum(arrival_rates)
507
508
509    print("\nChannel utilization:")
510    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
511        f.write("\nChannel utilization:\n")
512    channel_analysis = pd.DataFrame()
513    channel_analysis['mean'] = channel.mean(axis=1)
514    channel_analysis['std'] = channel.std(axis=1)
515    channel_analysis['max'] = channel.max(axis=1)
516    channel_analysis['min'] = channel.min(axis=1)
517    channel_analysis['rolling_mean'] = channel_analysis['mean'].rolling(
518        window=200).mean()
519    mean = channel_analysis['mean'][WARMUP_ITERS:].mean()
520    plt.plot(channel_analysis['mean'], label='Mean')
521    plt.plot(channel_analysis['rolling_mean'], label='Rolling Mean')
522    plt.fill_between(channel_analysis.index,
523                     channel_analysis['min'], channel_analysis['max'], color='skyblue', alpha=0.4, label='Min-Max Range')
524    plt.fill_between(channel_analysis.index, channel_analysis['mean'] - channel_analysis['std'],
525                     channel_analysis['mean'] + channel_analysis['std'], color='lightgreen', alpha=0.4, label='Mean ± Std Dev')
526    plt.axvspan(0, WARMUP_ITERS, color='lightgray',
527                alpha=0.5, label='Warmup Period', zorder=-10)
528    plt.xticks(fontsize=12)
529    plt.yticks(fontsize=12)
530    plt.xlim(left=0)
531    plt.title('Channel utilization', fontsize=18)
532    plt.xlabel('Time (hr)', fontsize=15)
533    plt.ylabel('Number of ships', fontsize=15)
534    plt.grid(True)
535    plt.legend()
536    plt.tight_layout()
537    plt.savefig(f'collatedResults/{logfileid}/channel.pdf')
538    plt.close()
539    print(
540        f"Mean (+- std) for channel utilization: {channel_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {channel_analysis['std'][WARMUP_ITERS:].mean():.1f})\n")
541    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
542        f.write(
543            f"Mean (+- std) for channel utilization: {channel_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {channel_analysis['std'][WARMUP_ITERS:].mean():.1f})\n")
544
545    try:
546        output = calculate_mean_wait_time(
547            arrival_rates, queue_lengths, mean_all_anc_wait, logfilename)
548        op_cap = output['Service Rate']
549    except Exception as e:
550        print("Operating capacity calculation failed")
551        print("Error:", e)
552        op_cap = 0.0
553
554    print(f"Operating capacity: {op_cap:.2f} vessels / hr")
555
556    print("SAVING RESULTS")
557    if config.SCENARIO_NAME == 'BottleneckDetection':
558        print("Bottleneck detection scenario, not saving results")
559        if np.isclose(current_mean_arrival_rate, base_arrival_rate, atol=1e-1):
560            with open(f'bottleneckAnalysis/logs/{SCENARIO_NAME}_BASE.txt', 'w') as f: #use the scenario name from constants
561                f.write(f"Operating capacity: {op_cap:.2f} vessels / hr\n")
562                f.write(f"Arrival rate: {current_mean_arrival_rate:.2f} vessels / hr\n")
563                f.write(f"Exited ships: {sum(ships_exited)/len(ships_exited):.2f} vessels / hr\n")
564                f.write(f"Dwell time (Container): {ctr_dwell['mean'].mean():.1f} hr\n")
565                f.write(f"Dwell time (Liquid): {liq_dwell['mean'].mean():.1f} hr\n")
566                f.write(f"Dwell time (Dry Bulk): {db_dwell['mean'].mean():.1f} hr\n")
567                f.write(f"Turn time (Average): {turn['mean'].mean():.1f} hr\n")
568                f.write(f"Anchorage queue length: {mean_queue_length:.2f} ships\n")
569                f.write(f"Anchorage queue length (Container): {mean_ctr_queue:.2f} ships\n")
570                f.write(f"Anchorage queue length (Liquid): {mean_liq_queue:.2f} ships\n")
571                f.write(f"Anchorage queue length (Dry Bulk): {mean_db_queue:.2f} ships\n")      
572                f.write(f"Mean wait time in anchorage: {mean_all_anc_wait:.2f} hr\n")
573                f.write(f"Mean wait time in anchorage (Container): {mean_ctr_queue:.2f} hr\n")
574                f.write(f"Mean wait time in anchorage (Liquid): {mean_liq_queue:.2f} hr\n")
575                f.write(f"Mean wait time in anchorage (Dry Bulk): {mean_db_queue:.2f} hr\n")
576                f.write(f"Mean channel utilization: {channel_analysis['mean'][WARMUP_ITERS:].mean():.1f} ships\n")
577        else:
578            with open(f'bottleneckAnalysis/logs/{SCENARIO_NAME}_{ARRIVAL_INCREASE_FACTOR}.txt', 'w') as f: #use the scenario name from constants
579                f.write("Arrival rate: {:.2f} vessels / hr\n".format(current_mean_arrival_rate))
580                f.write("Exited ships: {:.2f} vessels / hr\n".format(sum(ships_exited)/len(ships_exited)))
581
582    # Cu_opt = calculate_ultimate_capacity()
583
584    if config.SCENARIO_NAME != "Base":
585
586        try:
587            Cu_opt,theta_opt = calculate_ultimate_capacity()
588            with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
589                f.write(f"Ultimate capacity: {Cu_opt:.2f} vessels / hr\n")
590                f.write(f"Optimal theta: {theta_opt:.2f}\n")
591        except Exception as e:
592            print("Ultimate capacity calculation failed")
593            print("Error:", e)
594            print("More simulation runs may be needed to calculate ultimate capacity.")
595    
596
597    # save the dataframes
598    dwell.to_csv(f'collatedResults/{logfileid}/data/dwell.csv')
599    turn.to_csv(f'collatedResults/{logfileid}/data/turn.csv')
600    anchorage.to_csv(f'collatedResults/{logfileid}/data/anchorage.csv')
601    cargo.to_csv(f'collatedResults/{logfileid}/data/cargo.csv')
602
603    ctr_dwell.to_csv(f'collatedResults/{logfileid}/data/ctr_dwell.csv')
604    ctr_turn.to_csv(f'collatedResults/{logfileid}/data/ctr_turn.csv')
605    ctr_anchorage.to_csv(f'collatedResults/{logfileid}/data/ctr_anchorage.csv')
606    ctr_cargo.to_csv(f'collatedResults/{logfileid}/data/ctr_cargo.csv')
607
608    liq_dwell.to_csv(f'collatedResults/{logfileid}/data/liq_dwell.csv')
609    liq_turn.to_csv(f'collatedResults/{logfileid}/data/liq_turn.csv')
610    liq_anchorage.to_csv(f'collatedResults/{logfileid}/data/liq_anchorage.csv')
611    liq_cargo.to_csv(f'collatedResults/{logfileid}/data/liq_cargo.csv')
612
613    db_dwell.to_csv(f'collatedResults/{logfileid}/data/db_dwell.csv')
614    db_turn.to_csv(f'collatedResults/{logfileid}/data/db_turn.csv')
615    db_anchorage.to_csv(f'collatedResults/{logfileid}/data/db_anchorage.csv')
616    db_cargo.to_csv(f'collatedResults/{logfileid}/data/db_cargo.csv')
617
618    container_queue.to_csv(
619        f'collatedResults/{logfileid}/data/container_queue.csv')
620    liquid_queue.to_csv(f'collatedResults/{logfileid}/data/liquid_queue.csv')
621    drybulk_queue.to_csv(f'collatedResults/{logfileid}/data/drybulk_queue.csv')
622    all_queue.to_csv(f'collatedResults/{logfileid}/data/all_queue.csv')
623
624    channel.to_csv(f'collatedResults/{logfileid}/data/channel.csv')
625
626    # restrictions
627
628    # Replace these with your actual list of run_ids
629    run_ids = []
630    for seed in range(constants.START_SEED, constants.START_SEED + num_runs):
631        try:
632            run_id = f"Results_{seed}_{int(constants.NUM_MONTHS)}_months_{constants.ARRIVAL_INCREASE_FACTOR}"
633            run_ids.append(run_id)
634        except:
635            print(f"Error in processing seed {seed} (does not exist)")
636            continue
637
638    in_path_template = '.{}/bottlenecks/Waterway/in_hist_percentage.csv'
639    out_path_template = '.{}/bottlenecks/Waterway/out_hist_percentage.csv'
640    in_hist_list = [pd.read_csv(in_path_template.format(
641        run_id), index_col=0) for run_id in run_ids]
642    out_hist_list = [pd.read_csv(out_path_template.format(
643        run_id), index_col=0) for run_id in run_ids]
644
645    restriction_types = ['Beam', 'Draft', 'Daylight', 'Total']
646
647    def compute_mean_std(hist_list, suffix):
648        means = {}
649        stds = {}
650        for r in restriction_types:
651            col = f"{r} {suffix}"
652            # shape: (bins, seeds)
653            stacked = np.stack([df[col].values for df in hist_list], axis=1)
654            means[r] = np.mean(stacked, axis=1)
655            stds[r] = np.std(stacked, axis=1)
656        bin_labels = hist_list[0].index
657        return means, stds, bin_labels
658
659    mean_in, std_in, index_in = compute_mean_std(in_hist_list, 'In')
660    mean_out, std_out, index_out = compute_mean_std(out_hist_list, 'Out')
661
662    def save_individual_errorbar_pdfs(mean_dict, std_dict, index_labels, suffix, output_folder):
663        os.makedirs(output_folder, exist_ok=True)
664        x = np.arange(len(index_labels))
665        xticks = ['0' if str(label) == '(-1, 0]' else str(label)
666                  for label in index_labels]
667
668        for r in restriction_types:
669            mean_vals = mean_dict[r]
670            std_vals = std_dict[r]
671
672            plt.figure(figsize=(10, 6))
673            plt.bar(x, mean_vals, yerr=std_vals, capsize=5, alpha=0.7)
674            plt.xticks(ticks=x, labels=xticks, rotation=45)
675            plt.ylabel('Percentage', fontsize=12)
676            plt.xlabel('Waiting Time (hr)', fontsize=12)
677            plt.title(
678                f'{r} waiting time distribution ({suffix.lower()})', fontsize=14)
679            plt.tight_layout()
680
681            pdf_path = os.path.join(
682                output_folder, f'{r.lower()}_{suffix.lower()}_errorbar.pdf')
683            plt.savefig(pdf_path)
684            plt.close()
685
686    output_folder = f'collatedResults/{logfileid}/restrictionPlots/'
687
688    save_individual_errorbar_pdfs(
689        mean_in, std_in, index_in, 'In', output_folder)
690    save_individual_errorbar_pdfs(
691        mean_out, std_out, index_out, 'Out', output_folder)
692
693    collate_results_time = round((time.time() - start_time) / 60, 2)
694
695    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
696        f.write(
697            f"\nTotal simulation runtime: {round(total_time + collate_results_time, 2)} minutes\n")
698
699    print(
700        f"Total time taken: {round(total_time + collate_results_time, 2)} minutes\n")
701
702def compute_throughput(
703    logs_fp: str,
704    data_fp: str,
705    *,
706    strict_missing: bool = True,
707    type_map: dict | None = None,
708) -> dict:
709    """
710    Compute throughput for a single run given paths to:
711      - logs_fp: Excel/CSV with 'Ship_Id' and 'End Time'
712      - data_fp: CSV with 'ship_id', 'ship_type',
713                 'num_container_or_liq_tons_or_dry_tons_to_load',
714                 'num_container_or_liq_tons_or_dry_tons_to_unload'
715
716    Returns:
717      {
718        "total": float,
719        "by_type": { "<type>": float, ... },
720        "ships_exited": int,
721        "exited_ids": list[str]
722      }
723
724    Raises:
725      KeyError / ValueError with clear messages if required columns are missing
726      or (if strict_missing=True) any exited ship isn't found in ship_data.
727    """
728    # --- read ---
729    if logs_fp.lower().endswith((".xls", ".xlsx")):
730        df_logs = pd.read_excel(logs_fp)
731    else:
732        df_logs = pd.read_csv(logs_fp)
733
734    df_data = pd.read_csv(data_fp)
735
736    # --- sanity: required columns ---
737    req_logs = {"Ship_Id", "End Time"}
738    req_data = {
739        "ship_id",
740        "ship_type",
741        "num_container_or_liq_tons_or_dry_tons_to_load",
742        "num_container_or_liq_tons_or_dry_tons_to_unload",
743    }
744    missing_logs = req_logs - set(df_logs.columns)
745    missing_data = req_data - set(df_data.columns)
746    if missing_logs:
747        raise KeyError(f"Ship Logs missing columns: {sorted(missing_logs)}")
748    if missing_data:
749        raise KeyError(f"Ship data missing columns: {sorted(missing_data)}")
750
751    # --- normalize ids ---
752    # logs are 0-based ("Ship_0", "Ship_1", ...)
753    df_logs["Ship_Id"] = (
754        df_logs["Ship_Id"]
755        .str.extract(r"(\d+)")[0]   # pull out the number part
756        .astype(int)
757        .add(1)                     # shift to match ship_data (1-based)
758        .astype(str)
759    )
760
761    df_logs["Ship_Id"] = df_logs["Ship_Id"].astype(int)
762    df_data["ship_id"] = df_data["ship_id"].astype(int)
763
764    # --- exited ships set ---
765    exited_ids = (
766        df_logs.loc[df_logs["End Time"].notna(), "Ship_Id"]
767        .dropna()
768        .unique()
769        .tolist()
770    )
771    ships_exited = len(exited_ids)
772
773    # --- subset data to exited; validate coverage ---
774    data_exited = df_data[df_data["ship_id"].isin(exited_ids)].copy()
775    missing = sorted(set(exited_ids) - set(data_exited["ship_id"]))
776    if missing and strict_missing:
777        raise ValueError(
778            f"{len(missing)} exited ship(s) not found in ship_data: "
779            f"{missing[:10]}{' ...' if len(missing) > 10 else ''}"
780        )
781
782    # --- (optional) standardize type labels ---
783    if type_map:
784        data_exited["ship_type"] = data_exited["ship_type"].map(
785            lambda x: type_map.get(x, x)
786        )
787
788    # --- numeric quantities ---
789    for c in [
790        "num_container_or_liq_tons_or_dry_tons_to_load",
791        "num_container_or_liq_tons_or_dry_tons_to_unload",
792    ]:
793        data_exited[c] = pd.to_numeric(data_exited[c], errors="coerce").fillna(0)
794
795    # --- collapse to one row per ship_id (+ keep ship_type) ---
796    grouped = (
797        data_exited.groupby(["ship_id", "ship_type"], as_index=False)[
798            [
799                "num_container_or_liq_tons_or_dry_tons_to_load",
800                "num_container_or_liq_tons_or_dry_tons_to_unload",
801            ]
802        ]
803        .sum()
804    )
805    grouped["ship_throughput"] = (
806        grouped["num_container_or_liq_tons_or_dry_tons_to_load"]
807        + grouped["num_container_or_liq_tons_or_dry_tons_to_unload"]
808    )
809
810    # --- aggregates ---
811    by_type = grouped.groupby("ship_type")["ship_throughput"].sum().to_dict()
812    total = float(grouped["ship_throughput"].sum())
813
814    return {
815        "total": float(total),
816        "by_type": {k: float(v) for k, v in by_type.items()},
817        "ships_exited": ships_exited,
818        "exited_ids": exited_ids,
819    }
820
821# collate_results(4, 12.19)
def collate_results(num_runs, total_time):
 33def collate_results(num_runs, total_time):
 34    """
 35    Collates results from multiple simulation runs and generates various plots and statistics.
 36    The function creates directories for storing results, processes data from each run, 
 37    and generates plots for wait times, queue lengths, and channel utilization.
 38    It also calculates mean wait times, standard deviations, and other statistics for different ship types.
 39    The results are saved in a folder structure under 'collatedResults'.
 40
 41    Args:
 42        num_runs (int): Number of simulation runs to collate.
 43        total_time (int): Total time for the simulation in hours.
 44    Returns:
 45
 46    """
 47    start_time = time.time()
 48
 49    try:
 50        log_num = constants.LOG_NUM
 51    except:
 52        log_num = "TEST"
 53
 54    delete_everything = constants.DELETE_EVERYTHING
 55
 56    logfileid = f"{int(constants.NUM_MONTHS)}_months_{num_runs}_runs_run_{log_num}"
 57
 58    if delete_everything:
 59        os.system('rm -rf collatedResults')
 60
 61    # remove previous results
 62    os.makedirs('collatedResults', exist_ok=True)
 63    os.makedirs(f'collatedResults/{logfileid}/', exist_ok=True)
 64    os.makedirs(f'collatedResults/{logfileid}/data', exist_ok=True)
 65    os.makedirs(f'collatedResults/{logfileid}/timePlots', exist_ok=True)
 66    os.makedirs(f'collatedResults/{logfileid}/queuePlots', exist_ok=True)
 67    os.makedirs(f'collatedResults/{logfileid}/distPlots', exist_ok=True)
 68
 69    logfilename = f'collatedResults/{logfileid}/run_details_{logfileid}.txt'
 70
 71    print("Run details:")
 72    print(f"Number of runs: {num_runs}")
 73    print(f"Simulation time: {constants.NUM_MONTHS} months")
 74    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'w') as f:
 75        f.write("Run details:\n")
 76
 77        f.write(f"Simulation time: {constants.NUM_MONTHS} months\n")
 78        f.write(f"Number of runs: {num_runs}\n")
 79        f.write(f"Start seed: {constants.START_SEED}\n\n")
 80        f.write(f"Expanded channel: {constants.EXPANDED_CHANNEL}\n")
 81        f.write(f"Hurricane scenario: {constants.MODEL_HURRICANE}\n")
 82        f.write(f"Fog scenario: {constants.MODEL_FOG}\n")
 83        f.write("\nConstants:\n")
 84        f.write(
 85            f"Arrival increase factor: {constants.ARRIVAL_INCREASE_FACTOR}\n")
 86        f.write(
 87            f"Anchorage waiting of container: {constants.ANCHORAGE_WAITING_CONTAINER}\n")
 88        f.write(
 89            f"Anchorage waiting of liquid: {constants.ANCHORAGE_WAITING_LIQUID}\n")
 90        f.write(
 91            f"Anchorage waiting of dry bulk: {constants.ANCHORAGE_WAITING_DRYBULK}\n")
 92        f.write(
 93            f"Efficiency of container terminal: {constants.CTR_TERMINAL_EFFICIENCY}\n")
 94        f.write(
 95            f"Efficiency of liquid terminal: {constants.LIQ_TERMINAL_EFFICIENCY}\n")
 96        f.write(
 97            f"Efficiency of dry bulk terminal: {constants.DRYBULK_TERMINAL_EFFICIENCY}\n")
 98        f.write(f"Channel safety two way: {constants.CHANNEL_SAFETWOWAY}\n")
 99        f.write("Arrival rates:\n")
100        f.write(f"Container: {constants.mean_interarrival_time_container}\n")
101        f.write(f"Liquid: {constants.mean_interarrival_time_tanker}\n")
102        f.write(f"Dry Bulk: {constants.mean_interarrival_time_gencargo}\n")
103
104    # distribution plots for each terminal
105    for terminal in ['Container', 'Liquid', 'Dry Bulk']:
106
107        plt.figure(figsize=(10, 6))
108        colors = plt.cm.viridis(np.linspace(0, 1, num_runs))
109        means_for_all_seeds = []
110        stds_for_all_seeds = []
111
112        for seed in range(constants.START_SEED, constants.START_SEED + num_runs):
113
114            try:
115                run_id = f"Results_{seed}_{int(constants.NUM_MONTHS)}_months_{constants.ARRIVAL_INCREASE_FACTOR}"
116                mean_values, min_values, max_values, std_values = get_dists(
117                    run_id, plot=False)
118                error_bars = {'min': mean_values - min_values,
119                              'max': max_values - mean_values}
120                columns = [
121                    'Time to get Berth', 'Restriction In (T)', 'Channel In', 'Time to get Pilot In',
122                    'Time to get Tugs In', 'Terminal Ops', 'Terminal Other', 'Restriction Out (T)',
123                    'Time to get Pilot Out', 'Time to get Tugs Out', 'Channel Out'
124                ]
125                means = mean_values.loc[terminal]
126                min_err = error_bars['min'].loc[terminal]
127                max_err = error_bars['max'].loc[terminal]
128                stds = std_values.loc[terminal]
129
130                means_for_all_seeds.append(means)
131                stds_for_all_seeds.append(stds)
132
133                plt.errorbar(columns, means, yerr=[
134                             min_err, max_err], fmt='-o', capsize=5, label=f'Seed {seed}', color=colors[seed - constants.START_SEED])
135
136            except Exception as e:
137                print(f"Error in processing seed {seed} {e}")
138                print("Error:", e)
139                pass
140
141        plt.xticks(rotation=90)
142        plt.title(f'{terminal} Terminal: Mean with Min/Max and SD')
143        plt.xlabel('Processes')
144        plt.ylabel('Time (hr)')
145        plt.tight_layout()
146        plt.legend()
147        plt.savefig(
148            f'collatedResults/{logfileid}/distPlots/{terminal}_dists.pdf')
149        plt.close()
150
151        # plot the overall mean and std for each process
152        plt.figure(figsize=(10, 6))
153        means_for_all_seeds = np.array(means_for_all_seeds)
154        stds_for_all_seeds = np.array(stds_for_all_seeds)
155        mean = means_for_all_seeds.mean(axis=0)
156        std = stds_for_all_seeds.mean(axis=0)
157        # annotate
158        for i, (m, s) in enumerate(zip(mean, std)):
159            plt.annotate(
160                f'Mean: {m:.1f}\nSD: {s:.1f}',
161                (i, m),
162                textcoords="offset points",
163                xytext=(5, 15),
164                ha='center'
165            )
166        plt.errorbar(columns, mean, yerr=std, fmt='-o',
167                     capsize=5, label='Mean', color='black')
168        plt.xticks(rotation=90)
169        plt.title(f'Overall Mean with SD for {terminal} Terminal')
170        plt.xlabel('Processes')
171        plt.ylabel('Time (hr)')
172        plt.tight_layout()
173        plt.legend()
174        plt.savefig(
175            f'collatedResults/{logfileid}/distPlots/overall_dists_{terminal}.pdf')
176        plt.close()
177
178    # df to store results for different seeds
179    dwell = pd.DataFrame()
180    turn = pd.DataFrame()
181    anchorage = pd.DataFrame()
182    cargo = pd.DataFrame()
183
184    ctr_dwell = pd.DataFrame()
185    ctr_turn = pd.DataFrame()
186    ctr_anchorage = pd.DataFrame()
187    ctr_cargo = pd.DataFrame()
188
189    liq_dwell = pd.DataFrame()
190    liq_turn = pd.DataFrame()
191    liq_anchorage = pd.DataFrame()
192    liq_cargo = pd.DataFrame()
193
194    db_dwell = pd.DataFrame()
195    db_turn = pd.DataFrame()
196    db_anchorage = pd.DataFrame()
197    db_cargo = pd.DataFrame()
198
199    container_queue = pd.DataFrame()
200    liquid_queue = pd.DataFrame()
201    drybulk_queue = pd.DataFrame()
202    all_queue = pd.DataFrame()
203
204    channel = pd.DataFrame()
205
206    ships_entered = []
207    ships_exited = []
208    ships_entered_channel = []
209
210    ctr_ships_entered = []
211    ctr_ships_exited = []
212    ctr_ships_entered_channel = []
213
214    liq_ships_entered = []
215    liq_ships_exited = []
216    liq_ships_entered_channel = []
217
218    db_ships_entered = []
219    db_ships_exited = []
220    db_ships_entered_channel = []
221
222    total_throughput = []
223    ctr_throughput = []
224    liq_throughput = []
225    dblk_throughput = []
226
227    for seed in tqdm(range(constants.START_SEED, constants.START_SEED + num_runs)):
228
229        try:
230            # Wait times
231            run_id = f"Results_{seed}_{int(constants.NUM_MONTHS)}_months_{constants.ARRIVAL_INCREASE_FACTOR}"
232            df_seed = pd.read_excel(f'.{run_id}/logs/ship_logs.xlsx') 
233
234            ships_entered.append(len(df_seed['Start Time'].dropna()))
235            ships_exited.append(len(df_seed['End Time'].dropna()))
236            ships_entered_channel.append(
237                len(df_seed['Time to Common Channel In'].dropna()))
238
239            # Throughput is computed using rows for each ship the number of container/tons etc loaded + unload and sum for all rows (ships)
240
241            logs_fp = f'.{run_id}/logs/ship_logs.xlsx'
242            data_fp = f'.{run_id}/logs/ship_data.csv'
243
244            out = compute_throughput(
245            logs_fp,
246            data_fp,
247            type_map = {"DryBulk":"Dry Bulk"},
248            )
249
250            # Append totals
251            total_throughput.append(out["total"])
252            ctr_throughput.append(out["by_type"].get("Container", 0.0))
253            liq_throughput.append(out["by_type"].get("Liquid", 0.0))
254            dblk_throughput.append(out["by_type"].get("Dry Bulk", 0.0))
255
256            # print(f"Run {seed}: Total Throughput = {out['total']}")
257            # print(f"Run {seed}: Container Throughput = {out['by_type'].get('Container', 0.0)}")
258            # print(f"Run {seed}: Liquid Throughput = {out['by_type'].get('Liquid', 0.0)}")
259            # print(f"Run {seed}: Dry Bulk Throughput = {out['by_type'].get('Dry Bulk', 0.0)}")
260
261            ctr_ships_entered.append(
262                len(df_seed[df_seed['Terminal Type'] == 'Container']['Start Time'].dropna()))
263            ctr_ships_exited.append(
264                len(df_seed[df_seed['Terminal Type'] == 'Container']['End Time'].dropna()))
265            ctr_ships_entered_channel.append(len(
266                df_seed[df_seed['Terminal Type'] == 'Container']['Time to Common Channel In'].dropna()))
267
268            liq_ships_entered.append(
269                len(df_seed[df_seed['Terminal Type'] == 'Liquid']['Start Time'].dropna()))
270            liq_ships_exited.append(
271                len(df_seed[df_seed['Terminal Type'] == 'Liquid']['End Time'].dropna()))
272            liq_ships_entered_channel.append(len(
273                df_seed[df_seed['Terminal Type'] == 'Liquid']['Time to Common Channel In'].dropna()))
274
275            db_ships_entered.append(
276                len(df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Start Time'].dropna()))
277            db_ships_exited.append(
278                len(df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['End Time'].dropna()))
279            db_ships_entered_channel.append(len(
280                df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Time to Common Channel In'].dropna()))
281
282            df_seed = df_seed[WARMUP_ITERS:]
283
284            dwell[f'Dwell_{seed}'] = df_seed['Dwell Time']
285            turn[f'Turn_{seed}'] = df_seed['Turn Time']
286            anchorage[f'Anchorage_{seed}'] = df_seed['Anchorage Time']
287            cargo[f'Cargo_{seed}'] = df_seed['Unloading Time'] + \
288                df_seed['Loading Time']
289
290            ctr_dwell[f'Dwell_{seed}'] = df_seed[df_seed['Terminal Type']
291                                                 == 'Container']['Dwell Time']
292            ctr_turn[f'Turn_{seed}'] = df_seed[df_seed['Terminal Type']
293                                               == 'Container']['Turn Time']
294            ctr_anchorage[f'Anchorage_{seed}'] = df_seed[df_seed['Terminal Type']
295                                                         == 'Container']['Anchorage Time']
296            ctr_cargo[f'Cargo_{seed}'] = df_seed[df_seed['Terminal Type'] == 'Container']['Unloading Time'] + \
297                df_seed[df_seed['Terminal Type']
298                        == 'Container']['Loading Time']
299
300            liq_dwell[f'Dwell_{seed}'] = df_seed[df_seed['Terminal Type']
301                                                 == 'Liquid']['Dwell Time']
302            liq_turn[f'Turn_{seed}'] = df_seed[df_seed['Terminal Type']
303                                               == 'Liquid']['Turn Time']
304            liq_anchorage[f'Anchorage_{seed}'] = df_seed[df_seed['Terminal Type']
305                                                         == 'Liquid']['Anchorage Time']
306            liq_cargo[f'Cargo_{seed}'] = df_seed[df_seed['Terminal Type'] ==
307                                                 'Liquid']['Unloading Time'] + df_seed[df_seed['Terminal Type'] == 'Liquid']['Loading Time']
308
309            db_dwell[f'Dwell_{seed}'] = df_seed[df_seed['Terminal Type']
310                                                == 'Dry Bulk']['Dwell Time']
311            db_turn[f'Turn_{seed}'] = df_seed[df_seed['Terminal Type']
312                                              == 'Dry Bulk']['Turn Time']
313            db_anchorage[f'Anchorage_{seed}'] = df_seed[df_seed['Terminal Type']
314                                                        == 'Dry Bulk']['Anchorage Time']
315            db_cargo[f'Cargo_{seed}'] = df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Unloading Time'] + \
316                df_seed[df_seed['Terminal Type'] == 'Dry Bulk']['Loading Time']
317
318            # Anchorage queue
319            combined = pd.read_csv(
320                f'.{run_id}/logs/waiting_in_anchorage_Container.csv')
321            container_queue[f'ctr_{seed}'] = combined['waiting_in_anchorage']
322
323            combined = pd.read_csv(
324                f'.{run_id}/logs/waiting_in_anchorage_Liquid.csv')
325            liquid_queue[f'liq_{seed}'] = combined['waiting_in_anchorage']
326
327            combined = pd.read_csv(
328                f'.{run_id}/logs/waiting_in_anchorage_DryBulk.csv')
329            drybulk_queue[f'db_{seed}'] = combined['waiting_in_anchorage']
330
331            combined = pd.read_csv(
332                f'.{run_id}/logs/waiting_in_anchorage_all.csv')
333            all_queue[f'all_{seed}'] = combined['waiting_in_anchorage']
334
335            # Channel utilization
336            total_ships_in_channel = pd.read_csv(
337                f'./.{run_id}/logs/total_ships_in_channel.csv')
338            channel[f'channel_{seed}'] = total_ships_in_channel.iloc[:, 1]
339        except Exception as e:
340            print(f"Error in processing seed {seed} {e}")
341            with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
342                f.write(f"Error in processing seed {seed} {e})\n")
343
344    print("\n")
345    print("Mean ships entered:", sum(ships_entered)/len(ships_entered))
346    print("Mean ships exited:", sum(ships_exited)/len(ships_exited))
347    print("Mean ships entered channel:", sum(
348        ships_entered_channel)/len(ships_entered_channel))
349    print("\n")
350
351    print(f"Mean annualised throughput (million tons): {round(((((sum(ctr_throughput)/len(ctr_throughput)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS))) + ((sum(liq_throughput)/len(liq_throughput)) / (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS))) + (sum(dblk_throughput)/len(dblk_throughput))) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
352    print("\n")
353    print(f"Mean annualised container throughput (million FEU containers): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
354    print(f"Mean annualised container throughput (million tons): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS)) / 1e6, 2)}")
355    print("\n")
356    print(f"Mean annualised liquid throughput (million cbm): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
357    print(f"Mean annualised liquid throughput (million tons): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS)) / 1e6, 2)}")
358    print("\n")
359    print(f"Mean annualised dry bulk throughput (million tons): {round(((sum(dblk_throughput)/len(dblk_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}")
360
361    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
362        f.write("\n")
363        f.write(
364            f"Mean ships entered: {sum(ships_entered)/len(ships_entered)}\n")
365        f.write(f"Mean ships exited: {sum(ships_exited)/len(ships_exited)}\n")
366        f.write(
367            f"Mean ships entered channel: {sum(ships_entered_channel)/len(ships_entered_channel)}\n")
368        f.write("\n")
369        f.write(
370            f"Mean ships entered for container: {sum(ctr_ships_entered)/len(ctr_ships_entered)}\n")
371        f.write(
372            f"Mean ships exited for container: {sum(ctr_ships_exited)/len(ctr_ships_exited)}\n")
373        f.write(
374            f"Mean ships entered channel for container: {sum(ctr_ships_entered_channel)/len(ctr_ships_entered_channel)}\n")
375        f.write("\n")
376        f.write(
377            f"Mean ships entered for liquid: {sum(liq_ships_entered)/len(liq_ships_entered)}\n")
378        f.write(
379            f"Mean ships exited for liquid: {sum(liq_ships_exited)/len(liq_ships_exited)}\n")
380        f.write(
381            f"Mean ships entered channel for liquid: {sum(liq_ships_entered_channel)/len(liq_ships_entered_channel)}\n")
382        f.write("\n")
383        f.write(
384            f"Mean ships entered for dry bulk: {sum(db_ships_entered)/len(db_ships_entered)}\n")
385        f.write(
386            f"Mean ships exited for dry bulk: {sum(db_ships_exited)/len(db_ships_exited)}\n")
387        f.write(
388            f"Mean ships entered channel for dry bulk: {sum(db_ships_entered_channel)/len(db_ships_entered_channel)}\n")
389        f.write("\n")
390
391        f.write(f"Mean annualised throughput (million tons): {round(((((sum(ctr_throughput)/len(ctr_throughput)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS))) + ((sum(liq_throughput)/len(liq_throughput)) / (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS))) + (sum(dblk_throughput)/len(dblk_throughput))) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
392        f.write(f"Mean annualised container throughput (million FEU containers): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
393        f.write(f"Mean annualised container throughput (million tons): {round(((sum(ctr_throughput)/len(ctr_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.CONTAINER_CONVERSION_FACTORS)/len(constants.CONTAINER_CONVERSION_FACTORS)) / 1e6, 2)}\n")
394        f.write(f"Mean annualised liquid throughput (million cbm): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
395        f.write(f"Mean annualised liquid throughput (million tons): {round(((sum(liq_throughput)/len(liq_throughput)) * (12/constants.NUM_MONTHS)) * (sum(constants.LIQUID_CONVERSION_FACTORS)/len(constants.LIQUID_CONVERSION_FACTORS)) / 1e6, 2)}\n")
396        f.write(f"Mean annualised dry bulk throughput (million tons): {round(((sum(dblk_throughput)/len(dblk_throughput)) * (12/constants.NUM_MONTHS)) / 1e6, 2)}\n")
397
398
399        f.write("\n")
400
401    # Plot multirun combined anchorage queues
402    print("\nAnchorage queue lengths:\n")
403    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
404        f.write("\nAnchorage queue lengths:\n")
405    list_of_df = [container_queue, liquid_queue, drybulk_queue, all_queue]
406    df_name = ['container', 'liquid', 'drybulk', 'all']
407    for i, df in enumerate(list_of_df):
408        # remove last 5 rows
409        df = df[:-5].copy()
410        df_analysis = pd.DataFrame()
411        ship_type = df_name[i]
412        df_analysis['mean'] = df.mean(axis=1)
413        df_analysis['std'] = df.std(axis=1)
414        df_analysis['max'] = df.max(axis=1)
415        df_analysis['min'] = df.min(axis=1)
416        df_analysis['rolling_mean'] = df_analysis['mean'].rolling(
417            window=200).mean()
418        mean = df_analysis['mean'][WARMUP_ITERS:].mean()
419        plt.plot(df_analysis['mean'], label='Mean')
420        plt.plot(df_analysis['rolling_mean'], label='Rolling Mean')
421        plt.fill_between(
422            df_analysis.index, df_analysis['min'], df_analysis['max'], color='skyblue', alpha=0.4, label='Min-Max Range')
423        plt.fill_between(df_analysis.index, df_analysis['mean'] - df_analysis['std'], df_analysis['mean'] +
424                         df_analysis['std'], color='lightgreen', alpha=0.4, label='Mean ± Std Dev')
425        plt.axvspan(0, WARMUP_ITERS, color='lightgray',
426                    alpha=0.5, label='Warmup Period', zorder=-10)
427        plt.xticks(fontsize=12)
428        plt.yticks(fontsize=12)
429        plt.xlim(left=0)
430        plt.title(f'Anchorage queue of {ship_type} ships', fontsize=18)
431        plt.xlabel('Time (hr)', fontsize=15)
432        plt.ylabel('Number of ships', fontsize=15)
433        plt.grid(True)
434        plt.legend()
435        plt.tight_layout()
436        plt.savefig(
437            f'collatedResults/{logfileid}/queuePlots/Queue_{ship_type}.pdf')
438        # plt.show()
439        plt.close()
440        print(
441            f"Mean (+- std) for anchorage queue of {ship_type} ships: {df_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {df_analysis['std'][WARMUP_ITERS:].mean():.1f})")
442        with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
443            f.write(
444                f"Mean (+- std) for anchorage queue of {ship_type} ships: {df_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {df_analysis['std'][WARMUP_ITERS:].mean():.1f})\n")
445    print("\n")
446
447    mean_queue_length = df_analysis['mean'][WARMUP_ITERS:].mean()
448
449    # Plot wait and processing times with error bars for multirun output
450    print("\nTime taken for different ship types:")
451    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
452        f.write("\nTime taken for different ship types:\n")
453    list_of_df = [dwell, turn, anchorage, cargo, ctr_dwell, ctr_turn, ctr_anchorage, ctr_cargo,
454                  liq_dwell, liq_turn, liq_anchorage, liq_cargo, db_dwell, db_turn, db_anchorage, db_cargo]
455    what = ['dwell', 'turn', 'anchorage', 'cargo'] * 4
456    which = ['all'] * 4 + ['container'] * 4 + ['liquid'] * 4 + ['drybulk'] * 4
457    for i, df in enumerate(list_of_df):
458        ship_type = which[i]
459        time_type = what[i]
460        df['mean'] = df.mean(axis=1)
461        df['std'] = df.std(axis=1)
462
463        # Calculate histogram
464        num_bins = 10
465        bins = np.linspace(df['mean'].min(), df['mean'].max(), num_bins + 1)
466        counts, bin_edges = np.histogram(df['mean'], bins=bins)
467        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
468        std_in_bins = []
469        for i in range(len(bins) - 1):
470            std_in_bins.append(df[(df['mean'] > bins[i]) & (
471                df['mean'] <= bins[i + 1])]['std'].mean())
472
473        plt.bar(bin_centers, counts, width=(
474            bin_edges[1] - bin_edges[0]) * 0.8, align='center', alpha=0.7, label='Histogram')
475        plt.errorbar(bin_centers, counts, yerr=std_in_bins,
476                     fmt='o', color='red', label='Error bars')
477        plt.xlabel('Time (hr)')
478        plt.ylabel('Frequency')
479        plt.title(f'Histogram of {time_type} time for {ship_type} ships')
480        plt.legend()
481        plt.savefig(
482            f'collatedResults/{logfileid}/timePlots/{time_type}_{ship_type}.pdf')
483        plt.close()
484
485        print(
486            f"Mean (+- std) for {time_type} time for {ship_type} ships: {df['mean'].mean():.1f} (+- {df['std'].mean():.1f})")
487        with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
488            f.write(
489                f"Mean (+- std) for {time_type} time for {ship_type} ships: {df['mean'].mean():.1f} (+- {df['std'].mean():.1f})\n")
490
491    # Capacity calculation
492
493    # anchorage queue wait
494    mean_all_anc_wait = anchorage.mean(axis=1).mean()
495
496    # mean queue lengths
497    mean_ctr_queue = container_queue.mean(axis=1).mean()
498    mean_liq_queue = liquid_queue.mean(axis=1).mean()
499    mean_db_queue = drybulk_queue.mean(axis=1).mean()
500    queue_lengths = [mean_ctr_queue, mean_liq_queue, mean_db_queue]
501
502    # arrival rates
503    mean_ctr_arrival = constants.mean_interarrival_time_container
504    mean_liq_arrival = constants.mean_interarrival_time_tanker
505    mean_db_arrival = constants.mean_interarrival_time_gencargo
506    arrival_rates = [1/mean_ctr_arrival, 1/mean_liq_arrival, 1/mean_db_arrival]
507    current_mean_arrival_rate = sum(arrival_rates)
508
509
510    print("\nChannel utilization:")
511    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
512        f.write("\nChannel utilization:\n")
513    channel_analysis = pd.DataFrame()
514    channel_analysis['mean'] = channel.mean(axis=1)
515    channel_analysis['std'] = channel.std(axis=1)
516    channel_analysis['max'] = channel.max(axis=1)
517    channel_analysis['min'] = channel.min(axis=1)
518    channel_analysis['rolling_mean'] = channel_analysis['mean'].rolling(
519        window=200).mean()
520    mean = channel_analysis['mean'][WARMUP_ITERS:].mean()
521    plt.plot(channel_analysis['mean'], label='Mean')
522    plt.plot(channel_analysis['rolling_mean'], label='Rolling Mean')
523    plt.fill_between(channel_analysis.index,
524                     channel_analysis['min'], channel_analysis['max'], color='skyblue', alpha=0.4, label='Min-Max Range')
525    plt.fill_between(channel_analysis.index, channel_analysis['mean'] - channel_analysis['std'],
526                     channel_analysis['mean'] + channel_analysis['std'], color='lightgreen', alpha=0.4, label='Mean ± Std Dev')
527    plt.axvspan(0, WARMUP_ITERS, color='lightgray',
528                alpha=0.5, label='Warmup Period', zorder=-10)
529    plt.xticks(fontsize=12)
530    plt.yticks(fontsize=12)
531    plt.xlim(left=0)
532    plt.title('Channel utilization', fontsize=18)
533    plt.xlabel('Time (hr)', fontsize=15)
534    plt.ylabel('Number of ships', fontsize=15)
535    plt.grid(True)
536    plt.legend()
537    plt.tight_layout()
538    plt.savefig(f'collatedResults/{logfileid}/channel.pdf')
539    plt.close()
540    print(
541        f"Mean (+- std) for channel utilization: {channel_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {channel_analysis['std'][WARMUP_ITERS:].mean():.1f})\n")
542    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
543        f.write(
544            f"Mean (+- std) for channel utilization: {channel_analysis['mean'][WARMUP_ITERS:].mean():.1f} (+- {channel_analysis['std'][WARMUP_ITERS:].mean():.1f})\n")
545
546    try:
547        output = calculate_mean_wait_time(
548            arrival_rates, queue_lengths, mean_all_anc_wait, logfilename)
549        op_cap = output['Service Rate']
550    except Exception as e:
551        print("Operating capacity calculation failed")
552        print("Error:", e)
553        op_cap = 0.0
554
555    print(f"Operating capacity: {op_cap:.2f} vessels / hr")
556
557    print("SAVING RESULTS")
558    if config.SCENARIO_NAME == 'BottleneckDetection':
559        print("Bottleneck detection scenario, not saving results")
560        if np.isclose(current_mean_arrival_rate, base_arrival_rate, atol=1e-1):
561            with open(f'bottleneckAnalysis/logs/{SCENARIO_NAME}_BASE.txt', 'w') as f: #use the scenario name from constants
562                f.write(f"Operating capacity: {op_cap:.2f} vessels / hr\n")
563                f.write(f"Arrival rate: {current_mean_arrival_rate:.2f} vessels / hr\n")
564                f.write(f"Exited ships: {sum(ships_exited)/len(ships_exited):.2f} vessels / hr\n")
565                f.write(f"Dwell time (Container): {ctr_dwell['mean'].mean():.1f} hr\n")
566                f.write(f"Dwell time (Liquid): {liq_dwell['mean'].mean():.1f} hr\n")
567                f.write(f"Dwell time (Dry Bulk): {db_dwell['mean'].mean():.1f} hr\n")
568                f.write(f"Turn time (Average): {turn['mean'].mean():.1f} hr\n")
569                f.write(f"Anchorage queue length: {mean_queue_length:.2f} ships\n")
570                f.write(f"Anchorage queue length (Container): {mean_ctr_queue:.2f} ships\n")
571                f.write(f"Anchorage queue length (Liquid): {mean_liq_queue:.2f} ships\n")
572                f.write(f"Anchorage queue length (Dry Bulk): {mean_db_queue:.2f} ships\n")      
573                f.write(f"Mean wait time in anchorage: {mean_all_anc_wait:.2f} hr\n")
574                f.write(f"Mean wait time in anchorage (Container): {mean_ctr_queue:.2f} hr\n")
575                f.write(f"Mean wait time in anchorage (Liquid): {mean_liq_queue:.2f} hr\n")
576                f.write(f"Mean wait time in anchorage (Dry Bulk): {mean_db_queue:.2f} hr\n")
577                f.write(f"Mean channel utilization: {channel_analysis['mean'][WARMUP_ITERS:].mean():.1f} ships\n")
578        else:
579            with open(f'bottleneckAnalysis/logs/{SCENARIO_NAME}_{ARRIVAL_INCREASE_FACTOR}.txt', 'w') as f: #use the scenario name from constants
580                f.write("Arrival rate: {:.2f} vessels / hr\n".format(current_mean_arrival_rate))
581                f.write("Exited ships: {:.2f} vessels / hr\n".format(sum(ships_exited)/len(ships_exited)))
582
583    # Cu_opt = calculate_ultimate_capacity()
584
585    if config.SCENARIO_NAME != "Base":
586
587        try:
588            Cu_opt,theta_opt = calculate_ultimate_capacity()
589            with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
590                f.write(f"Ultimate capacity: {Cu_opt:.2f} vessels / hr\n")
591                f.write(f"Optimal theta: {theta_opt:.2f}\n")
592        except Exception as e:
593            print("Ultimate capacity calculation failed")
594            print("Error:", e)
595            print("More simulation runs may be needed to calculate ultimate capacity.")
596    
597
598    # save the dataframes
599    dwell.to_csv(f'collatedResults/{logfileid}/data/dwell.csv')
600    turn.to_csv(f'collatedResults/{logfileid}/data/turn.csv')
601    anchorage.to_csv(f'collatedResults/{logfileid}/data/anchorage.csv')
602    cargo.to_csv(f'collatedResults/{logfileid}/data/cargo.csv')
603
604    ctr_dwell.to_csv(f'collatedResults/{logfileid}/data/ctr_dwell.csv')
605    ctr_turn.to_csv(f'collatedResults/{logfileid}/data/ctr_turn.csv')
606    ctr_anchorage.to_csv(f'collatedResults/{logfileid}/data/ctr_anchorage.csv')
607    ctr_cargo.to_csv(f'collatedResults/{logfileid}/data/ctr_cargo.csv')
608
609    liq_dwell.to_csv(f'collatedResults/{logfileid}/data/liq_dwell.csv')
610    liq_turn.to_csv(f'collatedResults/{logfileid}/data/liq_turn.csv')
611    liq_anchorage.to_csv(f'collatedResults/{logfileid}/data/liq_anchorage.csv')
612    liq_cargo.to_csv(f'collatedResults/{logfileid}/data/liq_cargo.csv')
613
614    db_dwell.to_csv(f'collatedResults/{logfileid}/data/db_dwell.csv')
615    db_turn.to_csv(f'collatedResults/{logfileid}/data/db_turn.csv')
616    db_anchorage.to_csv(f'collatedResults/{logfileid}/data/db_anchorage.csv')
617    db_cargo.to_csv(f'collatedResults/{logfileid}/data/db_cargo.csv')
618
619    container_queue.to_csv(
620        f'collatedResults/{logfileid}/data/container_queue.csv')
621    liquid_queue.to_csv(f'collatedResults/{logfileid}/data/liquid_queue.csv')
622    drybulk_queue.to_csv(f'collatedResults/{logfileid}/data/drybulk_queue.csv')
623    all_queue.to_csv(f'collatedResults/{logfileid}/data/all_queue.csv')
624
625    channel.to_csv(f'collatedResults/{logfileid}/data/channel.csv')
626
627    # restrictions
628
629    # Replace these with your actual list of run_ids
630    run_ids = []
631    for seed in range(constants.START_SEED, constants.START_SEED + num_runs):
632        try:
633            run_id = f"Results_{seed}_{int(constants.NUM_MONTHS)}_months_{constants.ARRIVAL_INCREASE_FACTOR}"
634            run_ids.append(run_id)
635        except:
636            print(f"Error in processing seed {seed} (does not exist)")
637            continue
638
639    in_path_template = '.{}/bottlenecks/Waterway/in_hist_percentage.csv'
640    out_path_template = '.{}/bottlenecks/Waterway/out_hist_percentage.csv'
641    in_hist_list = [pd.read_csv(in_path_template.format(
642        run_id), index_col=0) for run_id in run_ids]
643    out_hist_list = [pd.read_csv(out_path_template.format(
644        run_id), index_col=0) for run_id in run_ids]
645
646    restriction_types = ['Beam', 'Draft', 'Daylight', 'Total']
647
648    def compute_mean_std(hist_list, suffix):
649        means = {}
650        stds = {}
651        for r in restriction_types:
652            col = f"{r} {suffix}"
653            # shape: (bins, seeds)
654            stacked = np.stack([df[col].values for df in hist_list], axis=1)
655            means[r] = np.mean(stacked, axis=1)
656            stds[r] = np.std(stacked, axis=1)
657        bin_labels = hist_list[0].index
658        return means, stds, bin_labels
659
660    mean_in, std_in, index_in = compute_mean_std(in_hist_list, 'In')
661    mean_out, std_out, index_out = compute_mean_std(out_hist_list, 'Out')
662
663    def save_individual_errorbar_pdfs(mean_dict, std_dict, index_labels, suffix, output_folder):
664        os.makedirs(output_folder, exist_ok=True)
665        x = np.arange(len(index_labels))
666        xticks = ['0' if str(label) == '(-1, 0]' else str(label)
667                  for label in index_labels]
668
669        for r in restriction_types:
670            mean_vals = mean_dict[r]
671            std_vals = std_dict[r]
672
673            plt.figure(figsize=(10, 6))
674            plt.bar(x, mean_vals, yerr=std_vals, capsize=5, alpha=0.7)
675            plt.xticks(ticks=x, labels=xticks, rotation=45)
676            plt.ylabel('Percentage', fontsize=12)
677            plt.xlabel('Waiting Time (hr)', fontsize=12)
678            plt.title(
679                f'{r} waiting time distribution ({suffix.lower()})', fontsize=14)
680            plt.tight_layout()
681
682            pdf_path = os.path.join(
683                output_folder, f'{r.lower()}_{suffix.lower()}_errorbar.pdf')
684            plt.savefig(pdf_path)
685            plt.close()
686
687    output_folder = f'collatedResults/{logfileid}/restrictionPlots/'
688
689    save_individual_errorbar_pdfs(
690        mean_in, std_in, index_in, 'In', output_folder)
691    save_individual_errorbar_pdfs(
692        mean_out, std_out, index_out, 'Out', output_folder)
693
694    collate_results_time = round((time.time() - start_time) / 60, 2)
695
696    with open(f'collatedResults/{logfileid}/run_details_{logfileid}.txt', 'a') as f:
697        f.write(
698            f"\nTotal simulation runtime: {round(total_time + collate_results_time, 2)} minutes\n")
699
700    print(
701        f"Total time taken: {round(total_time + collate_results_time, 2)} minutes\n")

Collates results from multiple simulation runs and generates various plots and statistics. The function creates directories for storing results, processes data from each run, and generates plots for wait times, queue lengths, and channel utilization. It also calculates mean wait times, standard deviations, and other statistics for different ship types. The results are saved in a folder structure under 'collatedResults'.

Arguments:
  • num_runs (int): Number of simulation runs to collate.
  • total_time (float): Wall-clock runtime of the simulation runs, in minutes (it is added to the collation time and reported as "Total simulation runtime ... minutes").

Returns:
  None. All outputs (plots, CSV data files, and run-detail text files) are written to disk under 'collatedResults/<logfileid>/'.
def compute_throughput( logs_fp: str, data_fp: str, *, strict_missing: bool = True, type_map: dict | None = None) -> dict:
703def compute_throughput(
704    logs_fp: str,
705    data_fp: str,
706    *,
707    strict_missing: bool = True,
708    type_map: dict | None = None,
709) -> dict:
710    """
711    Compute throughput for a single run given paths to:
712      - logs_fp: Excel/CSV with 'Ship_Id' and 'End Time'
713      - data_fp: CSV with 'ship_id', 'ship_type',
714                 'num_container_or_liq_tons_or_dry_tons_to_load',
715                 'num_container_or_liq_tons_or_dry_tons_to_unload'
716
717    Returns:
718      {
719        "total": float,
720        "by_type": { "<type>": float, ... },
721        "ships_exited": int,
722        "exited_ids": list[str]
723      }
724
725    Raises:
726      KeyError / ValueError with clear messages if required columns are missing
727      or (if strict_missing=True) any exited ship isn't found in ship_data.
728    """
729    # --- read ---
730    if logs_fp.lower().endswith((".xls", ".xlsx")):
731        df_logs = pd.read_excel(logs_fp)
732    else:
733        df_logs = pd.read_csv(logs_fp)
734
735    df_data = pd.read_csv(data_fp)
736
737    # --- sanity: required columns ---
738    req_logs = {"Ship_Id", "End Time"}
739    req_data = {
740        "ship_id",
741        "ship_type",
742        "num_container_or_liq_tons_or_dry_tons_to_load",
743        "num_container_or_liq_tons_or_dry_tons_to_unload",
744    }
745    missing_logs = req_logs - set(df_logs.columns)
746    missing_data = req_data - set(df_data.columns)
747    if missing_logs:
748        raise KeyError(f"Ship Logs missing columns: {sorted(missing_logs)}")
749    if missing_data:
750        raise KeyError(f"Ship data missing columns: {sorted(missing_data)}")
751
752    # --- normalize ids ---
753    # logs are 0-based ("Ship_0", "Ship_1", ...)
754    df_logs["Ship_Id"] = (
755        df_logs["Ship_Id"]
756        .str.extract(r"(\d+)")[0]   # pull out the number part
757        .astype(int)
758        .add(1)                     # shift to match ship_data (1-based)
759        .astype(str)
760    )
761
762    df_logs["Ship_Id"] = df_logs["Ship_Id"].astype(int)
763    df_data["ship_id"] = df_data["ship_id"].astype(int)
764
765    # --- exited ships set ---
766    exited_ids = (
767        df_logs.loc[df_logs["End Time"].notna(), "Ship_Id"]
768        .dropna()
769        .unique()
770        .tolist()
771    )
772    ships_exited = len(exited_ids)
773
774    # --- subset data to exited; validate coverage ---
775    data_exited = df_data[df_data["ship_id"].isin(exited_ids)].copy()
776    missing = sorted(set(exited_ids) - set(data_exited["ship_id"]))
777    if missing and strict_missing:
778        raise ValueError(
779            f"{len(missing)} exited ship(s) not found in ship_data: "
780            f"{missing[:10]}{' ...' if len(missing) > 10 else ''}"
781        )
782
783    # --- (optional) standardize type labels ---
784    if type_map:
785        data_exited["ship_type"] = data_exited["ship_type"].map(
786            lambda x: type_map.get(x, x)
787        )
788
789    # --- numeric quantities ---
790    for c in [
791        "num_container_or_liq_tons_or_dry_tons_to_load",
792        "num_container_or_liq_tons_or_dry_tons_to_unload",
793    ]:
794        data_exited[c] = pd.to_numeric(data_exited[c], errors="coerce").fillna(0)
795
796    # --- collapse to one row per ship_id (+ keep ship_type) ---
797    grouped = (
798        data_exited.groupby(["ship_id", "ship_type"], as_index=False)[
799            [
800                "num_container_or_liq_tons_or_dry_tons_to_load",
801                "num_container_or_liq_tons_or_dry_tons_to_unload",
802            ]
803        ]
804        .sum()
805    )
806    grouped["ship_throughput"] = (
807        grouped["num_container_or_liq_tons_or_dry_tons_to_load"]
808        + grouped["num_container_or_liq_tons_or_dry_tons_to_unload"]
809    )
810
811    # --- aggregates ---
812    by_type = grouped.groupby("ship_type")["ship_throughput"].sum().to_dict()
813    total = float(grouped["ship_throughput"].sum())
814
815    return {
816        "total": float(total),
817        "by_type": {k: float(v) for k, v in by_type.items()},
818        "ships_exited": ships_exited,
819        "exited_ids": exited_ids,
820    }
Compute throughput for a single run given paths to:
  • logs_fp: Excel/CSV with 'Ship_Id' and 'End Time'
  • data_fp: CSV with 'ship_id', 'ship_type', 'num_container_or_liq_tons_or_dry_tons_to_load', 'num_container_or_liq_tons_or_dry_tons_to_unload'
Returns:

{ "total": float, "by_type": { "<type>": float, ... }, "ships_exited": int, "exited_ids": list[int] }

Raises:
  • KeyError / ValueError with clear messages if required columns are missing
  • or (if strict_missing=True) any exited ship isn't found in ship_data.