simulation_analysis.resource_utilization

This module provides functions to analyze bottlenecks in a simulation run. It includes parsing report files, collecting utilization data, plotting utilization trends, saving mean utilization, and analyzing channel restrictions.

View Source

  1"""
  2This module provides functions to analyze bottlenecks in a simulation run.
  3It includes parsing report files, collecting utilization data, plotting utilization trends,
  4saving mean utilization, and analyzing channel restrictions.
  5"""
  6import os
  7import re
  8
  9import pandas as pd
 10import matplotlib.pyplot as plt
 11import numpy as np
 12
 13import constants
 14
 15
 16def parse_report(file_path):
 17    """
 18    Parses a report file to extract the timestep and utilization data for each resource.
 19    Args:
 20        file_path (str): The path to the report file.
 21    Returns:
 22        tuple: A tuple containing the timestep (float) and a dictionary with utilization data.
 23               The dictionary has resource names as keys and another dictionary as values,
 24               which contains terminal names and their respective utilization percentages.
 25    """
 26    with open(file_path, 'r') as file:
 27        lines = file.readlines()
 28
 29    timestep = float(re.search(r"Time Step: (\d+\.\d+)", lines[0]).group(1))
 30
 31    utilization_data = {}
 32    resource = None
 33
 34    for line in lines[1:]:
 35        if line.startswith("Mean "):
 36            resource = line.split("Mean ")[1].split(" Utilization")[0]
 37            utilization_data[resource] = {}
 38        elif resource and ":" in line:
 39            if "Overall" in line:
 40                utilization_data[resource]["Overall"] = float(
 41                    line.split(": ")[1].replace('%', '')) / 100
 42            else:
 43                terminal, utilization = line.split(": ")
 44                utilization_data[resource][terminal] = float(
 45                    utilization.replace('%', '')) / 100
 46
 47    return timestep, utilization_data
 48
 49
 50def collect_data_from_reports(directory):
 51    """
 52    Collects utilization data from all report files in the specified directory.
 53    Args:
 54        directory (str): The path to the directory containing report files.
 55    Returns:
 56        dict: A dictionary where keys are resource names and values are dictionaries containing
 57              timesteps and utilization data for each terminal.
 58    """
 59    data = {}
 60    for filename in sorted(os.listdir(directory)):
 61        if filename.endswith(".txt"):
 62            timestep, utilization_data = parse_report(
 63                os.path.join(directory, filename))
 64            for resource, terminals in utilization_data.items():
 65
 66                if resource not in data:
 67                    data[resource] = {'timestep': [], 'Overall': []}
 68                data[resource]['timestep'].append(timestep)
 69                for terminal, utilization in terminals.items():
 70                    if terminal not in data[resource]:
 71                        data[resource][terminal] = []
 72                    data[resource][terminal].append(utilization)
 73
 74    return data
 75
 76
 77def save_mean_utilization(data, resource, output_dir, run_id):
 78    """
 79    Saves the mean utilization of a specific resource across all terminals to a text file.
 80    Args:
 81        data (dict): The utilization data collected from report files.
 82        resource (str): The name of the resource to analyze.
 83        output_dir (str): The directory where the output file will be saved.
 84        run_id (str): The run identifier for the simulation.
 85    Returns:
 86        None
 87    """
 88    mean_utilization = {terminal: np.mean(utilizations)
 89                        for terminal, utilizations in data[resource].items() if terminal != 'timestep'}
 90
 91    resource_name = resource.split(' ')[1]
 92    terminal_type = resource.split(' ')[0]
 93    output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'
 94    os.makedirs(output_dir, exist_ok=True)
 95    output_path = os.path.join(output_dir, f"{resource_name}_utilization.txt")
 96
 97    with open(output_path, 'w') as file:
 98        file.write(f"Mean {resource} utilization over time steps:\n")
 99        for terminal, mean_util in mean_utilization.items():
100            file.write(f"{terminal}: {mean_util:.2%}\n")
101        overall_mean = mean_utilization.get('Overall', 0)
102        file.write(f"Overall {resource} utilization: {overall_mean:.2%}\n")
103
104
def plot_utilization(data, resource, run_id):
    """
    Plots the utilization of a specific resource across all terminals over time.

    One line is drawn per series in ``data[resource]`` (every key except ``'timestep'``),
    ordered by time step. The figure is saved as
    ``.{run_id}/bottlenecks/<terminal type>/<resource name>/utilization.pdf``, where the
    terminal type and resource name are the first and second space-separated words of
    ``resource``.
    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    name_parts = resource.split(' ')
    resource_name = name_parts[1]
    terminal_type = name_parts[0]
    output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'

    timesteps = np.array(data[resource]['timestep'])
    order = np.argsort(timesteps)
    sorted_timesteps = timesteps[order]

    plt.figure(figsize=(8, 6))
    try:
        # 20-colour cycle so resources with many terminals keep distinct line colours.
        plt.gca().set_prop_cycle(plt.cycler(color=plt.cm.tab20.colors))

        for terminal, utilization in data[resource].items():
            if terminal == 'timestep':
                continue
            series = np.array(utilization)
            # A series without one value per time step (e.g. an 'Overall' list that
            # never received data) cannot be aligned with the time axis; skip it
            # instead of failing on the index lookup.
            if len(series) != len(sorted_timesteps):
                continue
            plt.plot(sorted_timesteps, series[order], label=terminal)

        # set x and y axis limits
        plt.ylim([0, 1.0])
        plt.xlim(left=0)

        plt.xlabel('Time step')
        plt.ylabel('Utilization')
        plt.title(f'{resource} utilization')
        plt.legend(loc='best', ncol=2)
        plt.grid(True)

        os.makedirs(output_dir, exist_ok=True)
        plt.tight_layout()
        plt.savefig(f'{output_dir}/utilization.pdf')
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close()
158
159
def save_utilization_thresholds(data, resource, output_dir):
    """
    Saves the percentage of time each terminal's utilization is at or above fixed thresholds.

    The thresholds run from 100% down to 0% in 10% steps. Results are written to
    ``<output_dir>/<resource>_Utilization_Thresholds.txt`` and also returned.
    A terminal with no recorded values is reported as 0% for every threshold.
    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the output file will be saved.
    Returns:
        tuple: A tuple containing the thresholds and a dictionary with utilization percentages for each terminal.
    """
    # Utilization fractions: 100%, 90%, ..., 10%, 0%.
    thresholds = [1.00, 0.90, 0.80, 0.70, 0.6, 0.5,
                  0.4, 0.3, 0.2, 0.1, 0]
    output_path = os.path.join(
        output_dir, f"{resource}_Utilization_Thresholds.txt")
    os.makedirs(output_dir, exist_ok=True)
    utilization_percentages = {}  # To store results for plotting

    with open(output_path, 'w') as file:
        file.write(f"{resource} Utilization Thresholds:\n")

        for terminal, utilizations in data[resource].items():
            if terminal == 'timestep':  # Skip the timestep key
                continue
            file.write(f"\nTerminal: {terminal}\n")
            total_steps = len(utilizations)
            percentages = []
            for threshold in thresholds:
                if total_steps:
                    count_above_threshold = sum(
                        1 for u in utilizations if u >= threshold)
                    percentage_above_threshold = (
                        count_above_threshold / total_steps) * 100
                else:
                    # No samples: avoid dividing by zero and report no time at all.
                    percentage_above_threshold = 0.0
                file.write(
                    f"Time at or above {threshold * 100:.0f}% utilization: {percentage_above_threshold:.2f}%\n")
                percentages.append(percentage_above_threshold)
            utilization_percentages[terminal] = percentages

    return thresholds, utilization_percentages  # Return for plotting
196
197
def plot_utilization_thresholds(thresholds, utilization_percentages, resource, output_dir):
    """
    Draws, for every terminal, the share of time spent at or above each utilization threshold.

    One PDF per terminal is saved as
    ``<output_dir>/<resource>_<terminal>_Utilization_Thresholds.pdf``.
    Args:
        thresholds (list): The list of utilization thresholds (fractions, plotted as percent).
        utilization_percentages (dict): A dictionary with utilization percentages for each terminal.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the plots will be saved.
    Returns:
        None
    """
    for terminal in utilization_percentages:
        time_share = utilization_percentages[terminal]
        threshold_percent = [value * 100 for value in thresholds]

        plt.figure(figsize=(6, 6))
        plt.plot(threshold_percent, time_share,
                 marker='o', linestyle='-', color='b')
        plt.title(f'{resource} - {terminal} utilization')
        plt.xlabel('Utilization more than (%)')
        plt.ylabel('Percentage of time (%)')
        plt.grid(True)

        # Pin both axes to the 0-100 range.
        plt.ylim(bottom=0)
        plt.ylim(top=100)
        plt.xlim(left=0)
        plt.xlim(right=100)

        # Write the figure next to the other outputs of this resource.
        os.makedirs(output_dir, exist_ok=True)
        file_name = f"{resource}_{terminal}_Utilization_Thresholds.pdf"
        plt.savefig(os.path.join(output_dir, file_name))
        plt.close()
231
232
def parse_restrictions(restriction_str):
    """
    Parses a string of restrictions into a dictionary.

    Entries are separated by commas and each entry has the form ``KEY:VALUE``. Whitespace
    around entries and keys is ignored and empty entries are skipped, so an empty string
    yields an empty dictionary.
    Args:
        restriction_str (str): A string containing restrictions in the format "B:1.0, D:2.0, DL:3.0, T:4.0".
    Returns:
        dict: A dictionary with restriction types as keys and their values as floats.
    Raises:
        ValueError: If an entry has no ``:`` separator or its value is not a number.
    """
    restrictions = {}
    for item in restriction_str.split(','):
        item = item.strip()
        if not item:
            continue
        key, separator, value = item.partition(':')
        if not separator:
            raise ValueError(
                f"Malformed restriction entry {item!r}: expected 'KEY:VALUE'")
        restrictions[key.strip()] = float(value)
    return restrictions
246
247
def _save_restriction_bar_plot(series, title, phase, folder_name):
    """Draws one waiting-time bar chart and saves it as ``<title>_<phase>.png`` in ``folder_name``."""
    # Custom tick labels: replace (-1, 0] with 0 and keep the rest as they are
    tick_labels = ['0' if str(label) == '(-1, 0]' else str(label)
                   for label in series.index]

    ax = series.plot(kind='bar', figsize=(10, 6))
    plt.title(f'{title} waiting time distribution ({phase})')
    plt.ylabel('Percentage')
    plt.xlabel('Waiting time (hr)')
    plt.xticks(ticks=range(len(series.index)),
               labels=tick_labels, rotation=45)

    # Adding percentage labels on top of each bar
    for container in ax.containers:
        ax.bar_label(container, fmt='%.1f', label_type='edge')

    plt.tight_layout()
    plt.savefig(f'{folder_name}/{title.lower().replace(" ", "_")}_{phase}.png')
    plt.close()


def save_individual_restriction_plots(data_in, data_out, title, folder_name):
    """
    Saves individual restriction plots for "In" and "Out" phases.

    Two bar charts are written to ``folder_name``: ``<title>_in.png`` and
    ``<title>_out.png``, where ``<title>`` is lower-cased with spaces replaced by
    underscores. The folder is created if it does not exist.
    Args:
        data_in (pd.Series): Series containing the "In" phase data.
        data_out (pd.Series): Series containing the "Out" phase data.
        title (str): Title for the plots.
        folder_name (str): Folder name where the plots will be saved.
    Returns:
        None
    """
    os.makedirs(folder_name, exist_ok=True)

    # Both phases share the same layout; only the data and the file suffix differ.
    _save_restriction_bar_plot(data_in, title, 'in', folder_name)
    _save_restriction_bar_plot(data_out, title, 'out', folder_name)
298
299
def channel_restriction_analysis(run_id):
    """
    Summarizes the channel-restriction waiting times recorded in the ship logs.

    Reads ``.{run_id}/logs/ship_logs.xlsx``, keeps the rows whose "Time for Restriction
    In" and "Time for Restriction Out" cells are both strings, parses them with
    ``parse_restrictions`` and bins the ``B``, ``D``, ``DL`` and ``T`` values into hourly
    intervals. The percentage tables are written as text and CSV to
    ``.{run_id}/bottlenecks/Waterway`` and one pair of bar charts is saved per category.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    waterway_dir = f'.{run_id}/bottlenecks/Waterway'

    # Load the dataset
    ship_logs = pd.read_excel(f'.{run_id}/logs/ship_logs.xlsx')
    os.makedirs(waterway_dir, exist_ok=True)

    # Only rows where both restriction cells hold text can be parsed.
    def _is_text(value):
        return isinstance(value, str)

    has_in = ship_logs['Time for Restriction In'].apply(_is_text)
    has_out = ship_logs['Time for Restriction Out'].apply(_is_text)
    parsed_logs = ship_logs[has_in & has_out].copy()

    phases = ('In', 'Out')

    # Parse each phase and expand the dictionaries into one column per restriction key.
    for phase in phases:
        parsed_logs.loc[:, f'Restrictions_{phase}'] = parsed_logs[
            f'Time for Restriction {phase}'].apply(parse_restrictions)

    restriction_values = {}
    for phase in phases:
        expanded = pd.json_normalize(parsed_logs[f'Restrictions_{phase}'])
        # The "Q" column is not part of the analysis; drop it when present.
        restriction_values[phase] = expanded.drop(columns=['Q'], errors='ignore')

    # Hourly bins; the first bin (-1, 0] captures a waiting time of exactly zero.
    bin_edges = [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    categories = (('Beam', 'B'), ('Draft', 'D'), ('Daylight', 'DL'), ('Total', 'T'))

    percentage_tables = {}
    for phase in phases:
        binned_columns = {}
        for label, column in categories:
            binned, _ = pd.cut(
                restriction_values[phase][column], bins=bin_edges, right=True, retbins=True)
            binned_columns[f'{label} {phase}'] = binned
        count_table = pd.DataFrame({
            name: binned.value_counts().sort_index()
            for name, binned in binned_columns.items()
        })
        # Convert counts to percentages of each column's total.
        percentage_tables[phase] = (count_table / count_table.sum()) * 100

    in_percentages = percentage_tables['In']
    out_percentages = percentage_tables['Out']

    # Save analysis to a text file, including "T"
    analysis_text = f"""
    Percentage distributions of the waiting times for each restriction category across hourly bins, including Total:

    **In Phase (Percentages):**
    {in_percentages.to_string()}

    **Out Phase (Percentages):**
    {out_percentages.to_string()}
    """

    with open(f'{waterway_dir}/restriction_simulation_analysis.txt', 'w') as file:
        file.write(analysis_text)

    # Save both tables as CSV as well.
    in_percentages.to_csv(f'{waterway_dir}/in_hist_percentage.csv')
    out_percentages.to_csv(f'{waterway_dir}/out_hist_percentage.csv')

    # One folder with an in/out plot pair per restriction category, including "T".
    for label, _ in categories:
        save_individual_restriction_plots(
            in_percentages[f'{label} In'], out_percentages[f'{label} Out'],
            label, f'{waterway_dir}/{label.lower()}')
407
408
def terminal_analysis(run_id):
    """
    Runs the utilization analysis for every resource found in the availability reports.

    Reports are read from ``.{run_id}/logs/availability/``. For each resource the
    utilization plot, the mean-utilization file, the threshold file and the threshold
    plots are produced under ``.{run_id}/bottlenecks/<terminal type>/<resource name>``.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    utilization = collect_data_from_reports(
        directory=f'.{run_id}/logs/availability/')

    for resource in utilization:
        plot_utilization(utilization, resource, run_id)
        save_mean_utilization(
            utilization, resource, output_dir='./logs/', run_id=run_id)

        # Resource names have the form "<terminal type> <resource name>".
        name_parts = resource.split(' ')
        resource_name = name_parts[1]
        terminal_type = name_parts[0]
        resource_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'

        thresholds, time_shares = save_utilization_thresholds(
            utilization, resource, output_dir=resource_dir)
        plot_utilization_thresholds(
            thresholds, time_shares, resource,
            output_dir=f'{resource_dir}/thresholds')
429
430
def bottleneckAnalysis(run_id):
    """
    Entry point of the bottleneck analysis for one simulation run.

    Runs the terminal utilization analysis first and the channel restriction
    analysis second.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    terminal_analysis(run_id)
    channel_restriction_analysis(run_id)

def parse_report(file_path): View Source

def parse_report(file_path):
    """
    Parses a report file to extract the timestep and utilization data for each resource.

    The first line must contain ``Time Step: <number>`` (integer or decimal). Every later
    line starting with ``Mean `` opens a resource section named by the text between
    ``Mean `` and `` Utilization``. Inside a section, ``<name>: <value>%`` lines are stored
    as fractions (percentage / 100); a line containing ``Overall`` is stored under the
    key ``"Overall"``.
    Args:
        file_path (str): The path to the report file.
    Returns:
        tuple: A tuple containing the timestep (float) and a dictionary with utilization data.
               The dictionary has resource names as keys and another dictionary as values,
               which contains terminal names and their respective utilization fractions.
    Raises:
        ValueError: If the first line has no ``Time Step: <number>`` header or a
                    utilization value cannot be converted to a number.
    """
    with open(file_path, 'r') as file:
        header = file.readline()
        body_lines = file.readlines()

    # Accept integer timesteps ("Time Step: 5") as well as decimals ("Time Step: 5.0").
    match = re.search(r"Time Step: (\d+(?:\.\d+)?)", header)
    if match is None:
        raise ValueError(
            f"{file_path}: first line does not contain a 'Time Step: <number>' header")
    timestep = float(match.group(1))

    utilization_data = {}
    resource = None

    for line in body_lines:
        if line.startswith("Mean "):
            resource = line.split("Mean ")[1].split(" Utilization")[0]
            utilization_data[resource] = {}
            continue
        if not resource:
            # Lines before the first "Mean ..." header belong to no resource.
            continue
        # Split on the last ": " so the value is always the trailing field; lines
        # without that separator carry no utilization value and are skipped.
        name, separator, value = line.rpartition(": ")
        if not separator:
            continue
        key = "Overall" if "Overall" in line else name
        utilization_data[resource][key] = float(value.replace('%', '')) / 100

    return timestep, utilization_data

Parses a report file to extract the timestep and utilization data for each resource.

Arguments:

file_path (str): The path to the report file.

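Returns:

tuple: A tuple containing the timestep (float) and a dictionary with utilization data. The dictionary has resource names as keys and another dictionary as values, which contains terminal names and their respective utilization percentages.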

def collect_data_from_reports(directory): View Source

def collect_data_from_reports(directory):
    """
    Gathers the utilization series of every resource from the ``.txt`` reports in a directory.

    Reports are visited in sorted filename order, each one is parsed with ``parse_report``
    and its values are appended to per-resource lists.
    Args:
        directory (str): The path to the directory containing report files.
    Returns:
        dict: Maps each resource name to a dictionary holding a ``'timestep'`` list and one
              list of utilization values per terminal (an ``'Overall'`` list is always present).
    """
    collected = {}
    report_names = [name for name in sorted(os.listdir(directory))
                    if name.endswith(".txt")]

    for report_name in report_names:
        timestep, report_data = parse_report(
            os.path.join(directory, report_name))
        for resource, per_terminal in report_data.items():
            # First sighting of a resource creates its timestep and 'Overall' series.
            series = collected.setdefault(
                resource, {'timestep': [], 'Overall': []})
            series['timestep'].append(timestep)
            for terminal, utilization in per_terminal.items():
                series.setdefault(terminal, []).append(utilization)

    return collected

Collects utilization data from all report files in the specified directory.

Arguments:

directory (str): The path to the directory containing report files.

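Returns:

dict: A dictionary where keys are resource names and values are dictionaries containing timesteps and utilization data for each terminal.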

def save_mean_utilization(data, resource, output_dir, run_id): View Source

 78def save_mean_utilization(data, resource, output_dir, run_id):
 79    """
 80    Saves the mean utilization of a specific resource across all terminals to a text file.
 81    Args:
 82        data (dict): The utilization data collected from report files.
 83        resource (str): The name of the resource to analyze.
 84        output_dir (str): The directory where the output file will be saved.
 85        run_id (str): The run identifier for the simulation.
 86    Returns:
 87        None
 88    """
 89    mean_utilization = {terminal: np.mean(utilizations)
 90                        for terminal, utilizations in data[resource].items() if terminal != 'timestep'}
 91
 92    resource_name = resource.split(' ')[1]
 93    terminal_type = resource.split(' ')[0]
 94    output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'
 95    os.makedirs(output_dir, exist_ok=True)
 96    output_path = os.path.join(output_dir, f"{resource_name}_utilization.txt")
 97
 98    with open(output_path, 'w') as file:
 99        file.write(f"Mean {resource} utilization over time steps:\n")
100        for terminal, mean_util in mean_utilization.items():
101            file.write(f"{terminal}: {mean_util:.2%}\n")
102        overall_mean = mean_utilization.get('Overall', 0)
103        file.write(f"Overall {resource} utilization: {overall_mean:.2%}\n")

Saves the mean utilization of a specific resource across all terminals to a text file.

Arguments:

data (dict): The utilization data collected from report files.
resource (str): The name of the resource to analyze.
output_dir (str): The directory where the output file will be saved.
run_id (str): The run identifier for the simulation.

Returns:

None

def plot_utilization(data, resource, run_id): View Source

def plot_utilization(data, resource, run_id):
    """
    Plots the utilization of a specific resource across all terminals over time.

    One line is drawn per series in ``data[resource]`` (every key except ``'timestep'``),
    ordered by time step. The figure is saved as
    ``.{run_id}/bottlenecks/<terminal type>/<resource name>/utilization.pdf``, where the
    terminal type and resource name are the first and second space-separated words of
    ``resource``.
    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    name_parts = resource.split(' ')
    resource_name = name_parts[1]
    terminal_type = name_parts[0]
    output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'

    timesteps = np.array(data[resource]['timestep'])
    order = np.argsort(timesteps)
    sorted_timesteps = timesteps[order]

    plt.figure(figsize=(8, 6))
    try:
        # 20-colour cycle so resources with many terminals keep distinct line colours.
        plt.gca().set_prop_cycle(plt.cycler(color=plt.cm.tab20.colors))

        for terminal, utilization in data[resource].items():
            if terminal == 'timestep':
                continue
            series = np.array(utilization)
            # A series without one value per time step (e.g. an 'Overall' list that
            # never received data) cannot be aligned with the time axis; skip it
            # instead of failing on the index lookup.
            if len(series) != len(sorted_timesteps):
                continue
            plt.plot(sorted_timesteps, series[order], label=terminal)

        # set x and y axis limits
        plt.ylim([0, 1.0])
        plt.xlim(left=0)

        plt.xlabel('Time step')
        plt.ylabel('Utilization')
        plt.title(f'{resource} utilization')
        plt.legend(loc='best', ncol=2)
        plt.grid(True)

        os.makedirs(output_dir, exist_ok=True)
        plt.tight_layout()
        plt.savefig(f'{output_dir}/utilization.pdf')
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close()

Plots the utilization of a specific resource across all terminals over time.

Arguments:

data (dict): The utilization data collected from report files.
resource (str): The name of the resource to analyze.
run_id (str): The run identifier for the simulation.

Returns:

None

def save_utilization_thresholds(data, resource, output_dir): View Source

def save_utilization_thresholds(data, resource, output_dir):
    """
    Saves the percentage of time each terminal's utilization is at or above fixed thresholds.

    The thresholds run from 100% down to 0% in 10% steps. Results are written to
    ``<output_dir>/<resource>_Utilization_Thresholds.txt`` and also returned.
    A terminal with no recorded values is reported as 0% for every threshold.
    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the output file will be saved.
    Returns:
        tuple: A tuple containing the thresholds and a dictionary with utilization percentages for each terminal.
    """
    # Utilization fractions: 100%, 90%, ..., 10%, 0%.
    thresholds = [1.00, 0.90, 0.80, 0.70, 0.6, 0.5,
                  0.4, 0.3, 0.2, 0.1, 0]
    output_path = os.path.join(
        output_dir, f"{resource}_Utilization_Thresholds.txt")
    os.makedirs(output_dir, exist_ok=True)
    utilization_percentages = {}  # To store results for plotting

    with open(output_path, 'w') as file:
        file.write(f"{resource} Utilization Thresholds:\n")

        for terminal, utilizations in data[resource].items():
            if terminal == 'timestep':  # Skip the timestep key
                continue
            file.write(f"\nTerminal: {terminal}\n")
            total_steps = len(utilizations)
            percentages = []
            for threshold in thresholds:
                if total_steps:
                    count_above_threshold = sum(
                        1 for u in utilizations if u >= threshold)
                    percentage_above_threshold = (
                        count_above_threshold / total_steps) * 100
                else:
                    # No samples: avoid dividing by zero and report no time at all.
                    percentage_above_threshold = 0.0
                file.write(
                    f"Time at or above {threshold * 100:.0f}% utilization: {percentage_above_threshold:.2f}%\n")
                percentages.append(percentage_above_threshold)
            utilization_percentages[terminal] = percentages

    return thresholds, utilization_percentages  # Return for plotting

Saves the percentage of time each terminal's utilization is above specified thresholds.

Arguments:

data (dict): The utilization data collected from report files.
resource (str): The name of the resource to analyze.
output_dir (str): The directory where the output file will be saved.

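Returns:

tuple: A tuple containing the thresholds and a dictionary with utilization percentages for each terminal.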

def plot_utilization_thresholds(thresholds, utilization_percentages, resource, output_dir): View Source

def plot_utilization_thresholds(thresholds, utilization_percentages, resource, output_dir):
    """
    Draws, for every terminal, the share of time spent at or above each utilization threshold.

    One PDF per terminal is saved as
    ``<output_dir>/<resource>_<terminal>_Utilization_Thresholds.pdf``.
    Args:
        thresholds (list): The list of utilization thresholds (fractions, plotted as percent).
        utilization_percentages (dict): A dictionary with utilization percentages for each terminal.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the plots will be saved.
    Returns:
        None
    """
    for terminal in utilization_percentages:
        time_share = utilization_percentages[terminal]
        threshold_percent = [value * 100 for value in thresholds]

        plt.figure(figsize=(6, 6))
        plt.plot(threshold_percent, time_share,
                 marker='o', linestyle='-', color='b')
        plt.title(f'{resource} - {terminal} utilization')
        plt.xlabel('Utilization more than (%)')
        plt.ylabel('Percentage of time (%)')
        plt.grid(True)

        # Pin both axes to the 0-100 range.
        plt.ylim(bottom=0)
        plt.ylim(top=100)
        plt.xlim(left=0)
        plt.xlim(right=100)

        # Write the figure next to the other outputs of this resource.
        os.makedirs(output_dir, exist_ok=True)
        file_name = f"{resource}_{terminal}_Utilization_Thresholds.pdf"
        plt.savefig(os.path.join(output_dir, file_name))
        plt.close()

Plots the percentage of time each terminal's utilization is above specified thresholds.

Arguments:

thresholds (list): The list of utilization thresholds.
utilization_percentages (dict): A dictionary with utilization percentages for each terminal.
resource (str): The name of the resource to analyze.
output_dir (str): The directory where the plots will be saved.

Returns:

None

def parse_restrictions(restriction_str): View Source

def parse_restrictions(restriction_str):
    """
    Parses a string of restrictions into a dictionary.

    Entries are separated by commas and each entry has the form ``KEY:VALUE``. Whitespace
    around entries and keys is ignored and empty entries are skipped, so an empty string
    yields an empty dictionary.
    Args:
        restriction_str (str): A string containing restrictions in the format "B:1.0, D:2.0, DL:3.0, T:4.0".
    Returns:
        dict: A dictionary with restriction types as keys and their values as floats.
    Raises:
        ValueError: If an entry has no ``:`` separator or its value is not a number.
    """
    restrictions = {}
    for item in restriction_str.split(','):
        item = item.strip()
        if not item:
            continue
        key, separator, value = item.partition(':')
        if not separator:
            raise ValueError(
                f"Malformed restriction entry {item!r}: expected 'KEY:VALUE'")
        restrictions[key.strip()] = float(value)
    return restrictions

Parses a string of restrictions into a dictionary.

Arguments:

restriction_str (str): A string containing restrictions in the format "B:1.0, D:2.0, DL:3.0, T:4.0".

Returns:

dict: A dictionary with restriction types as keys and their values as floats.

def save_individual_restriction_plots(data_in, data_out, title, folder_name): View Source

def _save_restriction_bar_plot(data, plot_title, output_path):
    """
    Draws one bar chart of a waiting-time distribution and saves it to disk.
    Args:
        data (pd.Series): Values to plot as bars; the index supplies the tick labels.
        plot_title (str): Title shown above the chart.
        output_path (str): File path the figure is written to.
    Returns:
        None
    """
    # Custom tick labels: replace (-1, 0] with 0 and keep the rest as they are
    tick_labels = ['0' if str(label) == '(-1, 0]' else str(label)
                   for label in data.index]

    # Use a dedicated figure so the chart never lands on a figure left open elsewhere
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(plot_title)
        ax.set_ylabel('Percentage')
        ax.set_xlabel('Waiting time (hr)')
        ax.set_xticks(list(range(len(tick_labels))))
        ax.set_xticklabels(tick_labels, rotation=45)

        # Adding percentage labels on top of each bar
        for container in ax.containers:
            ax.bar_label(container, fmt='%.1f', label_type='edge')

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)


def save_individual_restriction_plots(data_in, data_out, title, folder_name):
    """
    Saves individual restriction plots for "In" and "Out" phases.

    Two PNG files are written into folder_name, named after the lower-cased
    title with spaces replaced by underscores and suffixed "_in" / "_out".
    Args:
        data_in (pd.Series): Series containing the "In" phase data.
        data_out (pd.Series): Series containing the "Out" phase data.
        title (str): Title for the plots.
        folder_name (str): Folder name where the plots will be saved.
    Returns:
        None
    """
    os.makedirs(folder_name, exist_ok=True)

    file_stem = title.lower().replace(" ", "_")
    for phase, phase_data in (('in', data_in), ('out', data_out)):
        _save_restriction_bar_plot(
            phase_data,
            f'{title} waiting time distribution ({phase})',
            f'{folder_name}/{file_stem}_{phase}.png')

Saves individual restriction plots for "In" and "Out" phases.

Arguments:

data_in (pd.Series): Series containing the "In" phase data.
data_out (pd.Series): Series containing the "Out" phase data.
title (str): Title for the plots.
folder_name (str): Folder name where the plots will be saved.

Returns:

None

def channel_restriction_analysis(run_id): View Source

def _restriction_percentages(restriction_df, phase, bins):
    """
    Bins the waiting times of each restriction category and converts counts to percentages.
    Args:
        restriction_df (pd.DataFrame): One column per restriction key ('B', 'D', 'DL', 'T').
        phase (str): Column suffix for the result, "In" or "Out".
        bins (list): Bin edges passed to pd.cut (right-closed intervals).
    Returns:
        pd.DataFrame: Rows are the bins, columns are "<Category> <phase>", values are
        the percentage of binned entries per column. Values outside the bin range
        are not counted.
    """
    categories = (('B', 'Beam'), ('D', 'Draft'), ('DL', 'Daylight'), ('T', 'Total'))
    counts = pd.DataFrame({
        f'{label} {phase}': pd.cut(
            restriction_df[column], bins=bins, right=True).value_counts().sort_index()
        for column, label in categories
    })
    # Each column is normalised by its own total
    return (counts / counts.sum()) * 100


def channel_restriction_analysis(run_id):
    """
    Analyzes channel restrictions based on the ship logs and generates histograms and plots.

    Reads the ship log workbook of the run, bins the waiting times of the beam,
    draft, daylight and total restrictions for the "in" and "out" phases, and
    writes a text report, two CSV files and one pair of bar charts per category
    into the run's bottlenecks/Waterway folder.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    waterway_dir = f'.{run_id}/bottlenecks/Waterway'

    # Load the dataset
    df = pd.read_excel(f'.{run_id}/logs/ship_logs.xlsx')
    os.makedirs(waterway_dir, exist_ok=True)

    # Keep only rows where both restriction values are strings (i.e. parseable)
    in_is_str = df['Time for Restriction In'].apply(lambda x: isinstance(x, str))
    out_is_str = df['Time for Restriction Out'].apply(lambda x: isinstance(x, str))
    df_filtered = df[in_is_str & out_is_str]

    # Parse the restriction strings and expand them into one column per restriction key;
    # the "Q" column is not part of the analysis and is dropped when present
    restriction_in_df = pd.json_normalize(
        df_filtered['Time for Restriction In'].apply(parse_restrictions)
    ).drop(columns=['Q'], errors='ignore')
    restriction_out_df = pd.json_normalize(
        df_filtered['Time for Restriction Out'].apply(parse_restrictions)
    ).drop(columns=['Q'], errors='ignore')

    # Hourly bins; (-1, 0] captures a waiting time of exactly zero
    bins = [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    in_hist_percentage_df = _restriction_percentages(restriction_in_df, 'In', bins)
    out_hist_percentage_df = _restriction_percentages(restriction_out_df, 'Out', bins)

    # Save analysis to a text file, including "T". The report is assembled from
    # unindented lines so the table headers stay aligned with their rows.
    analysis_text = "\n".join([
        "Percentage distributions of the waiting times for each restriction category across hourly bins, including Total:",
        "",
        "**In Phase (Percentages):**",
        in_hist_percentage_df.to_string(),
        "",
        "**Out Phase (Percentages):**",
        out_hist_percentage_df.to_string(),
        "",
    ])

    with open(f'{waterway_dir}/restriction_simulation_analysis.txt', 'w') as file:
        file.write(analysis_text)

    # save two dataframes to csv
    in_hist_percentage_df.to_csv(f'{waterway_dir}/in_hist_percentage.csv')
    out_hist_percentage_df.to_csv(f'{waterway_dir}/out_hist_percentage.csv')

    # Saving plots for each restriction separately, including "T"
    for label in ('Beam', 'Draft', 'Daylight', 'Total'):
        save_individual_restriction_plots(
            in_hist_percentage_df[f'{label} In'],
            out_hist_percentage_df[f'{label} Out'],
            label,
            f'{waterway_dir}/{label.lower()}')

Analyzes channel restrictions based on the ship logs and generates histograms and plots.

Arguments:

run_id (str): The run identifier for the simulation.

Returns:

None

def terminal_analysis(run_id): View Source

def terminal_analysis(run_id):
    """
    Runs the utilization analysis for every resource found in the availability reports.

    For each resource this plots its utilization, saves its mean utilization,
    saves the utilization thresholds and plots them.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    reports = collect_data_from_reports(
        directory=f'.{run_id}/logs/availability/')

    for resource in reports.keys():
        plot_utilization(reports, resource, run_id)
        save_mean_utilization(
            reports, resource, output_dir='./logs/', run_id=run_id)

        # The resource key is split on spaces: first word -> terminal type, second -> resource name
        name_parts = resource.split(' ')
        resource_name = name_parts[1]
        terminal_type = name_parts[0]
        resource_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'

        thresholds, utilization_percentages = save_utilization_thresholds(
            reports, resource, output_dir=resource_dir)
        plot_utilization_thresholds(
            thresholds, utilization_percentages, resource,
            output_dir=f'{resource_dir}/thresholds')

Analyzes terminal utilization data and generates plots and reports.

Arguments:

run_id (str): The run identifier for the simulation.

Returns:

None

def bottleneckAnalysis(run_id): View Source

def bottleneckAnalysis(run_id):
    """
    Entry point of the bottleneck analysis: runs the terminal analysis first,
    then the channel restriction analysis, for the given run.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    for analysis_step in (terminal_analysis, channel_restriction_analysis):
        analysis_step(run_id)

Main function to run the bottleneck simulation_analysis.

Arguments:

run_id (str): The run identifier for the simulation.

Returns:

None