simulation_analysis.resource_utilization
This module provides functions to analyze bottlenecks in a simulation run. It includes parsing report files, collecting utilization data, plotting utilization trends, saving mean utilization, and analyzing channel restrictions.
"""
This module provides functions to analyze bottlenecks in a simulation run.
It includes parsing report files, collecting utilization data, plotting utilization trends,
saving mean utilization, and analyzing channel restrictions.
"""
import os
import re

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import constants


def parse_report(file_path):
    """
    Parses a report file to extract the timestep and utilization data for each resource.

    The first line must contain ``Time Step: <number>``; both integer and decimal
    values are accepted. Every later line starting with ``Mean `` opens a new
    resource section, and each following ``name: value%`` line is stored under
    that resource as a fraction (the percentage divided by 100).

    Args:
        file_path (str): The path to the report file.
    Returns:
        tuple: A tuple containing the timestep (float) and a dictionary with utilization data.
            The dictionary maps resource names to dictionaries that map terminal names
            (or "Overall") to utilization fractions.
    Raises:
        ValueError: If the file is empty or its first line has no ``Time Step`` value.
            Malformed utilization lines propagate the error raised while splitting
            or converting them.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Accept "5" as well as "5.0"; the previous pattern required a decimal point.
    header = lines[0] if lines else ""
    match = re.search(r"Time Step: (\d+(?:\.\d+)?)", header)
    if match is None:
        raise ValueError(
            f"{file_path}: first line does not contain 'Time Step: <number>'")
    timestep = float(match.group(1))

    utilization_data = {}
    resource = None

    for line in lines[1:]:
        if line.startswith("Mean "):
            resource = line.split("Mean ")[1].split(" Utilization")[0]
            utilization_data[resource] = {}
        elif resource and ":" in line:
            if "Overall" in line:
                # Any line mentioning "Overall" is stored under the fixed key.
                utilization_data[resource]["Overall"] = float(
                    line.split(": ")[1].replace('%', '')) / 100
            else:
                terminal, utilization = line.split(": ")
                utilization_data[resource][terminal] = float(
                    utilization.replace('%', '')) / 100

    return timestep, utilization_data


def collect_data_from_reports(directory):
    """
    Collects utilization data from all report files in the specified directory.

    Only files ending in ".txt" are read, in sorted filename order.

    Args:
        directory (str): The path to the directory containing report files.
    Returns:
        dict: A dictionary where keys are resource names and values are dictionaries containing
            a 'timestep' list plus one utilization list per terminal (and 'Overall').
    """
    data = {}
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        timestep, utilization_data = parse_report(
            os.path.join(directory, filename))
        for resource, terminals in utilization_data.items():
            series = data.setdefault(
                resource, {'timestep': [], 'Overall': []})
            series['timestep'].append(timestep)
            for terminal, utilization in terminals.items():
                series.setdefault(terminal, []).append(utilization)

    return data


def save_mean_utilization(data, resource, output_dir, run_id):
    """
    Saves the mean utilization of a specific resource across all terminals to a text file.

    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze; its first two
            space-separated words select the output sub-directories.
        output_dir (str): Ignored. The value is overwritten below and the file is
            always written under `.{run_id}/bottlenecks/...`.
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    averages = {}
    for name, series in data[resource].items():
        if name == 'timestep':
            continue
        averages[name] = np.mean(series)

    words = resource.split(' ')
    resource_name = words[1]
    terminal_type = words[0]
    # NOTE: the caller-supplied output_dir is deliberately replaced here to keep
    # the existing output location.
    output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{resource_name}_utilization.txt")

    report = [f"Mean {resource} utilization over time steps:\n"]
    report.extend(f"{name}: {value:.2%}\n" for name, value in averages.items())
    report.append(
        f"Overall {resource} utilization: {averages.get('Overall', 0):.2%}\n")

    with open(output_path, 'w') as file:
        file.writelines(report)


def plot_utilization(data, resource, run_id):
    """
    Plots the utilization of a specific resource across all terminals over time.

    One line per terminal (and "Overall") is drawn against the timesteps sorted in
    ascending order, and the figure is saved as
    `.{run_id}/bottlenecks/<terminal type>/<resource name>/utilization.pdf`.

    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    fig = plt.figure(figsize=(8, 6))
    try:
        timesteps = np.array(data[resource]['timestep'])
        order = np.argsort(timesteps)
        sorted_timesteps = timesteps[order]

        plt.gca().set_prop_cycle(plt.cycler(color=plt.cm.tab20.colors))

        for terminal, utilization in data[resource].items():
            if terminal == 'timestep':
                continue
            plt.plot(sorted_timesteps, np.array(utilization)[order],
                     label=terminal)

        # set x and y axis limits
        plt.ylim([0, 1.0])
        plt.xlim(left=0)

        plt.xlabel('Time step')
        plt.ylabel('Utilization')
        plt.title(f'{resource} utilization')
        plt.legend(loc='best', ncol=2)
        plt.grid(True)

        resource_name = resource.split(' ')[1]
        terminal_type = resource.split(' ')[0]
        output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'
        os.makedirs(output_dir, exist_ok=True)
        plt.tight_layout()
        plt.savefig(f'{output_dir}/utilization.pdf')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


def save_utilization_thresholds(data, resource, output_dir):
    """
    Saves the percentage of time each terminal's utilization is at or above specified thresholds.

    Series without any samples are skipped so they cannot trigger a division by zero.

    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the output file will be saved.
    Returns:
        tuple: A tuple containing the thresholds and a dictionary with utilization
            percentages for each terminal that has at least one sample.
    """
    thresholds = [1.00, 0.90, 0.80, 0.70, 0.6, 0.5,
                  0.4, 0.3, 0.2, 0.1, 0]  # 100% down to 0% in 10-point steps
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(
        output_dir, f"{resource}_Utilization_Thresholds.txt")
    utilization_percentages = {}  # To store results for plotting

    with open(output_path, 'w') as file:
        file.write(f"{resource} Utilization Thresholds:\n")

        for terminal, utilizations in data[resource].items():
            if terminal == 'timestep':  # Skip the timestep key
                continue
            total_steps = len(utilizations)
            if total_steps == 0:
                # e.g. the pre-seeded 'Overall' list when reports have no Overall line
                continue

            file.write(f"\nTerminal: {terminal}\n")
            shares = []
            for threshold in thresholds:
                count_at_or_above = sum(
                    1 for u in utilizations if u >= threshold)
                share = (count_at_or_above / total_steps) * 100
                file.write(
                    f"Time at or above {threshold * 100:.0f}% utilization: {share:.2f}%\n")
                shares.append(share)
            utilization_percentages[terminal] = shares

    return thresholds, utilization_percentages  # Return for plotting


def plot_utilization_thresholds(thresholds, utilization_percentages, resource, output_dir):
    """
    Plots the percentage of time each terminal's utilization is at or above specified thresholds.

    One PDF per terminal is written to `output_dir`.

    Args:
        thresholds (list): The list of utilization thresholds (fractions).
        utilization_percentages (dict): A dictionary with utilization percentages for each terminal.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the plots will be saved.
    Returns:
        None
    """
    threshold_percent = [t * 100 for t in thresholds]

    for terminal, percentages in utilization_percentages.items():
        plt.figure(figsize=(6, 6))
        plt.plot(threshold_percent, percentages,
                 marker='o', linestyle='-', color='b')
        plt.xlabel('Utilization more than (%)')
        plt.ylabel('Percentage of time (%)')
        plt.title(f'{resource} - {terminal} utilization')
        plt.grid(True)

        # both axes span 0..100
        plt.ylim(0, 100)
        plt.xlim(0, 100)

        # Save plot
        os.makedirs(output_dir, exist_ok=True)
        plt.savefig(os.path.join(
            output_dir, f"{resource}_{terminal}_Utilization_Thresholds.pdf"))
        plt.close()


def parse_restrictions(restriction_str):
    """
    Parses a string of restrictions into a dictionary.

    Whitespace around entries, keys and values is ignored, and empty entries
    (for example from an empty string or a trailing comma) are skipped.

    Args:
        restriction_str (str): A string containing restrictions in the format "B:1.0, D:2.0, DL:3.0, T:4.0".
    Returns:
        dict: A dictionary with restriction types as keys and their values as floats.
    Raises:
        ValueError: If an entry has no ':' separator or its value is not a number.
    """
    restrictions = {}
    for item in restriction_str.split(','):
        item = item.strip()
        if not item:
            continue
        key, separator, value = item.partition(':')
        if not separator:
            raise ValueError(
                f"Malformed restriction entry {item!r}; expected 'KEY:VALUE'")
        restrictions[key.strip()] = float(value)
    return restrictions


def save_individual_restriction_plots(data_in, data_out, title, folder_name):
    """
    Saves individual restriction plots for "In" and "Out" phases.

    Two bar charts are written to `folder_name`:
    `<title>_in.png` and `<title>_out.png` (title lower-cased, spaces replaced by underscores).

    Args:
        data_in (pd.Series): Series containing the "In" phase data.
        data_out (pd.Series): Series containing the "Out" phase data.
        title (str): Title for the plots.
        folder_name (str): Folder name where the plots will be saved.
    Returns:
        None
    """
    os.makedirs(folder_name, exist_ok=True)
    file_stem = title.lower().replace(" ", "_")

    for phase, series in (("in", data_in), ("out", data_out)):
        # Custom tick labels: replace (-1, 0] with 0 and keep the rest as they are
        tick_labels = ['0' if str(label) == '(-1, 0]' else str(label)
                       for label in series.index]

        ax = series.plot(kind='bar', figsize=(10, 6))
        try:
            plt.title(f'{title} waiting time distribution ({phase})')
            plt.ylabel('Percentage')
            plt.xlabel('Waiting time (hr)')
            plt.xticks(ticks=range(len(series.index)),
                       labels=tick_labels, rotation=45)

            # Adding percentage labels on top of each bar
            for container in ax.containers:
                ax.bar_label(container, fmt='%.1f', label_type='edge')

            plt.tight_layout()
            plt.savefig(f'{folder_name}/{file_stem}_{phase}.png')
        finally:
            # Close the figure so the next Series.plot call starts from a clean state.
            plt.close()


def channel_restriction_analysis(run_id):
    """
    Analyzes channel restrictions based on the ship logs and generates histograms and plots.

    Reads `.{run_id}/logs/ship_logs.xlsx`, keeps rows whose "Time for Restriction In"
    and "Time for Restriction Out" cells are both strings, bins the B, D, DL and T
    values into hourly bins, and writes a text summary, two CSV files and one pair of
    bar charts per category under `.{run_id}/bottlenecks/Waterway`.

    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    waterway_dir = f'.{run_id}/bottlenecks/Waterway'
    in_column = 'Time for Restriction In'
    out_column = 'Time for Restriction Out'

    # Load the dataset
    df = pd.read_excel(f'.{run_id}/logs/ship_logs.xlsx')
    os.makedirs(waterway_dir, exist_ok=True)

    # Keep only rows where both restriction cells are strings
    both_strings = (df[in_column].map(lambda value: isinstance(value, str))
                    & df[out_column].map(lambda value: isinstance(value, str)))
    df_filtered = df[both_strings]

    # json_normalize is documented for a dict or list of dicts, so pass plain lists.
    # Only B, D, DL and T are read below; other keys such as "Q" are simply unused.
    restriction_in = pd.json_normalize(
        [parse_restrictions(text) for text in df_filtered[in_column]])
    restriction_out = pd.json_normalize(
        [parse_restrictions(text) for text in df_filtered[out_column]])

    bins = [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    categories = {'Beam': 'B', 'Draft': 'D', 'Daylight': 'DL', 'Total': 'T'}

    def _binned_percentages(restrictions, phase):
        # Per-bin counts for every category, converted to column-wise percentages.
        counts = pd.DataFrame({
            f'{label} {phase}': pd.cut(
                restrictions[key], bins=bins, right=True).value_counts().sort_index()
            for label, key in categories.items()
        })
        return (counts / counts.sum()) * 100

    in_hist_percentage_df = _binned_percentages(restriction_in, 'In')
    out_hist_percentage_df = _binned_percentages(restriction_out, 'Out')

    # Save analysis to a text file, including "T"
    output_text_file = f'{waterway_dir}/restriction_simulation_analysis.txt'

    analysis_text = f"""
    Percentage distributions of the waiting times for each restriction category across hourly bins, including Total:

    **In Phase (Percentages):**
    {in_hist_percentage_df.to_string()}

    **Out Phase (Percentages):**
    {out_hist_percentage_df.to_string()}
    """

    with open(output_text_file, 'w') as file:
        file.write(analysis_text)

    # save two dataframes to csv
    in_hist_percentage_df.to_csv(f'{waterway_dir}/in_hist_percentage.csv')
    out_hist_percentage_df.to_csv(f'{waterway_dir}/out_hist_percentage.csv')

    # Saving plots for each restriction separately, including "T"
    for label in categories:
        save_individual_restriction_plots(
            in_hist_percentage_df[f'{label} In'],
            out_hist_percentage_df[f'{label} Out'],
            label,
            f'{waterway_dir}/{label.lower()}')


def terminal_analysis(run_id):
    """
    Analyzes terminal utilization data and generates plots and reports.

    For every resource found in `.{run_id}/logs/availability/` this writes the
    utilization plot, the mean-utilization text file, the threshold text file and
    the per-terminal threshold plots.

    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    data = collect_data_from_reports(directory=f'.{run_id}/logs/availability/')
    for resource in data:
        plot_utilization(data, resource, run_id)
        save_mean_utilization(
            data, resource, output_dir='./logs/', run_id=run_id)

        words = resource.split(' ')
        resource_dir = f'.{run_id}/bottlenecks/{words[0]}/{words[1]}'
        thresholds, utilization_percentages = save_utilization_thresholds(
            data, resource, output_dir=resource_dir)
        plot_utilization_thresholds(
            thresholds, utilization_percentages, resource,
            output_dir=f'{resource_dir}/thresholds')


def bottleneckAnalysis(run_id):
    """
    Main function to run the bottleneck simulation_analysis.

    Runs the terminal utilization analysis followed by the channel restriction analysis.

    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    terminal_analysis(run_id)
    channel_restriction_analysis(run_id)
def parse_report(file_path):
    """
    Parses a report file to extract the timestep and utilization data for each resource.

    The first line must contain ``Time Step: <number>``; both integer and decimal
    values are accepted. Every later line starting with ``Mean `` opens a new
    resource section, and each following ``name: value%`` line is stored under
    that resource as a fraction (the percentage divided by 100).

    Args:
        file_path (str): The path to the report file.
    Returns:
        tuple: A tuple containing the timestep (float) and a dictionary with utilization data.
            The dictionary maps resource names to dictionaries that map terminal names
            (or "Overall") to utilization fractions.
    Raises:
        ValueError: If the file is empty or its first line has no ``Time Step`` value.
            Malformed utilization lines propagate the error raised while splitting
            or converting them.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Accept "5" as well as "5.0"; the previous pattern required a decimal point.
    header = lines[0] if lines else ""
    match = re.search(r"Time Step: (\d+(?:\.\d+)?)", header)
    if match is None:
        raise ValueError(
            f"{file_path}: first line does not contain 'Time Step: <number>'")
    timestep = float(match.group(1))

    utilization_data = {}
    resource = None

    for line in lines[1:]:
        if line.startswith("Mean "):
            resource = line.split("Mean ")[1].split(" Utilization")[0]
            utilization_data[resource] = {}
        elif resource and ":" in line:
            if "Overall" in line:
                # Any line mentioning "Overall" is stored under the fixed key.
                utilization_data[resource]["Overall"] = float(
                    line.split(": ")[1].replace('%', '')) / 100
            else:
                terminal, utilization = line.split(": ")
                utilization_data[resource][terminal] = float(
                    utilization.replace('%', '')) / 100

    return timestep, utilization_data
Parses a report file to extract the timestep and utilization data for each resource.
Arguments:
- file_path (str): The path to the report file.
Returns:
tuple: A tuple containing the timestep (float) and a dictionary with utilization data. The dictionary maps each resource name to another dictionary, which maps terminal names (plus an "Overall" entry when the report contains one) to their utilization expressed as a fraction, i.e. the reported percentage divided by 100.
def collect_data_from_reports(directory):
    """
    Collects utilization data from all report files in the specified directory.

    Only files ending in ".txt" are read, in sorted filename order.

    Args:
        directory (str): The path to the directory containing report files.
    Returns:
        dict: A dictionary where keys are resource names and values are dictionaries containing
            a 'timestep' list plus one utilization list per terminal (and 'Overall').
    """
    data = {}
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        timestep, utilization_data = parse_report(
            os.path.join(directory, filename))
        for resource, terminals in utilization_data.items():
            # 'timestep' and 'Overall' are always present, even if never filled.
            series = data.setdefault(
                resource, {'timestep': [], 'Overall': []})
            series['timestep'].append(timestep)
            for terminal, utilization in terminals.items():
                series.setdefault(terminal, []).append(utilization)

    return data
Collects utilization data from all report files in the specified directory.
Arguments:
- directory (str): The path to the directory containing report files.
Returns:
dict: A dictionary where keys are resource names and values are dictionaries containing timesteps and utilization data for each terminal.
def save_mean_utilization(data, resource, output_dir, run_id):
    """
    Saves the mean utilization of a specific resource across all terminals to a text file.

    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze; its first two
            space-separated words select the output sub-directories.
        output_dir (str): Ignored. The value is overwritten below and the file is
            always written under `.{run_id}/bottlenecks/...`.
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    averages = {}
    for name, series in data[resource].items():
        if name == 'timestep':
            continue
        averages[name] = np.mean(series)

    words = resource.split(' ')
    resource_name = words[1]
    terminal_type = words[0]
    # NOTE: the caller-supplied output_dir is deliberately replaced here to keep
    # the existing output location.
    output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{resource_name}_utilization.txt")

    report = [f"Mean {resource} utilization over time steps:\n"]
    report.extend(f"{name}: {value:.2%}\n" for name, value in averages.items())
    # 'Overall' is repeated on a final summary line (0 when the key is absent).
    report.append(
        f"Overall {resource} utilization: {averages.get('Overall', 0):.2%}\n")

    with open(output_path, 'w') as file:
        file.writelines(report)
Saves the mean utilization of a specific resource across all terminals to a text file.
Arguments:
- data (dict): The utilization data collected from report files.
- resource (str): The name of the resource to analyze.
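- output_dir (str): Intended as the directory where the output file will be saved. Note that the current implementation ignores this value and always writes to `.{run_id}/bottlenecks/<terminal type>/<resource name>`.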
- run_id (str): The run identifier for the simulation.
Returns:
None
def plot_utilization(data, resource, run_id):
    """
    Plots the utilization of a specific resource across all terminals over time.

    One line per terminal (and "Overall") is drawn against the timesteps sorted in
    ascending order, and the figure is saved as
    `.{run_id}/bottlenecks/<terminal type>/<resource name>/utilization.pdf`.

    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    fig = plt.figure(figsize=(8, 6))
    try:
        timesteps = np.array(data[resource]['timestep'])
        order = np.argsort(timesteps)
        sorted_timesteps = timesteps[order]

        plt.gca().set_prop_cycle(plt.cycler(color=plt.cm.tab20.colors))

        for terminal, utilization in data[resource].items():
            if terminal == 'timestep':
                continue
            # Reorder each series with the same permutation as the timesteps.
            plt.plot(sorted_timesteps, np.array(utilization)[order],
                     label=terminal)

        # set x and y axis limits
        plt.ylim([0, 1.0])
        plt.xlim(left=0)

        plt.xlabel('Time step')
        plt.ylabel('Utilization')
        plt.title(f'{resource} utilization')
        plt.legend(loc='best', ncol=2)
        plt.grid(True)

        resource_name = resource.split(' ')[1]
        terminal_type = resource.split(' ')[0]
        output_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'
        os.makedirs(output_dir, exist_ok=True)
        plt.tight_layout()
        plt.savefig(f'{output_dir}/utilization.pdf')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
Plots the utilization of a specific resource across all terminals over time.
Arguments:
- data (dict): The utilization data collected from report files.
- resource (str): The name of the resource to analyze.
- run_id (str): The run identifier for the simulation.
Returns:
None
def save_utilization_thresholds(data, resource, output_dir):
    """
    Saves the percentage of time each terminal's utilization is at or above specified thresholds.

    Series without any samples are skipped so they cannot trigger a division by zero.

    Args:
        data (dict): The utilization data collected from report files.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the output file will be saved.
    Returns:
        tuple: A tuple containing the thresholds and a dictionary with utilization
            percentages for each terminal that has at least one sample.
    """
    thresholds = [1.00, 0.90, 0.80, 0.70, 0.6, 0.5,
                  0.4, 0.3, 0.2, 0.1, 0]  # 100% down to 0% in 10-point steps
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(
        output_dir, f"{resource}_Utilization_Thresholds.txt")
    utilization_percentages = {}  # To store results for plotting

    with open(output_path, 'w') as file:
        file.write(f"{resource} Utilization Thresholds:\n")

        for terminal, utilizations in data[resource].items():
            if terminal == 'timestep':  # Skip the timestep key
                continue
            total_steps = len(utilizations)
            if total_steps == 0:
                # An empty series has no meaningful share of time; skip it.
                continue

            file.write(f"\nTerminal: {terminal}\n")
            shares = []
            for threshold in thresholds:
                count_at_or_above = sum(
                    1 for u in utilizations if u >= threshold)
                share = (count_at_or_above / total_steps) * 100
                file.write(
                    f"Time at or above {threshold * 100:.0f}% utilization: {share:.2f}%\n")
                shares.append(share)
            utilization_percentages[terminal] = shares

    return thresholds, utilization_percentages  # Return for plotting
Saves the percentage of time each terminal's utilization is at or above each of the specified thresholds.
Arguments:
- data (dict): The utilization data collected from report files.
- resource (str): The name of the resource to analyze.
- output_dir (str): The directory where the output file will be saved.
Returns:
tuple: A tuple containing the thresholds and a dictionary with utilization percentages for each terminal.
def plot_utilization_thresholds(thresholds, utilization_percentages, resource, output_dir):
    """
    Plots the percentage of time each terminal's utilization is at or above specified thresholds.

    One PDF per terminal is written to `output_dir`.

    Args:
        thresholds (list): The list of utilization thresholds (fractions).
        utilization_percentages (dict): A dictionary with utilization percentages for each terminal.
        resource (str): The name of the resource to analyze.
        output_dir (str): The directory where the plots will be saved.
    Returns:
        None
    """
    # The x values are the same for every terminal: thresholds as percentages.
    threshold_percent = [t * 100 for t in thresholds]

    for terminal, percentages in utilization_percentages.items():
        plt.figure(figsize=(6, 6))
        plt.plot(threshold_percent, percentages,
                 marker='o', linestyle='-', color='b')
        plt.xlabel('Utilization more than (%)')
        plt.ylabel('Percentage of time (%)')
        plt.title(f'{resource} - {terminal} utilization')
        plt.grid(True)

        # both axes span 0..100
        plt.ylim(0, 100)
        plt.xlim(0, 100)

        # Save plot
        os.makedirs(output_dir, exist_ok=True)
        plt.savefig(os.path.join(
            output_dir, f"{resource}_{terminal}_Utilization_Thresholds.pdf"))
        plt.close()
Plots the percentage of time each terminal's utilization is at or above each of the specified thresholds, producing one plot per terminal.
Arguments:
- thresholds (list): The list of utilization thresholds.
- utilization_percentages (dict): A dictionary with utilization percentages for each terminal.
- resource (str): The name of the resource to analyze.
- output_dir (str): The directory where the plots will be saved.
Returns:
None
def parse_restrictions(restriction_str):
    """
    Parses a string of restrictions into a dictionary.

    Whitespace around entries, keys and values is ignored, and empty entries
    (for example from an empty string or a trailing comma) are skipped.

    Args:
        restriction_str (str): A string containing restrictions in the format "B:1.0, D:2.0, DL:3.0, T:4.0".
    Returns:
        dict: A dictionary with restriction types as keys and their values as floats.
    Raises:
        ValueError: If an entry has no ':' separator or its value is not a number.
    """
    restrictions = {}
    for item in restriction_str.split(','):
        item = item.strip()
        if not item:
            continue
        key, separator, value = item.partition(':')
        if not separator:
            raise ValueError(
                f"Malformed restriction entry {item!r}; expected 'KEY:VALUE'")
        restrictions[key.strip()] = float(value)
    return restrictions
Parses a string of restrictions into a dictionary.
Arguments:
- restriction_str (str): A string containing restrictions in the format "B:1.0, D:2.0, DL:3.0, T:4.0".
Returns:
dict: A dictionary with restriction types as keys and their values as floats.
def save_individual_restriction_plots(data_in, data_out, title, folder_name):
    """
    Saves individual restriction plots for "In" and "Out" phases.

    Two bar charts are written to `folder_name`:
    `<title>_in.png` and `<title>_out.png` (title lower-cased, spaces replaced by underscores).

    Args:
        data_in (pd.Series): Series containing the "In" phase data.
        data_out (pd.Series): Series containing the "Out" phase data.
        title (str): Title for the plots.
        folder_name (str): Folder name where the plots will be saved.
    Returns:
        None
    """
    os.makedirs(folder_name, exist_ok=True)
    file_stem = title.lower().replace(" ", "_")

    # Both phases are drawn the same way; only the data and the suffix differ.
    for phase, series in (("in", data_in), ("out", data_out)):
        # Custom tick labels: replace (-1, 0] with 0 and keep the rest as they are
        tick_labels = ['0' if str(label) == '(-1, 0]' else str(label)
                       for label in series.index]

        ax = series.plot(kind='bar', figsize=(10, 6))
        try:
            plt.title(f'{title} waiting time distribution ({phase})')
            plt.ylabel('Percentage')
            plt.xlabel('Waiting time (hr)')
            plt.xticks(ticks=range(len(series.index)),
                       labels=tick_labels, rotation=45)

            # Adding percentage labels on top of each bar
            for container in ax.containers:
                ax.bar_label(container, fmt='%.1f', label_type='edge')

            plt.tight_layout()
            plt.savefig(f'{folder_name}/{file_stem}_{phase}.png')
        finally:
            # Close the figure so the next Series.plot call starts from a clean state.
            plt.close()
Saves individual restriction plots for "In" and "Out" phases.
Arguments:
- data_in (pd.Series): Series containing the "In" phase data.
- data_out (pd.Series): Series containing the "Out" phase data.
- title (str): Title for the plots.
- folder_name (str): Folder name where the plots will be saved.
Returns:
None
def channel_restriction_analysis(run_id):
    """
    Analyzes channel restrictions based on the ship logs and generates histograms and plots.

    Reads `.{run_id}/logs/ship_logs.xlsx`, keeps rows whose "Time for Restriction In"
    and "Time for Restriction Out" cells are both strings, bins the B, D, DL and T
    values into hourly bins, and writes a text summary, two CSV files and one pair of
    bar charts per category under `.{run_id}/bottlenecks/Waterway`.

    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    waterway_dir = f'.{run_id}/bottlenecks/Waterway'
    in_column = 'Time for Restriction In'
    out_column = 'Time for Restriction Out'

    # Load the dataset
    df = pd.read_excel(f'.{run_id}/logs/ship_logs.xlsx')
    os.makedirs(waterway_dir, exist_ok=True)

    # Keep only rows where both restriction cells are strings
    both_strings = (df[in_column].map(lambda value: isinstance(value, str))
                    & df[out_column].map(lambda value: isinstance(value, str)))
    df_filtered = df[both_strings]

    # json_normalize is documented for a dict or list of dicts, so pass plain lists.
    # Only B, D, DL and T are read below; other keys such as "Q" are simply unused.
    restriction_in = pd.json_normalize(
        [parse_restrictions(text) for text in df_filtered[in_column]])
    restriction_out = pd.json_normalize(
        [parse_restrictions(text) for text in df_filtered[out_column]])

    bins = [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    categories = {'Beam': 'B', 'Draft': 'D', 'Daylight': 'DL', 'Total': 'T'}

    def _binned_percentages(restrictions, phase):
        # Per-bin counts for every category, converted to column-wise percentages.
        counts = pd.DataFrame({
            f'{label} {phase}': pd.cut(
                restrictions[key], bins=bins, right=True).value_counts().sort_index()
            for label, key in categories.items()
        })
        return (counts / counts.sum()) * 100

    in_hist_percentage_df = _binned_percentages(restriction_in, 'In')
    out_hist_percentage_df = _binned_percentages(restriction_out, 'Out')

    # Save analysis to a text file, including "T"
    output_text_file = f'{waterway_dir}/restriction_simulation_analysis.txt'

    analysis_text = f"""
    Percentage distributions of the waiting times for each restriction category across hourly bins, including Total:

    **In Phase (Percentages):**
    {in_hist_percentage_df.to_string()}

    **Out Phase (Percentages):**
    {out_hist_percentage_df.to_string()}
    """

    with open(output_text_file, 'w') as file:
        file.write(analysis_text)

    # save two dataframes to csv
    in_hist_percentage_df.to_csv(f'{waterway_dir}/in_hist_percentage.csv')
    out_hist_percentage_df.to_csv(f'{waterway_dir}/out_hist_percentage.csv')

    # Saving plots for each restriction separately, including "T"
    for label in categories:
        save_individual_restriction_plots(
            in_hist_percentage_df[f'{label} In'],
            out_hist_percentage_df[f'{label} Out'],
            label,
            f'{waterway_dir}/{label.lower()}')
Analyzes channel restrictions based on the ship logs and generates histograms and plots.
Arguments:
- run_id (str): The run identifier for the simulation.
Returns:
None
def terminal_analysis(run_id):
    """
    Runs the utilization analysis for every resource found in the availability reports.
    For each resource the utilization is plotted, its mean utilization is saved, and the
    utilization thresholds are saved and plotted.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    utilization = collect_data_from_reports(
        directory=f'.{run_id}/logs/availability/')

    for resource in utilization:
        plot_utilization(utilization, resource, run_id)
        save_mean_utilization(
            utilization, resource, output_dir='./logs/', run_id=run_id)

        # The resource key is split on spaces: the first token is used as the
        # terminal type, the second as the resource name.
        name_parts = resource.split(' ')
        terminal_type = name_parts[0]
        resource_name = name_parts[1]
        resource_dir = f'.{run_id}/bottlenecks/{terminal_type}/{resource_name}'

        thresholds, utilization_percentages = save_utilization_thresholds(
            utilization, resource, output_dir=resource_dir)
        plot_utilization_thresholds(
            thresholds, utilization_percentages, resource,
            output_dir=f'{resource_dir}/thresholds')
Analyzes terminal utilization data and generates plots and reports.
Arguments:
- run_id (str): The run identifier for the simulation.
Returns:
None
def bottleneckAnalysis(run_id):
    """
    Entry point of the bottleneck analysis.
    Runs the terminal utilization analysis first and the channel restriction analysis second.
    Args:
        run_id (str): The run identifier for the simulation.
    Returns:
        None
    """
    for analysis_step in (terminal_analysis, channel_restriction_analysis):
        analysis_step(run_id)
Main function to run the bottleneck analysis: the terminal utilization analysis followed by the channel restriction analysis.
Arguments:
- run_id (str): The run identifier for the simulation.
Returns:
None