diff --git a/docs/api_reference.md b/docs/api_reference.md index f819ea3eb..7e7473180 100644 --- a/docs/api_reference.md +++ b/docs/api_reference.md @@ -40,3 +40,10 @@ and .. automodule:: mdio.core.dimension :members: ``` + +## Optimization + +```{eval-rst} +.. automodule:: mdio.optimize.access_pattern + :members: +``` diff --git a/docs/tutorials/rechunking.ipynb b/docs/tutorials/rechunking.ipynb index f9d4b12da..aeaae43d9 100644 --- a/docs/tutorials/rechunking.ipynb +++ b/docs/tutorials/rechunking.ipynb @@ -17,257 +17,1325 @@ "## Introduction\n", "\n", "In this page we will be showing how we can take an existing MDIO and add\n", - "fast access, lossy, versions of the data in X/Y/Z cross-sections (slices).\n", + "fast access, lossy, versions of the data in IL/XL/TWT cross-sections (slices).\n", "\n", "We can re-use the MDIO dataset we created in the [Quickstart](#quickstart) page.\n", "Please run it first.\n", "\n", - "We will define our compression levels first. We will use this to adjust the quality\n", - "of the lossy compression." + "Let's open the original MDIO first." ] }, { "cell_type": "code", - "execution_count": 1, - "id": "initial_id", - "metadata": { - "ExecuteTime": { - "end_time": "2025-04-16T18:38:02.462276Z", - "start_time": "2025-04-16T18:38:02.459882Z" - } - }, - "outputs": [], - "source": [ - "from enum import Enum\n", - "\n", - "\n", - "class MdioZfpQuality(float, Enum):\n", - " \"\"\"Config options for ZFP compression.\"\"\"\n", - "\n", - " VERY_LOW = 6\n", - " LOW = 3\n", - " MEDIUM = 1\n", - " HIGH = 0.1\n", - " VERY_HIGH = 0.01\n", - " ULTRA = 0.001" - ] - }, - { - "cell_type": "markdown", - "id": "c2a09a89-b453-4c3e-b879-14caaedd29de", - "metadata": {}, - "source": [ - "We will use the lower level `MDIOAccessor` to open the existing file in write mode that\n", - "allows us to modify its raw metadata. This can be dangerous, we recommend using only provided\n", - "tools to avoid data corruption.\n", - "\n", - "We specify the original access pattern of the source data `\"012\"` with some parameters like\n", - "caching. For the rechunking, we recommend using the single threaded `\"zarr\"` backend to avoid\n", - "race conditions.\n", - "\n", - "We also define a `dict` for common arguments in rechunking." - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "45558306-ab9c-46aa-a299-8758a911b373", - "metadata": { - "ExecuteTime": { - "end_time": "2025-04-16T18:38:04.107696Z", - "start_time": "2025-04-16T18:38:04.101239Z" + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 403MB\n",
+ "Dimensions: (inline: 345, crossline: 188, time: 1501)\n",
+ "Coordinates:\n",
+ " * inline (inline) int32 1kB 1 2 3 4 5 6 ... 340 341 342 343 344 345\n",
+ " * crossline (crossline) int32 752B 1 2 3 4 5 6 ... 184 185 186 187 188\n",
+ " * time (time) int32 6kB 0 2 4 6 8 10 ... 2992 2994 2996 2998 3000\n",
+ " cdp_y (inline, crossline) float64 519kB ...\n",
+ " cdp_x (inline, crossline) float64 519kB ...\n",
+ "Data variables:\n",
+ " amplitude (inline, crossline, time) float32 389MB ...\n",
+ " headers (inline, crossline) [('trace_seq_num_line', '<i4'), ('trace_seq_num_reel', '<i4'), ('orig_field_record_num', '<i4'), ('trace_num_orig_record', '<i4'), ('energy_source_point_num', '<i4'), ('ensemble_num', '<i4'), ('trace_num_ensemble', '<i4'), ('trace_id_code', '<i2'), ('vertically_summed_traces', '<i2'), ('horizontally_stacked_traces', '<i2'), ('data_use', '<i2'), ('source_to_receiver_distance', '<i4'), ('receiver_group_elevation', '<i4'), ('source_surface_elevation', '<i4'), ('source_depth_below_surface', '<i4'), ('receiver_datum_elevation', '<i4'), ('source_datum_elevation', '<i4'), ('source_water_depth', '<i4'), ('receiver_water_depth', '<i4'), ('elevation_depth_scalar', '<i2'), ('coordinate_scalar', '<i2'), ('source_coord_x', '<i4'), ('source_coord_y', '<i4'), ('group_coord_x', '<i4'), ('group_coord_y', '<i4'), ('coordinate_unit', '<i2'), ('weathering_velocity', '<i2'), ('subweathering_velocity', '<i2'), ('source_uphole_time', '<i2'), ('group_uphole_time', '<i2'), ('source_static_correction', '<i2'), ('receiver_static_correction', '<i2'), ('total_static_applied', '<i2'), ('lag_time_a', '<i2'), ('lag_time_b', '<i2'), ('delay_recording_time', '<i2'), ('mute_time_start', '<i2'), ('mute_time_end', '<i2'), ('samples_per_trace', '<i2'), ('sample_interval', '<i2'), ('instrument_gain_type', '<i2'), ('instrument_gain_const', '<i2'), ('instrument_gain_initial', '<i2'), ('correlated_data', '<i2'), ('sweep_freq_start', '<i2'), ('sweep_freq_end', '<i2'), ('sweep_length', '<i2'), ('sweep_type', '<i2'), ('sweep_taper_start', '<i2'), ('sweep_taper_end', '<i2'), ('taper_type', '<i2'), ('alias_filter_freq', '<i2'), ('alias_filter_slope', '<i2'), ('notch_filter_freq', '<i2'), ('notch_filter_slope', '<i2'), ('low_cut_freq', '<i2'), ('high_cut_freq', '<i2'), ('low_cut_slope', '<i2'), ('high_cut_slope', '<i2'), ('year_recorded', '<i2'), ('day_of_year', '<i2'), ('hour_of_day', '<i2'), ('minute_of_hour', '<i2'), ('second_of_minute', '<i2'), ('time_basis_code', '<i2'), ('trace_weighting_factor', '<i2'), ('group_num_roll_switch', '<i2'), ('group_num_first_trace', '<i2'), ('group_num_last_trace', '<i2'), ('gap_size', '<i2'), ('taper_overtravel', '<i2'), ('inline', '<i4'), ('crossline', '<i4'), ('cdp_x', '<i4'), ('cdp_y', '<i4')] 13MB ...\n",
+ " segy_file_header <U1 4B ...\n",
+ " trace_mask (inline, crossline) bool 65kB ...\n",
+ "Attributes:\n",
+ " apiVersion: 1.1.1\n",
+ " createdOn: 2025-12-19 16:05:58.230520+00:00\n",
+ " name: PostStack3DTime\n",
+ " attributes: {'surveyType': '3D', 'gatherType': 'stacked', 'defaultVariab...<xarray.Dataset> Size: 2GB\n",
+ "Dimensions: (inline: 345, crossline: 188, time: 1501)\n",
+ "Coordinates:\n",
+ " * inline (inline) int32 1kB 1 2 3 4 5 6 ... 340 341 342 343 344 345\n",
+ " * crossline (crossline) int32 752B 1 2 3 4 5 6 ... 184 185 186 187 188\n",
+ " * time (time) int32 6kB 0 2 4 6 8 10 ... 2992 2994 2996 2998 3000\n",
+ " cdp_x (inline, crossline) float64 519kB ...\n",
+ " cdp_y (inline, crossline) float64 519kB ...\n",
+ "Data variables:\n",
+ " segy_file_header <U1 4B ...\n",
+ " trace_mask (inline, crossline) bool 65kB ...\n",
+ " amplitude (inline, crossline, time) float32 389MB ...\n",
+ " headers (inline, crossline) [('trace_seq_num_line', '<i4'), ('trace_seq_num_reel', '<i4'), ('orig_field_record_num', '<i4'), ('trace_num_orig_record', '<i4'), ('energy_source_point_num', '<i4'), ('ensemble_num', '<i4'), ('trace_num_ensemble', '<i4'), ('trace_id_code', '<i2'), ('vertically_summed_traces', '<i2'), ('horizontally_stacked_traces', '<i2'), ('data_use', '<i2'), ('source_to_receiver_distance', '<i4'), ('receiver_group_elevation', '<i4'), ('source_surface_elevation', '<i4'), ('source_depth_below_surface', '<i4'), ('receiver_datum_elevation', '<i4'), ('source_datum_elevation', '<i4'), ('source_water_depth', '<i4'), ('receiver_water_depth', '<i4'), ('elevation_depth_scalar', '<i2'), ('coordinate_scalar', '<i2'), ('source_coord_x', '<i4'), ('source_coord_y', '<i4'), ('group_coord_x', '<i4'), ('group_coord_y', '<i4'), ('coordinate_unit', '<i2'), ('weathering_velocity', '<i2'), ('subweathering_velocity', '<i2'), ('source_uphole_time', '<i2'), ('group_uphole_time', '<i2'), ('source_static_correction', '<i2'), ('receiver_static_correction', '<i2'), ('total_static_applied', '<i2'), ('lag_time_a', '<i2'), ('lag_time_b', '<i2'), ('delay_recording_time', '<i2'), ('mute_time_start', '<i2'), ('mute_time_end', '<i2'), ('samples_per_trace', '<i2'), ('sample_interval', '<i2'), ('instrument_gain_type', '<i2'), ('instrument_gain_const', '<i2'), ('instrument_gain_initial', '<i2'), ('correlated_data', '<i2'), ('sweep_freq_start', '<i2'), ('sweep_freq_end', '<i2'), ('sweep_length', '<i2'), ('sweep_type', '<i2'), ('sweep_taper_start', '<i2'), ('sweep_taper_end', '<i2'), ('taper_type', '<i2'), ('alias_filter_freq', '<i2'), ('alias_filter_slope', '<i2'), ('notch_filter_freq', '<i2'), ('notch_filter_slope', '<i2'), ('low_cut_freq', '<i2'), ('high_cut_freq', '<i2'), ('low_cut_slope', '<i2'), ('high_cut_slope', '<i2'), ('year_recorded', '<i2'), ('day_of_year', '<i2'), ('hour_of_day', '<i2'), ('minute_of_hour', '<i2'), ('second_of_minute', '<i2'), ('time_basis_code', '<i2'), ('trace_weighting_factor', '<i2'), ('group_num_roll_switch', '<i2'), ('group_num_first_trace', '<i2'), ('group_num_last_trace', '<i2'), ('gap_size', '<i2'), ('taper_overtravel', '<i2'), ('inline', '<i4'), ('crossline', '<i4'), ('cdp_x', '<i4'), ('cdp_y', '<i4')] 13MB ...\n",
+ " fast_crossline (inline, crossline, time) float32 389MB ...\n",
+ " fast_inline (inline, crossline, time) float32 389MB ...\n",
+ " fast_time (inline, crossline, time) float32 389MB ...\n",
+ "Attributes:\n",
+ " apiVersion: 1.1.1\n",
+ " createdOn: 2025-12-19 16:05:58.230520+00:00\n",
+ " name: PostStack3DTime\n",
+ " attributes: {'surveyType': '3D', 'gatherType': 'stacked', 'defaultVariab..."
+ ],
+ "text/plain": [
+ "