diff --git a/imap_processing/mag/l1c/interpolation_methods.py b/imap_processing/mag/l1c/interpolation_methods.py index 717e53cdb1..70ac74ae52 100644 --- a/imap_processing/mag/l1c/interpolation_methods.py +++ b/imap_processing/mag/l1c/interpolation_methods.py @@ -296,7 +296,7 @@ def linear_filtered( input_filtered, vectors_filtered = cic_filter( input_vectors, input_timestamps, output_timestamps, input_rate, output_rate ) - return linear(vectors_filtered, input_filtered, output_timestamps) + return linear(vectors_filtered, input_filtered, output_timestamps, extrapolate=True) def quadratic_filtered( diff --git a/imap_processing/mag/l1c/mag_l1c.py b/imap_processing/mag/l1c/mag_l1c.py index 6b6eec9dbc..af58f61d0b 100644 --- a/imap_processing/mag/l1c/mag_l1c.py +++ b/imap_processing/mag/l1c/mag_l1c.py @@ -13,6 +13,8 @@ logger = logging.getLogger(__name__) +GAP_TOLERANCE = 0.075 + def mag_l1c( first_input_dataset: xr.Dataset, @@ -461,6 +463,8 @@ def interpolate_gaps( 6-7 - compression flags. """ burst_epochs = burst_dataset["epoch"].data + norm_epochs = filled_norm_timeline[:, 0] + has_norm_context = np.any(filled_norm_timeline[:, 5] == ModeFlags.NORM.value) # Exclude range values burst_vectors = burst_dataset["vectors"].data # Default to two vectors per second @@ -497,14 +501,18 @@ def interpolate_gaps( burst_buffer = int(required_seconds * burst_rate.value) burst_start = max(0, burst_gap_start - burst_buffer) - burst_end = min(len(burst_epochs) - 1, burst_gap_end + burst_buffer) + burst_end = min(len(burst_epochs), burst_gap_end + burst_buffer + 1) - gap_timeline = filled_norm_timeline[ - (filled_norm_timeline > gap[0]) & (filled_norm_timeline < gap[1]) - ] + gap_timeline = norm_epochs[(norm_epochs > gap[0]) & (norm_epochs < gap[1])] + + short_end = burst_epochs[burst_end - 1] + if not has_norm_context: + # In the burst-only fallback, CIC delay compensation shortens the usable + # filtered range at the trailing edge by roughly one output cadence. + short_end -= int(1e9 / norm_rate.value) short = (gap_timeline >= burst_epochs[burst_start]) & ( - gap_timeline <= burst_epochs[burst_end] + gap_timeline <= short_end ) num_short = int(short.sum()) @@ -524,7 +532,7 @@ def interpolate_gaps( # gaps should not have data in timeline, still check it for index, timestamp in enumerate(adjusted_gap_timeline): - timeline_index = np.searchsorted(filled_norm_timeline[:, 0], timestamp) + timeline_index = np.searchsorted(norm_epochs, timestamp) if sum( filled_norm_timeline[timeline_index, 1:4] ) == 0 and burst_gap_start + index < len(burst_vectors): @@ -542,13 +550,12 @@ def interpolate_gaps( missing_timeline = np.setdiff1d(gap_timeline, adjusted_gap_timeline) for timestamp in missing_timeline: - timeline_index = np.searchsorted(filled_norm_timeline[:, 0], timestamp) + timeline_index = np.searchsorted(norm_epochs, timestamp) if filled_norm_timeline[timeline_index, 5] != ModeFlags.MISSING.value: raise RuntimeError( "Self-inconsistent data. " "Gaps not included in final timeline should be missing." ) - np.delete(filled_norm_timeline, timeline_index) return filled_norm_timeline @@ -557,8 +564,8 @@ def generate_timeline(epoch_data: np.ndarray, gaps: np.ndarray) -> np.ndarray: """ Generate a new timeline from existing, gap-filled timeline and gaps. - The gaps are generated at a .5 second cadence, regardless of the cadence of the - existing data. + The gaps are generated at the cadence implied by the gap rate. If no rate is + provided, a default cadence of 0.5 seconds is used. Parameters ---------- @@ -573,7 +580,8 @@ def generate_timeline(epoch_data: np.ndarray, gaps: np.ndarray) -> np.ndarray: numpy.ndarray The new timeline, filled with the existing data and the generated gaps. """ - full_timeline: np.ndarray = np.array([]) + epoch_data = np.asarray(epoch_data) + full_timeline: np.ndarray = np.array([], dtype=epoch_data.dtype) last_index = 0 for gap in gaps: epoch_start_index = np.searchsorted(epoch_data, gap[0], side="left") @@ -582,6 +590,7 @@ def generate_timeline(epoch_data: np.ndarray, gaps: np.ndarray) -> np.ndarray: ) generated_timestamps = generate_missing_timestamps(gap) if generated_timestamps.size == 0: + last_index = int(np.searchsorted(epoch_data, gap[1], side="left")) continue # Remove any generated timestamps that are already in the timeline @@ -639,37 +648,48 @@ def find_all_gaps( specified as (start, end, vector_rate) where start and end both exist in the timeline. """ - gaps: np.ndarray = np.zeros((0, 3)) + gaps: np.ndarray = np.empty((0, 3), dtype=np.int64) # TODO: when we go back to the previous file, also retrieve expected # vectors per second vecsec_dict = {0: VecSec.TWO_VECS_PER_S.value} | (vecsec_dict or {}) - end_index = epoch_data.shape[0] + rate_segments = _find_rate_segments(epoch_data, vecsec_dict) + if rate_segments: + first_rate = rate_segments[0][1] + last_rate = rate_segments[-1][1] + else: + default_rate = next(iter(vecsec_dict.values())) + first_rate = default_rate + last_rate = default_rate if start_of_day_ns is not None and epoch_data[0] > start_of_day_ns: # Add a gap from the start of the day to the first timestamp - gaps = np.concatenate( - (gaps, np.array([[start_of_day_ns, epoch_data[0], vecsec_dict[0]]])) - ) - - for start_time in reversed(sorted(vecsec_dict.keys())): - # Find the start index that is equal to or immediately after start_time - start_index = np.searchsorted(epoch_data, start_time, side="left") gaps = np.concatenate( ( - find_gaps( - epoch_data[start_index : end_index + 1], vecsec_dict[start_time] - ), gaps, + np.array( + [[start_of_day_ns, epoch_data[0], first_rate]], dtype=np.int64 + ), ) ) - end_index = start_index + + for index, (start_index, vectors_per_second) in enumerate(rate_segments): + next_start_index = ( + rate_segments[index + 1][0] + if index + 1 < len(rate_segments) + else epoch_data.shape[0] - 1 + ) + epoch_slice = epoch_data[start_index : next_start_index + 1] + gaps = np.concatenate((gaps, find_gaps(epoch_slice, vectors_per_second))) if end_of_day_ns is not None and epoch_data[-1] < end_of_day_ns: gaps = np.concatenate( - (gaps, np.array([[epoch_data[-1], end_of_day_ns, vecsec_dict[start_time]]])) + ( + gaps, + np.array([[epoch_data[-1], end_of_day_ns, last_rate]], dtype=np.int64), + ) ) return gaps @@ -696,14 +716,19 @@ def find_gaps(timeline_data: np.ndarray, vectors_per_second: int) -> np.ndarray: end_gap, as well as vectors_per_second. Start_gap and end_gap both correspond to points in timeline_data. """ + if timeline_data.shape[0] < 2: + return np.empty((0, 3), dtype=np.int64) + # Expected difference between timestamps in nanoseconds. expected_gap = 1 / vectors_per_second * 1e9 diffs = abs(np.diff(timeline_data)) # Gap can be up to 7.5% larger than expected vectors per second due to clock drift - gap_index = np.asarray(diffs - expected_gap > expected_gap * 0.075).nonzero()[0] - output: np.ndarray = np.zeros((len(gap_index), 3)) + gap_index = np.asarray( + diffs - expected_gap > expected_gap * GAP_TOLERANCE + ).nonzero()[0] + output: np.ndarray = np.zeros((len(gap_index), 3), dtype=np.int64) for index, gap in enumerate(gap_index): output[index, :] = [ @@ -719,8 +744,8 @@ def generate_missing_timestamps(gap: np.ndarray) -> np.ndarray: """ Generate a new timeline from input gaps. - Any gaps specified in gaps will be filled with timestamps that are 0.5 seconds - apart. + Any gaps specified in gaps will be filled with timestamps at the gap rate. If the + gap rate is not included, the default cadence is 0.5 seconds. Parameters ---------- @@ -734,12 +759,84 @@ def generate_missing_timestamps(gap: np.ndarray) -> np.ndarray: full_timeline: numpy.ndarray Completed timeline. """ - # Generated timestamps should always be 0.5 seconds apart - difference_ns = 0.5 * 1e9 - output: np.ndarray = np.arange(gap[0], gap[1], difference_ns) + difference_ns = int(0.5 * 1e9) + if len(gap) > 2: + difference_ns = int(1e9 / int(gap[2])) + output: np.ndarray = np.arange( + int(np.rint(gap[0])), + int(np.rint(gap[1])), + difference_ns, + dtype=np.int64, + ) return output +def _is_expected_rate(timestamp_difference: float, vectors_per_second: int) -> bool: + """ + Determine whether a timestamp spacing matches an expected cadence. + + Parameters + ---------- + timestamp_difference : float + The observed spacing between adjacent timestamps, in nanoseconds. + vectors_per_second : int + The expected number of vectors per second for the cadence being checked. + + Returns + ------- + bool + True when the observed spacing is within `GAP_TOLERANCE` of the expected + cadence. + """ + expected_gap = 1 / vectors_per_second * 1e9 + return abs(timestamp_difference - expected_gap) <= expected_gap * GAP_TOLERANCE + + +def _find_rate_segments( + epoch_data: np.ndarray, vecsec_dict: dict[int, int] +) -> list[tuple[int, int]]: + """ + Build contiguous rate segments using observed cadence near each transition. + + Walk each configured transition backward while the observed cadence already matches + the new rate so gaps stay attached to the correct segment instead of producing + spurious single-sample micro-gaps at delayed Config boundaries. + + Parameters + ---------- + epoch_data : numpy.ndarray + The sorted epoch timestamps for the current timeline, in nanoseconds. + vecsec_dict : dict[int, int] + Mapping of transition start time to expected vectors-per-second rate. + + Returns + ------- + list[tuple[int, int]] + Pairs of `(start_index, vectors_per_second)` describing contiguous rate + segments in `epoch_data`. + """ + if epoch_data.shape[0] == 0: + return [] + + segments: list[tuple[int, int]] = [] + for start_time, vectors_per_second in sorted(vecsec_dict.items()): + start_index = int(np.searchsorted(epoch_data, start_time, side="left")) + start_index = min(start_index, epoch_data.shape[0] - 1) + lower_bound = segments[-1][0] if segments else 0 + + while start_index > lower_bound and _is_expected_rate( + epoch_data[start_index] - epoch_data[start_index - 1], vectors_per_second + ): + start_index -= 1 + + if segments and start_index == segments[-1][0]: + segments[-1] = (start_index, vectors_per_second) + else: + segments.append((start_index, vectors_per_second)) + + return segments + + def vectors_per_second_from_string(vecsec_string: str) -> dict: """ Extract the vectors per second from a string into a dictionary. diff --git a/imap_processing/tests/mag/test_mag_l1c.py b/imap_processing/tests/mag/test_mag_l1c.py index d19cb1a8aa..69b6383751 100644 --- a/imap_processing/tests/mag/test_mag_l1c.py +++ b/imap_processing/tests/mag/test_mag_l1c.py @@ -13,6 +13,7 @@ fill_normal_data, find_all_gaps, find_gaps, + generate_missing_timestamps, generate_timeline, interpolate_gaps, mag_l1c, @@ -112,7 +113,9 @@ def test_interpolation_methods(): def test_process_mag_l1c(norm_dataset, burst_dataset): l1c = process_mag_l1c(norm_dataset, burst_dataset, InterpolationFunction.linear) expected_output_timeline = ( - np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.25, 4.75, 5.25, 5.5, 5.75, 6]) + np.array( + [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.25, 4.5, 4.75, 5, 5.25, 5.5, 5.75, 6] + ) * 1e9 ) assert np.array_equal(l1c[:, 0], expected_output_timeline) @@ -122,12 +125,12 @@ def test_process_mag_l1c(norm_dataset, burst_dataset): np.count_nonzero([np.sum(l1c[i, 1:4]) for i in range(l1c.shape[0])]) == l1c.shape[0] - 1 ) - expected_flags = np.zeros(15) + expected_flags = np.zeros(17) # filled sections should have 1 as a flag expected_flags[5:8] = 1 - expected_flags[10:11] = 1 + expected_flags[10:13] = 1 # last datapoint in the gap is missing a value - expected_flags[11] = -1 + expected_flags[13] = -1 assert np.array_equal(l1c[:, 5], expected_flags) assert np.array_equal(l1c[:5, 1:5], norm_dataset["vectors"].data[:5, :]) for i in range(5, 8): @@ -140,7 +143,7 @@ def test_process_mag_l1c(norm_dataset, burst_dataset): assert np.allclose(l1c[i, 1:5], burst_vectors, rtol=0, atol=1) assert np.array_equal(l1c[8:10, 1:5], norm_dataset["vectors"].data[5:7, :]) - for i in range(10, 11): + for i in range(10, 13): e = l1c[i, 0] burst_vectors = burst_dataset.sel(epoch=int(e), method="nearest")[ "vectors" @@ -149,7 +152,8 @@ def test_process_mag_l1c(norm_dataset, burst_dataset): # identical. assert np.allclose(l1c[i, 1:5], burst_vectors, rtol=0, atol=1) - assert np.array_equal(l1c[11, 1:5], [0, 0, 0, 0]) + assert np.array_equal(l1c[13, 1:5], [0, 0, 0, 0]) + assert np.array_equal(l1c[14:, 1:5], norm_dataset["vectors"].data[7:, :]) def test_interpolate_gaps(norm_dataset, mag_l1b_dataset): @@ -435,6 +439,26 @@ def test_find_gaps(): assert np.array_equal(gaps, expected_return) +def test_find_all_gaps_uses_observed_transition_boundary(): + epoch_transition = np.array([0, 0.5, 1, 1.5, 2, 10, 11, 12, 13, 14, 15, 16]) * 1e9 + vectors_per_second_transition = vectors_per_second_from_string("0:2,15000000000:1") + output_transition = find_all_gaps(epoch_transition, vectors_per_second_transition) + expected_transition = np.array([[2 * 1e9, 10 * 1e9, 2]]) + assert np.array_equal(output_transition, expected_transition) + + +def test_generate_missing_timestamps_uses_gap_rate(): + gap = np.array([1_000_000_000, 2_000_000_000, 4], dtype=np.int64) + expected_output = np.array( + [1_000_000_000, 1_250_000_000, 1_500_000_000, 1_750_000_000], dtype=np.int64 + ) + assert np.array_equal(generate_missing_timestamps(gap), expected_output) + + legacy_gap = np.array([1_000_000_000, 2_000_000_000], dtype=np.int64) + legacy_expected = np.array([1_000_000_000, 1_500_000_000], dtype=np.int64) + assert np.array_equal(generate_missing_timestamps(legacy_gap), legacy_expected) + + def test_generate_timeline(): epoch_test = generate_test_epoch( 3, [VecSec.FOUR_VECS_PER_S], gaps=[[0.5, 1], [2, 3]] @@ -504,6 +528,49 @@ def test_generate_timeline(): expected_edge = np.array([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]) * 1e9 assert np.array_equal(output_edge, expected_edge) + # Test Case: Gap fill uses the gap rate instead of a fixed 0.5 second cadence + epoch_rate_gap = np.array([0, 0.25, 0.5, 0.75, 1, 4, 4.25, 4.5]) * 1e9 + gaps_rate_gap = np.array([[1_000_000_000, 4_000_000_000, 4]]) + output_rate_gap = generate_timeline(epoch_rate_gap, gaps_rate_gap) + expected_rate_gap = ( + np.array( + [ + 0, + 0.25, + 0.5, + 0.75, + 1, + 1.25, + 1.5, + 1.75, + 2, + 2.25, + 2.5, + 2.75, + 3, + 3.25, + 3.5, + 3.75, + 4, + 4.25, + 4.5, + ] + ) + * 1e9 + ) + assert np.array_equal(output_rate_gap, expected_rate_gap) + + # Test Case: Adjacent gaps share a real epoch boundary at gap[0]. + # generate_timeline() should not duplicate that boundary timestamp: + # np.arange() is end-exclusive for each generated segment, and the + # epoch-copy/final-append logic preserves the real boundary sample once. + epoch_adj = np.array([0, 0.5, 1, 2, 3, 3.5, 4]) * 1e9 + gaps_adj = np.array([[1e9, 2e9, 2], [2e9, 3e9, 4]]) + output_adj = generate_timeline(epoch_adj, gaps_adj) + expected_adj = np.array([0, 0.5, 1, 1.5, 2, 2.25, 2.5, 2.75, 3, 3.5, 4]) * 1e9 + assert np.array_equal(output_adj, expected_adj) + assert len(output_adj) == len(np.unique(output_adj)) + def test_gap_detection_timeline_generation_workflow(): # Create a test dataset with gaps diff --git a/imap_processing/tests/mag/test_mag_validation.py b/imap_processing/tests/mag/test_mag_validation.py index 696383495d..14c5a79228 100644 --- a/imap_processing/tests/mag/test_mag_validation.py +++ b/imap_processing/tests/mag/test_mag_validation.py @@ -260,10 +260,6 @@ def test_mag_l1b_validation(test_number, mocks): @pytest.mark.parametrize(("sensor"), ["mago", "magi"]) @pytest.mark.external_test_data def test_mag_l1c_validation(test_number, sensor): - if test_number not in ["013", "014", "024"]: - pytest.skip("All L1C edge cases are not yet complete") - - # We expect tests 013 and 014 to pass. 015 and 016 are not yet complete. # timestamp = ( # (np.datetime64("2025-03-11T12:22:50.706034") - np.datetime64(TTJ2000_EPOCH)) # / np.timedelta64(1, "ns") @@ -308,6 +304,7 @@ def test_mag_l1c_validation(test_number, sensor): expected_output = pd.read_csv( source_directory / f"mag-l1b-l1c-t{test_number}-{sensor}-normal-out.csv" ) + assert len(expected_output.index) == len(l1c["epoch"].data) for index in expected_output.index: assert np.allclose( diff --git a/poetry.lock b/poetry.lock index a72f1673d2..3c286eb122 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1082,6 +1082,16 @@ files = [ {file = "netCDF4-1.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:572f71459ef4b30e8554dcc4e1e6f55de515acc82a50968b48fe622244a64548"}, {file = "netCDF4-1.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f77e72281acc5f331f82271e5f7f014d46f5ca9bcaa5aafe3e46d66cee21320"}, {file = "netCDF4-1.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:d0fa7a9674fae8ae4877e813173c3ff7a6beee166b8730bdc847f517b282ed31"}, + {file = "netcdf4-1.7.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:16c3ba053930ed990e58827de6ab03184e407549004fb77438b98e5777e8cf3b"}, + {file = "netcdf4-1.7.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:142c9ed2db8a87a15ae0530c8a99f4f045435b0f495df733e9f111995e389d4f"}, + {file = "netcdf4-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76cb3bbbbe4cd5fca612578eb105c16217380f7f93af2b549e8f38296bc906bb"}, + {file = "netcdf4-1.7.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:835ae7bcef666c967241baeeee9bef9376ddb7527297b24735597131f6f628e2"}, + {file = "netcdf4-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:73bd7eda3cefb04c4076e76911f652f5ed56bf434e0a3958e367932953437557"}, + {file = "netcdf4-1.7.2-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:7e81c3c47f2772eab0b93fba8bb05b17b58dce17720e1bed25e9d76551deecd0"}, + {file = "netcdf4-1.7.2-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:cb2791dba37fc98fd1ac4e236c97822909f54efbcdf7f1415c9777810e0a28f4"}, + {file = "netcdf4-1.7.2-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf11480f6b8a5b246818ffff6b4d90481e51f8b9555b41af0c372eb0aaf8b65f"}, + {file = "netcdf4-1.7.2-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ccc05328a8ff31921b539821791aeb20b054879f3fdf6d1d505bf6422824fec"}, + {file = "netcdf4-1.7.2-cp311-abi3-win_amd64.whl", hash = "sha256:999bfc4acebf400ed724d5e7329e2e768accc7ee1fa1d82d505da782f730301b"}, {file = "netcdf4-1.7.2.tar.gz", hash = "sha256:a4c6375540b19989896136943abb6d44850ff6f1fa7d3f063253b1ad3f8b7fce"}, ]