From c53fedded8c0bf6688c258fcbd54865f4c9761ef Mon Sep 17 00:00:00 2001 From: Phani Velicheti Date: Tue, 3 Mar 2026 11:33:39 -0800 Subject: [PATCH] SP-2017: Performance optimizations --- tractor/lsqr_optimizer.py | 21 ++++++++++++++++----- tractor/optimize.py | 27 +++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/tractor/lsqr_optimizer.py b/tractor/lsqr_optimizer.py index 438305a4..165e2585 100644 --- a/tractor/lsqr_optimizer.py +++ b/tractor/lsqr_optimizer.py @@ -28,7 +28,8 @@ def _optimize_forcedphot_core( for um, dd in zip(umods, derivs): if um is None: continue - dd.append((um * scale, tim)) + dd.append((um , scale, tim)) # Keep scale separate: um * scale would call + # Tractor's Patch.__mul__, which allocates a new numpy array and increases memory use #logverb('forced phot: derivs', Time() - t0) if sky: # Sky derivatives are part of the image derivatives, so go @@ -369,7 +370,11 @@ def getUpdateDirection(self, tractor, allderivs, damp=0., priors=True, imgoffs = {} nextrow = 0 for param in allderivs: - for deriv, img in param: + for item in param: + if len(item) == 3: + deriv, deriv_scale, img = item + else: + deriv, img = item if img in imgoffs: continue imgoffs[img] = nextrow @@ -389,7 +394,14 @@ def getUpdateDirection(self, tractor, allderivs, damp=0., priors=True, RR = [] VV = [] WW = [] - for (deriv, img) in param: + for item in param: + + if len(item) == 3: + deriv, deriv_scale, img = item + else: + deriv, img = item + deriv_scale = 1.0 + inverrs = img.getInvError() (H, W) = img.shape row0 = imgoffs[img] @@ -409,7 +421,7 @@ def getUpdateDirection(self, tractor, allderivs, damp=0., priors=True, continue rows = row0 + pix[nz] #print('Adding derivative', deriv.getName(), 'for image', img.name) - vals = dimg.flat[nz] + vals = dimg.flat[nz] * deriv_scale w = inverrs[deriv.getSlice(img)].flat[nz] assert(vals.shape == w.shape) # if not scales_only: @@ -669,7 +681,6 @@ def getUpdateDirection(self, tractor, allderivs, damp=0., priors=True, return X, 
1./np.array(var) return X - # def getParameterScales(self): # print(self.getName()+': Finding derivs...') # allderivs = self.getDerivs() diff --git a/tractor/optimize.py b/tractor/optimize.py index 601f734a..88e6eac2 100644 --- a/tractor/optimize.py +++ b/tractor/optimize.py @@ -3,7 +3,28 @@ from astrometry.util.ttime import Time from tractor.engine import logverb, OptResult, logmsg - +import numba + +@numba.njit(fastmath=True, nogil=True) +def fast_add_to(mod_img, patch_data, counts, x0, y0): + img_h, img_w = mod_img.shape + patch_h, patch_w = patch_data.shape + + # 1. Equivalent to get_overlapping_region for Y + y_start = max(0, -y0) + y_end = min(patch_h, img_h - y0) + + # 2. Equivalent to get_overlapping_region for X + x_start = max(0, -x0) + x_end = min(patch_w, img_w - x0) + + # 3. Add to image (no empty-list check needed: if start >= end, the loop just doesn't run) + for y in range(y_start, y_end): + for x in range(x_start, x_end): + # mod_img[y0 + y, x0 + x] is the 'out' coordinate + # patch_data[y, x] is the 'in' coordinate + mod_img[y0 + y, x0 + x] += patch_data[y, x] * counts + class Optimizer(object): def optimize(self, tractor, alphas=None, damp=0, priors=True, scale_columns=True, shared_params=True, variance=False, @@ -221,6 +242,7 @@ def _get_umodels(self, tractor, srcs, imgs, minsb, rois, **kwargs): umodels.append(umods) return umodels, umodtosource, umodsforsource + def _optimize_forcedphot_core( self, tractor, result, umodels, imlist, mod0, scales, skyderivs, minFlux, @@ -531,7 +553,8 @@ def _getims(self, fluxes, imgs, umodels, mod0, scales, sky, minFlux, rois): assert(np.isfinite(counts)) assert(np.all(np.isfinite(um.patch))) # print 'Adding umod', um, 'with counts', counts, 'to mod', mod.shape - (um * counts).addTo(mod) + # Replaces (um * counts).addTo(mod); scales while adding instead of via um * counts + fast_add_to(mod, um.patch, counts, um.x0, um.y0) ie = img.getInvError() im = img.getImage()