Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llmc/compression/quantization/quarot.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def get_orthogonal_matrix(self):
raise ValueError(f'Unsupported mode {self.mode}')

def block_transform(self, block):
logger.info(f'Start transform the {self.block_idx+1}-th block')
logger.info(f'Start transform the {self.block_idx + 1}-th block')

if self.online_rotate:
self.replace_rotate_linears(block)
Expand All @@ -108,7 +108,7 @@ def block_transform(self, block):
gc.collect()

logger.info(f'block:{block}')
logger.info(f'End transform the {self.block_idx+1}-th block')
logger.info(f'End transform the {self.block_idx + 1}-th block')

@torch.no_grad()
def subset_transform(self, block, subset):
Expand Down
4 changes: 2 additions & 2 deletions llmc/compression/quantization/spqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def block_transform_true_sequential(self, block, input_feat):

@torch.no_grad()
def block_transform(self, block, input_feat, *block_kwargs):
logger.info(f'Start transform the {self.block_idx+1}-th block')
logger.info(f'Start transform the {self.block_idx + 1}-th block')

if self.true_sequential:
self.block_transform_true_sequential(block, input_feat)
Expand All @@ -103,7 +103,7 @@ def block_transform(self, block, input_feat, *block_kwargs):
self.get_replacement_params(mode='fake_quant', w_only=True),
)

logger.info(f'End transform the {self.block_idx+1}-th block')
logger.info(f'End transform the {self.block_idx + 1}-th block')

@torch.no_grad()
def subset_transform(self, layers_dict):
Expand Down
8 changes: 4 additions & 4 deletions llmc/compression/quantization/tesseraq.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def collect_block_qparams(self, block, input_feat):

@torch.no_grad()
def block_transform(self, block, input_feat, block_kwargs):
logger.info(f'Start transform the {self.block_idx+1}-th block')
logger.info(f'Start transform the {self.block_idx + 1}-th block')

with torch.no_grad():
block.float()
Expand Down Expand Up @@ -204,7 +204,7 @@ def block_transform(self, block, input_feat, block_kwargs):
if self.reduce_memory:
block.to(self.model_dtype)

logger.info(f'End transform the {self.block_idx+1}-th block')
logger.info(f'End transform the {self.block_idx + 1}-th block')

def tesseraq_train(self, block):
self.set_dynamic_tmp_quant(block, on=True)
Expand Down Expand Up @@ -273,8 +273,8 @@ def tesseraq_train(self, block):
norm = loss_scaler(loss, optimizer, parameters=params_r + params_s)

logger.info(
f'block {self.block_idx} iter {i+1} loss:{loss.item():5f} \
norm:{norm.item():4f} HR progress:{(1-thresholds[i])*100:1f}% '
f'block {self.block_idx} iter {i + 1} loss:{loss.item():5f} \
norm:{norm.item():4f} HR progress:{(1 - thresholds[i]) * 100:1f}% '
)
for p in params_r + params_s:
p.requires_grad = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def block_opt(self, block):
self.block_transform(block)

def block_transform(self, block, input_feat, block_kwargs):
logger.info(f'Start transform the {self.block_idx+1}-th block')
logger.info(f'Start transform the {self.block_idx + 1}-th block')
subsets = self.model.get_subsets_in_block(block)
for index, subset in enumerate(subsets):
if not self.filter_subset(subset):
Expand All @@ -174,7 +174,7 @@ def block_transform(self, block, input_feat, block_kwargs):
inspect_module,
subset_kwargs
)
logger.info(f'End transform the {self.block_idx+1}-th block')
logger.info(f'End transform the {self.block_idx + 1}-th block')

def filter_subset(self, subset):
return True
Expand Down
4 changes: 2 additions & 2 deletions llmc/compression/sparsification/dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ def __init__(self, model, sparsity_config, input, padding_mask, config):
super().__init__(model, sparsity_config, input, padding_mask, config)

def block_transform(self, block):
logger.info(f'Start transform the {self.block_idx+1}-th block')
logger.info(f'Start transform the {self.block_idx + 1}-th block')
logger.info(block)
logger.info(f'End transform the {self.block_idx+1}-th block')
logger.info(f'End transform the {self.block_idx + 1}-th block')
2 changes: 1 addition & 1 deletion llmc/compression/sparsification/shortgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def block_opt(self, block):
self.input['data'] = output_feat

def block_transform(self, input_feat, output_feat):
logger.info(f'Start transform the {self.block_idx+1}-th block')
logger.info(f'Start transform the {self.block_idx + 1}-th block')
self.subset_transform(
input_feat,
output_feat
Expand Down
4 changes: 2 additions & 2 deletions llmc/eval/eval_vqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ def _adjust_config(task_dict):
gen_max_mem = torch.cuda.max_memory_allocated() / 1024 / 1024

logger.info(f'peak memory: {gen_max_mem:.1f} MB.')
logger.info(f'prefill average time: {prefill *1000:.1f} ms.')
logger.info(f'decode average time: {decode *1000:.1f} ms.')
logger.info(f'prefill average time: {prefill * 1000:.1f} ms.')
logger.info(f'decode average time: {decode * 1000:.1f} ms.')

if hasattr(lm, '_model'):
del lm._model
Expand Down
5 changes: 3 additions & 2 deletions llmc/models/mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def get_subsets_in_block(self, block):
return self._get_subsets_fused(block)

def _get_subsets_legacy(self, block):
"""transformers <5.0: block.block_sparse_moe with ModuleList experts."""
"""Transformers <5.0: block.block_sparse_moe with ModuleList
experts."""
moe = block.block_sparse_moe
return [
{
Expand Down Expand Up @@ -106,7 +107,7 @@ def _get_subsets_legacy(self, block):
]

def _get_subsets_fused(self, block):
"""transformers >=5.0: block.mlp with fused MixtralExperts."""
"""Transformers >=5.0: block.mlp with fused MixtralExperts."""
moe = block.mlp
return [
{
Expand Down
Loading