Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cms/db/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,9 @@ class Evaluation(Base):
nullable=False,
default=[])

# Admin-facing output from the grader.
admin_text: str | None = Column(String, nullable=True, default=None)

# Evaluation's time and wall-clock time, in seconds.
execution_time: float | None = Column(
Float,
Expand Down
12 changes: 10 additions & 2 deletions cms/grading/Job.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def __init__(
info: str | None = None,
success: bool | None = None,
text: list[str] | None = None,
admin_text: str | None = None,
files: dict[str, File] | None = None,
managers: dict[str, Manager] | None = None,
executables: dict[str, Executable] | None = None,
Expand Down Expand Up @@ -121,6 +122,8 @@ def __init__(
to be presented to the user. The first item is a string,
potentially with %-escaping; the following items are the
values to be %-formatted into the first.
admin_text: description of the outcome of the job,
to be shown to admins.
files: files submitted by the user.
managers: managers provided by the admins.
executables: executables created in the compilation.
Expand Down Expand Up @@ -155,6 +158,7 @@ def __init__(

self.success = success
self.text = text
self.admin_text = admin_text

self.files = files
self.managers = managers
Expand All @@ -178,6 +182,7 @@ def export_to_dict(self) -> dict:
'info': self.info,
'success': self.success,
'text': self.text,
'admin_text': self.admin_text,
'files': dict((k, v.digest)
for k, v in self.files.items()),
'managers': dict((k, v.digest)
Expand Down Expand Up @@ -316,6 +321,7 @@ def __init__(
compilation_success: bool | None = None,
executables: dict[str, Executable] | None = None,
text: list[str] | None = None,
admin_text: str | None = None,
plus: dict | None = None,
):
"""Initialization.
Expand All @@ -331,7 +337,7 @@ def __init__(
Job.__init__(self, operation, task_type, task_type_parameters,
language, multithreaded_sandbox, archive_sandbox,
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
text, files, managers, executables)
text, admin_text, files, managers, executables)
self.compilation_success = compilation_success
self.plus = plus

Expand Down Expand Up @@ -537,6 +543,7 @@ def __init__(
success: bool | None = None,
outcome: str | None = None,
text: list[str] | None = None,
admin_text: list[str] | None = None,
user_output: str | None = None,
plus: dict | None = None,
only_execution: bool | None = False,
Expand Down Expand Up @@ -567,7 +574,7 @@ def __init__(
Job.__init__(self, operation, task_type, task_type_parameters,
language, multithreaded_sandbox, archive_sandbox,
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
text, files, managers, executables)
text, admin_text, files, managers, executables)
self.input = input
self.output = output
self.time_limit = time_limit
Expand Down Expand Up @@ -653,6 +660,7 @@ def to_submission(self, sr: SubmissionResult):

sr.evaluations += [Evaluation(
text=self.text,
admin_text=self.admin_text,
outcome=self.outcome,
execution_time=self.plus.get('execution_time'),
execution_wall_clock_time=self.plus.get(
Expand Down
4 changes: 2 additions & 2 deletions cms/grading/scoretypes/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,8 @@ def get_html_details(
translation=translation,
gettext=_, ngettext=n_)
except Exception:
logger.error("Found an invalid score details string. "
"Try invalidating scores.")
logger.exception("Found an invalid score details string. "
"Try invalidating scores.")
return _("Score details temporarily unavailable.")

@abstractmethod
Expand Down
41 changes: 30 additions & 11 deletions cms/grading/steps/trusted.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,14 @@ def _sanitize_message(string: str) -> str:
return string.replace('%', '%%')


def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str], str]:
"""Extract the outcome and the text from a standard manager output.

sandbox: the sandbox whose last execution was a manager writing
a standard manager output.

return: outcome and text.
return: outcome, contestant-facing text and admin-facing text
(not translated).

raise (ValueError): if cannot decode the data.
raise (FileNotFoundError): if any of the sandbox stdout or stderr file
Expand All @@ -108,6 +109,23 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
logger.error("Manager stderr (text) is malformed. %r", error)
raise error

# Parse special commands
admin_text = None
for line in stderr_file.readlines():
line = line.strip()
if not line:
continue

PREFIX = "ADMIN_MESSAGE:"
if line.startswith(PREFIX):
line = _sanitize_message(line[len(PREFIX):].strip())
if admin_text is not None:
admin_text = admin_text + " " + line
else:
admin_text = line
else:
logger.warning(f"Unknown special manager command `{line}`")

try:
outcome = float(outcome)
except ValueError:
Expand All @@ -125,7 +143,7 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
logger.warning("Manager asked to translate text, but string "
"'%s' is not recognized." % remaining)

return outcome, [text]
return outcome, [text], admin_text


def trusted_step(
Expand Down Expand Up @@ -213,7 +231,8 @@ def checker_step(
extra_args: extra arguments to pass to the checker.

return: success (true if the checker was able to check the solution
successfully), outcome and text (both None if success is False).
successfully), outcome, text and admin_text (all None if success
is False).

"""
# Check that the files we are going to inject in the sandbox are not already
Expand All @@ -224,12 +243,12 @@ def checker_step(
if sandbox.file_exists(filename):
logger.error("File %s already in the sandbox for the checker.",
filename)
return False, None, None
return False, None, None, None

# Copy the checker in the sandbox, after making sure it was provided.
if checker_digest is None:
logger.error("Configuration error: missing checker in task managers.")
return False, None, None
return False, None, None, None
sandbox.create_file_from_storage(CHECKER_FILENAME, checker_digest,
executable=True)

Expand All @@ -247,17 +266,17 @@ def checker_step(
if not box_success or not success:
logger.error("Sandbox failed during checker step. "
"See previous logs for the reason.")
return False, None, None
return False, None, None, None

# Extract outcome and text assuming a standard manager output.
try:
outcome, text = extract_outcome_and_text(sandbox)
outcome, text, admin_text = extract_outcome_and_text(sandbox)
except ValueError as e:
logger.error("Invalid output from checker: %s", e)
return False, None, None
return False, None, None, None
except FileNotFoundError as e:
# This should not happen, as the redirect is handled by the sandbox.
logger.error("Missing stdout or stderr file from checker: %s", e)
return False, None, None
return False, None, None, None

return True, outcome, text
return True, outcome, text, admin_text
27 changes: 20 additions & 7 deletions cms/grading/steps/whitediff.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,28 +89,40 @@ def _white_diff(output: typing.BinaryIO, res: typing.BinaryIO) -> bool:

"""

line = 0

while True:
lout = output.readline()
lres = res.readline()
line += 1

# Both files finished: comparison succeeded
if len(lres) == 0 and len(lout) == 0:
return True
return True, None

# Only one file finished: ok if the other contains only blanks
elif len(lres) == 0 or len(lout) == 0:
lout = lout.strip(b''.join(_WHITES))
lres = lres.strip(b''.join(_WHITES))
if len(lout) > 0 or len(lres) > 0:
return False
if len(lout) > 0:
return False, "Contestant output too long"
if len(lres) > 0:
return False, "Contestant output too short"

# Both files still have lines to go: ok if they agree except
# for the number of whitespaces
else:
lout = _white_diff_canonicalize(lout)
lres = _white_diff_canonicalize(lres)
if lout != lres:
return False
LENGTH_LIMIT = 100
if len(lout) > LENGTH_LIMIT:
lout = lout[:LENGTH_LIMIT] + "..."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're concatenating a `str` (`"..."`) onto a `bytes` object here, no? That raises a `TypeError` in Python 3. Could you also add a unit test that covers this case?

if len(lres) > LENGTH_LIMIT:
lres = lres[:LENGTH_LIMIT] + "..."
lout = lout.decode("utf-8", errors='backslashreplace')
lres = lres.decode("utf-8", errors='backslashreplace')
return False, f"Expected `{lres}`, found `{lout}` on line {line}"


def white_diff_fobj_step(
Expand All @@ -129,10 +141,11 @@ def white_diff_fobj_step(
return: the outcome as above and a description text.

"""
if _white_diff(output_fobj, correct_output_fobj):
return 1.0, [EVALUATION_MESSAGES.get("success").message]
correct, admin_text = _white_diff(output_fobj, correct_output_fobj)
if correct:
return 1.0, [EVALUATION_MESSAGES.get("success").message], admin_text
else:
return 0.0, [EVALUATION_MESSAGES.get("wrong").message]
return 0.0, [EVALUATION_MESSAGES.get("wrong").message], admin_text


def white_diff_step(
Expand Down
5 changes: 4 additions & 1 deletion cms/grading/tasktypes/Batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,12 @@ def _execution_step(self, job, file_cacher):
return outcome, text, output_file_params, stats, box_success, sandbox

def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, stats, box_success, sandbox, extra_args):
admin_text = None

if box_success:
assert (output_file_params is None) == (outcome is not None)
if output_file_params is not None:
box_success, outcome, text = eval_output(
box_success, outcome, text, admin_text = eval_output(
file_cacher, job,
self.CHECKER_CODENAME
if self._uses_checker() else None,
Expand All @@ -378,6 +380,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
job.outcome = str(outcome) if outcome is not None else None
job.text = text
job.plus = stats
job.admin_text = admin_text

if sandbox is not None:
delete_sandbox(sandbox, job)
Expand Down
4 changes: 3 additions & 1 deletion cms/grading/tasktypes/Communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ def evaluate(self, job, file_cacher):
and box_success_mgr and evaluation_success_mgr
outcome = None
text = None
admin_text = None

# If at least one sandbox had problems, or the manager did not
# terminate correctly, we report an error (and no need for user stats).
Expand All @@ -415,7 +416,7 @@ def evaluate(self, job, file_cacher):

# Otherwise, we use the manager to obtain the outcome.
else:
outcome, text = extract_outcome_and_text(sandbox_mgr)
outcome, text, admin_text = extract_outcome_and_text(sandbox_mgr)

# If asked so, save the output file with additional information,
# provided that it exists.
Expand All @@ -433,6 +434,7 @@ def evaluate(self, job, file_cacher):
job.outcome = "%s" % outcome if outcome is not None else None
job.text = text
job.plus = stats_user
job.admin_text = admin_text

delete_sandbox(sandbox_mgr, job)
for s in sandbox_user:
Expand Down
3 changes: 2 additions & 1 deletion cms/grading/tasktypes/OutputOnly.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def evaluate(self, job, file_cacher):
return

# First and only step: eval the user output.
box_success, outcome, text = eval_output(
box_success, outcome, text, admin_text = eval_output(
file_cacher, job,
OutputOnly.CHECKER_CODENAME if self._uses_checker() else None,
user_output_digest=job.files[user_output_filename].digest)
Expand All @@ -133,5 +133,6 @@ def evaluate(self, job, file_cacher):
job.success = box_success
job.outcome = str(outcome) if outcome is not None else None
job.text = text
job.admin_text = admin_text
# There is no actual evaluation, so no statistics.
job.plus = {} if box_success else None
4 changes: 3 additions & 1 deletion cms/grading/tasktypes/TwoSteps.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ def evaluate(self, job, file_cacher):

outcome = None
text = None
admin_text = None

# Error in the sandbox: nothing to do!
if not box_success:
Expand Down Expand Up @@ -333,7 +334,7 @@ def evaluate(self, job, file_cacher):

# Otherwise evaluate the output file.
else:
box_success, outcome, text = eval_output(
box_success, outcome, text, admin_text = eval_output(
file_cacher, job,
TwoSteps.CHECKER_CODENAME
if self._uses_checker() else None,
Expand All @@ -344,6 +345,7 @@ def evaluate(self, job, file_cacher):
job.success = box_success
job.outcome = str(outcome) if outcome is not None else None
job.text = text
job.admin_text = admin_text
job.plus = stats

delete_sandbox(first_sandbox, job)
Expand Down
16 changes: 8 additions & 8 deletions cms/grading/tasktypes/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def eval_output(
user_output_digest: str | None = None,
user_output_filename: str = "",
extra_args: list[str] | None = None
) -> tuple[bool, float | None, list[str] | None]:
) -> tuple[bool, float | None, list[str] | None, str]:
"""Evaluate ("check") a user output using a white diff or a checker.
file_cacher: file cacher to use to get files.
Expand All @@ -237,8 +237,8 @@ def eval_output(
extra_args: additional arguments to pass to the checker
return: tuple of success (true if the checker was
able to check the solution successfully), outcome and text (both None
if success is False).
able to check the solution successfully), outcome, text and admin_text
(all None if success is False).
"""
if (user_output_path is None) == (user_output_digest is None):
Expand All @@ -256,7 +256,7 @@ def eval_output(

if checker_codename is not None:
if not check_manager_present(job, checker_codename):
return False, None, None
return False, None, None, None

# Create a brand-new sandbox just for checking.
sandbox = create_sandbox(file_cacher, name="check")
Expand All @@ -275,12 +275,12 @@ def eval_output(

checker_digest = job.managers[checker_codename].digest \
if checker_codename in job.managers else None
success, outcome, text = checker_step(
success, outcome, text, admin_text = checker_step(
sandbox, checker_digest, job.input, job.output,
EVAL_USER_OUTPUT_FILENAME, extra_args)

delete_sandbox(sandbox, job, success)
return success, outcome, text
return success, outcome, text, admin_text

else:
if user_output_path is not None:
Expand All @@ -289,6 +289,6 @@ def eval_output(
user_output_fobj = file_cacher.get_file(user_output_digest)
with user_output_fobj:
with file_cacher.get_file(job.output) as correct_output_fobj:
outcome, text = white_diff_fobj_step(
outcome, text, admin_text = white_diff_fobj_step(
user_output_fobj, correct_output_fobj)
return True, outcome, text
return True, outcome, text, admin_text
Loading
Loading