diff --git a/CodenameOne/src/com/codename1/annotations/Simd.java b/CodenameOne/src/com/codename1/annotations/Simd.java new file mode 100644 index 0000000000..dc67a09c36 --- /dev/null +++ b/CodenameOne/src/com/codename1/annotations/Simd.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012, Codename One and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Codename One designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Codename One through http://www.codenameone.com/ if you + * need additional information or have any questions. + */ +package com.codename1.annotations; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/// Helper annotations for SIMD/vectorization hints. +/// +/// These are intentionally hints only: runtimes/translators may ignore them, +/// and code should remain correct and performant without relying on them. 
+@SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") +public final class Simd { + + /// Prohibited default constructor; this class only serves as a namespace for the nested annotations. + private Simd() { + throw new AssertionError("Simd should not be instantiated"); + } + + /// Marks a method or constructor as a SIMD vectorization candidate. + @Retention(RetentionPolicy.CLASS) + @Target({ElementType.METHOD, ElementType.CONSTRUCTOR}) + public @interface Candidate { + } + + /// Marks a method or constructor as likely containing a reduction pattern + /// (e.g. sum/min/max over an array). The ParparVM translator rejects it unless `Candidate` is also present. + @Retention(RetentionPolicy.CLASS) + @Target({ElementType.METHOD, ElementType.CONSTRUCTOR}) + public @interface Reduction { + } + + /// Optional preferred SIMD lane count for vectorized code generation. + /// + /// This is a hint only; translators may pick a different width based on + /// target architecture and ABI constraints. Non-positive values are ignored by the ParparVM translator. + @Retention(RetentionPolicy.CLASS) + @Target({ElementType.METHOD, ElementType.CONSTRUCTOR}) + public @interface WidthHint { + int value(); + } +} diff --git a/CodenameOne/src/com/codename1/util/Base64.java b/CodenameOne/src/com/codename1/util/Base64.java index 21a0da0f83..cf9cc8be3e 100644 --- a/CodenameOne/src/com/codename1/util/Base64.java +++ b/CodenameOne/src/com/codename1/util/Base64.java @@ -19,6 +19,8 @@ package com.codename1.util; +import com.codename1.annotations.Simd; + /// This class implements Base64 encoding/decoding functionality /// as specified in RFC 2045 (http://www.ietf.org/rfc/rfc2045.txt). 
public abstract class Base64 { @@ -184,6 +186,8 @@ public static int decode(byte[] in, byte[] out) { return decode(in, in.length, out); } + @Simd.Candidate + @Simd.WidthHint(16) private static int decodeNoWhitespace(byte[] in, int len, byte[] out) { if ((len & 0x3) != 0) { return -1; @@ -334,6 +338,8 @@ public static String encodeNoNewline(byte[] in) { * @param out destination buffer * @return number of bytes written to {@code out} */ + @Simd.Candidate + @Simd.WidthHint(16) public static int encodeNoNewline(byte[] in, byte[] out) { int inputLength = in.length; int outputLength = ((inputLength + 2) / 3) * 4; diff --git a/docs/developer-guide/performance.asciidoc b/docs/developer-guide/performance.asciidoc index e787ec7e78..b9c9249d6d 100644 --- a/docs/developer-guide/performance.asciidoc +++ b/docs/developer-guide/performance.asciidoc @@ -281,3 +281,205 @@ contactsDemo.addScrollListener(new ScrollListener() { ---- NOTE: Due to technical constraints we can't use a lambda in this specific case... + +==== SIMD Hint Annotations (ParparVM) + +SIMD stands for *Single Instruction, Multiple Data*. It is a CPU capability that lets one +instruction operate on multiple array elements at once (also called “lanes”). +For data-parallel code (e.g. transforms, codecs, DSP, image math), SIMD can reduce loop +overhead and improve throughput. + +Conceptually, this scalar loop: + +[source,java] +---- +for (int i = 0; i < n; i++) { + out[i] = (byte)(in[i] ^ 0x5a); +} +---- + +may be lowered by an optimizer into a vector loop plus a scalar tail: + +[source,java] +---- +int i = 0; +int vecEnd = n - (n % 16); // vector width example +for (; i < vecEnd; i += 16) { + // vectorized body over lanes [i ... i+15] +} +for (; i < n; i++) { + // scalar tail for remaining elements +} +---- + +ParparVM includes optional SIMD hint annotations that you can use to mark hot methods as vectorization candidates. + +These hints are advisory only: + +* They do **not** change correctness/semantics. 
+* They may be ignored on runtimes/targets that don't support a given optimization. +* You should still write clean scalar code first, then add hints where profiling shows bottlenecks. + +The annotations are in `com.codename1.annotations.Simd`: + +* `@Simd.Candidate` - marks a method as a likely SIMD candidate. +* `@Simd.Reduction` - marks methods containing reductions (e.g. sum/min/max loops); it must be combined with `@Simd.Candidate`. +* `@Simd.WidthHint(n)` - suggests a preferred SIMD lane count. + +===== `@Simd.Candidate` + +Use this when the method has a clear data-parallel loop where each iteration is mostly +independent (map/transform style operations). + +[source,java] +---- +@Simd.Candidate +public static int xorTransform(byte[] in, byte[] out) { + int n = Math.min(in.length, out.length); + for (int i = 0; i < n; i++) { + out[i] = (byte)(in[i] ^ 0x5a); + } + return n; +} +---- + +===== `@Simd.Reduction` + +Use this for accumulation patterns that combine many values into one result +(sum/min/max/dot product). Reductions often require special vector handling. + +[source,java] +---- +@Simd.Candidate +@Simd.Reduction +public static int sum(int[] values) { + int s = 0; + for (int i = 0; i < values.length; i++) { + s += values[i]; + } + return s; +} +---- + +===== `@Simd.WidthHint(n)` + +Use this to suggest a preferred lane count (for example 16-byte chunks for byte-oriented work). +This is only a hint; the runtime/translator may choose a different width. 
+ +[source,java] +---- +@Simd.Candidate +@Simd.WidthHint(16) +public static int encodeChunked(byte[] in, byte[] out) { + // regular scalar implementation; translator may use hint for vector planning + int n = Math.min(in.length, out.length); + for (int i = 0; i < n; i++) { + out[i] = in[i]; + } + return n; +} +---- + +===== Using hints together + +You can combine hints when appropriate: + +[source,java] +---- +@Simd.Candidate +@Simd.Reduction +@Simd.WidthHint(8) +public static long sumLongs(long[] values) { + long s = 0L; + for (int i = 0; i < values.length; i++) { + s += values[i]; + } + return s; +} +---- + +===== What to avoid / FAQ + +SIMD hints work best when loops are regular and predictable. They are much less effective +when code has complex control flow, aliasing uncertainty, or side effects in the hot loop. + +*Avoid these patterns in the hot loop body when possible:* + +* Per-iteration object allocation. +* Method calls with unknown side effects. +* Multiple unpredictable branches in the same loop. +* Mixing unrelated work (I/O/logging/UI updates) with data-parallel math. + +*What if a method contains both SIMD-friendly and non-SIMD code?* + +That is common and fine. Prefer extracting the SIMD-friendly loop into a small helper method +and annotate that helper, while leaving orchestration/error handling in the caller: + +[source,java] +---- +public static int process(byte[] in, byte[] out) { + // setup/validation/non-SIMD control flow + int n = Math.min(in.length, out.length); + int written = processVectorFriendly(in, out, n); // SIMD-candidate helper + // non-SIMD post-processing + return written; +} + +@Simd.Candidate +@Simd.WidthHint(16) +private static int processVectorFriendly(byte[] in, byte[] out, int n) { + for (int i = 0; i < n; i++) { + out[i] = (byte)(in[i] ^ 0x5a); + } + return n; +} +---- + +*Should I annotate everything that has a loop?* + +No. Use profiling first and annotate genuine hot spots. 
Over-annotation adds noise and makes it +harder to tell where optimization effort should focus. + +*Do hints guarantee SIMD code generation?* + +No. They are hints, not directives. Translator/runtime safety checks and target capabilities +still decide whether vectorization is legal and profitable. Note that the ParparVM translator does validate `@Simd.Candidate` methods and fails translation with an `IllegalStateException` when one is native, abstract or synchronized, has complex control flow or method calls, contains no array access, or is marked `@Simd.Reduction` without reduction-style arithmetic. + +[source,java] +---- +import com.codename1.annotations.Simd; + +public class FastOps { + @Simd.Candidate + @Simd.WidthHint(16) + public static int transform(byte[] in, byte[] out) { + int n = Math.min(in.length, out.length); + for (int i = 0; i < n; i++) { + out[i] = (byte)(in[i] ^ 0x5a); + } + return n; + } + + @Simd.Candidate + @Simd.Reduction + public static int sum(int[] values) { + int s = 0; + for (int i = 0; i < values.length; i++) { + s += values[i]; + } + return s; + } +} +---- + +Current ParparVM stages primarily consume these hints as optimizer metadata and diagnostics. +As SIMD passes mature, this same API will continue to be the forward-compatible way to provide intent. + +When generating C for eligible SIMD-candidate methods, ParparVM can also emit +Clang ARM/AArch64 NEON target pragmas, but this path is disabled by default and +must be explicitly enabled with `CN1_ENABLE_SIMD_PRAGMAS` at compile time. + +ParparVM now also emits a weak SIMD hook symbol for eligible methods +(`cn1_simd_<class>_<method>__<argTypes>`, followed by `_R<returnType>` for non-void methods). Platform ports can provide a strong +implementation for this symbol to run a real SIMD/NEON fast-path, while keeping +the generated scalar method body as the default fallback. 
diff --git a/vm/ByteCodeTranslator/src/com/codename1/tools/translator/BytecodeMethod.java b/vm/ByteCodeTranslator/src/com/codename1/tools/translator/BytecodeMethod.java index 48e68ac973..3b35dfea22 100644 --- a/vm/ByteCodeTranslator/src/com/codename1/tools/translator/BytecodeMethod.java +++ b/vm/ByteCodeTranslator/src/com/codename1/tools/translator/BytecodeMethod.java @@ -101,6 +101,9 @@ public static void setDependencyGraph(MethodDependencyGraph dependencyGraph) { private static boolean acceptStaticOnEquals; private int methodOffset; private boolean forceVirtual; + private boolean simdCandidateHint; /* set by the parser when it sees @Simd.Candidate */ + private boolean simdReductionHint; /* set by the parser when it sees @Simd.Reduction */ + private int simdWidthHint = -1; /* positive lane-count hint from @Simd.WidthHint, or -1 when none was recorded */ private boolean virtualOverriden; private boolean finalMethod; private boolean synchronizedMethod; @@ -835,6 +838,10 @@ public void appendMethodC(StringBuilder b) { if(nativeMethod) { return; } + boolean emitSimdTargetPragmas = isSimdEligibleForCodegen(); /* remembered so each pop emitted below pairs with this push */ + if (emitSimdTargetPragmas) { + appendSimdTargetPragmaPush(b); + } appendCMethodPrefix(b, ""); b.append(" {\n"); if(eliminated) { @@ -843,10 +850,16 @@ public void appendMethodC(StringBuilder b) { } else { b.append(" return 0;\n}\n\n"); } + if (emitSimdTargetPragmas) { + appendSimdTargetPragmaPop(b); + } return; } - + b.append(declaration); + if (isSimdEligibleForCodegen()) { /* eligible methods get a guarded call to the optional weak hook ahead of the scalar body */ + appendSimdHookCall(b); + } boolean hasInstructions = true; if(optimizerOn) { @@ -996,6 +1009,9 @@ public void appendMethodC(StringBuilder b) { } else { b.append(" return 0;\n}\n\n"); } + if (emitSimdTargetPragmas) { + appendSimdTargetPragmaPop(b); + } return; } Instruction inst = instructions.get(instructions.size() - 1); @@ -1016,6 +1032,9 @@ public void appendMethodC(StringBuilder b) { b.append(" return 0;\n}\n\n"); } } + if (emitSimdTargetPragmas) { + appendSimdTargetPragmaPop(b); + } } public void appendInterfaceMethodC(StringBuilder b) { @@ -1502,6 +1521,17 @@ public void setEliminated(boolean eliminated) { boolean optimize() { + if (simdCandidateHint) { /* annotated methods are validated up front; a failed check throws IllegalStateException */ + 
enforceValidSimdCandidate(); + } else if (simdReductionHint) { + throw new IllegalStateException("SIMD annotation validation failed for " + clsName + "." + + methodName + desc + ": @Simd.Reduction requires @Simd.Candidate"); + } + + if (ByteCodeTranslator.verbose && hasSimdHints()) { + logSimdHintStatus(getSimdIneligibilityReason()); + } + int instructionCount = instructions.size(); // optimize away a method that only contains the void return instruction e.g. blank constructors etc. @@ -2385,6 +2415,233 @@ public String getSignature() { return desc; } + public void setSimdCandidateHint(boolean simdCandidateHint) { + this.simdCandidateHint = simdCandidateHint; + } + + public boolean isSimdCandidateHint() { + return simdCandidateHint; + } + + public void setSimdReductionHint(boolean simdReductionHint) { + this.simdReductionHint = simdReductionHint; + } + + public boolean isSimdReductionHint() { + return simdReductionHint; + } + /** Stores the preferred lane count; non-positive values are normalized to -1 (no hint). */ + public void setSimdWidthHint(int simdWidthHint) { + this.simdWidthHint = simdWidthHint > 0 ? simdWidthHint : -1; + } + + public int getSimdWidthHint() { + return simdWidthHint; + } + /** Returns true when any SIMD annotation (candidate, reduction or a positive width) was recorded. */ + public boolean hasSimdHints() { + return simdCandidateHint || simdReductionHint || simdWidthHint > 0; + } + /** Builds a comma-separated description of the recorded hints, or "none" when there are none. */ + public String getSimdHintSummary() { + StringBuilder out = new StringBuilder(); + if (simdCandidateHint) { + out.append("candidate"); + } + if (simdReductionHint) { + if (out.length() > 0) { + out.append(", "); + } + out.append("reduction"); + } + if (simdWidthHint > 0) { + if (out.length() > 0) { + out.append(", "); + } + out.append("width=").append(simdWidthHint); + } + if (out.length() == 0) { + out.append("none"); + } + return out.toString(); + } + /** Prints whether the hints were accepted; a null or empty reason means accepted. */ + private void logSimdHintStatus(String reason) { + String methodId = clsName + "." 
+ methodName + desc; + if (reason == null || reason.length() == 0) { + System.out.println("SIMD hints accepted for " + methodId + ": " + getSimdHintSummary()); + } else { + System.out.println("SIMD hints noted but not currently vectorization-ready for " + methodId + + ": " + getSimdHintSummary() + " (" + reason + ")"); + } + } + /** Throws IllegalStateException when getSimdIneligibilityReason() reports any problem. */ + private void enforceValidSimdCandidate() { + String reason = getSimdIneligibilityReason(); + if (reason.length() == 0) { + return; + } + throw new IllegalStateException("SIMD annotation validation failed for " + clsName + "." + + methodName + desc + ": " + reason); + } + /** Returns every reason this method is not SIMD-eligible, joined with "; ", or an empty string when there is none. */ + private String getSimdIneligibilityReason() { + StringBuilder reason = new StringBuilder(); + if (nativeMethod || abstractMethod) { + appendReason(reason, "native/abstract method"); + } + if (synchronizedMethod) { + appendReason(reason, "synchronized method"); + } + if (hasExceptionHandlingOrMethodCalls()) { + appendReason(reason, "complex control flow or method calls"); + } + if (!hasArrayAccessOpcode()) { + appendReason(reason, "no primitive/object array access opcodes found"); + } + if (simdReductionHint && !hasReductionOpcode()) { + appendReason(reason, "marked reduction but no reduction-like arithmetic ops found"); + } + return reason.toString(); + } + /** True only for @Simd.Candidate methods with no ineligibility reason. NOTE(review): each call rescans the instruction list. */ + private boolean isSimdEligibleForCodegen() { + return simdCandidateHint && getSimdIneligibilityReason().length() == 0; + } + /** Builds the hook symbol: "cn1_simd_" + class name + "_" + C method name + "__" + argument suffixes, plus "_R" + return suffix for non-void methods. */ + private String getSimdHookName() { + StringBuilder out = new StringBuilder(); + out.append("cn1_simd_"); + out.append(clsName); + out.append("_"); + out.append(getCMethodName()); + out.append("__"); + for (ByteCodeMethodArg args : arguments) { + args.appendCMethodExt(out); + } + if (!returnType.isVoid()) { + out.append("_R"); + returnType.appendCMethodExt(out); + } + return out.toString(); + } + /** Emits, inside a GCC/Clang-only guard, a weak extern declaration of the hook and a call that returns through the hook when its symbol is non-null. */ + private void appendSimdHookCall(StringBuilder b) { + String simdHookName = getSimdHookName(); + b.append(" #if defined(__GNUC__) || defined(__clang__)\n"); + b.append(" extern "); + 
returnType.appendCSig(b); + b.append(" ").append(simdHookName).append("(CODENAME_ONE_THREAD_STATE"); + int arg = 1; + if (!staticMethod) { + b.append(", "); + new ByteCodeMethodArg(clsName, 0).appendCSig(b); + b.append(" __cn1ThisObject"); + } + for (ByteCodeMethodArg args : arguments) { + b.append(", "); + args.appendCSig(b); + b.append("__cn1Arg"); + b.append(arg++); + } + b.append(") __attribute__((weak));\n"); + b.append(" if (").append(simdHookName).append(") {\n"); + if (!returnType.isVoid()) { + b.append(" return "); + } else { + b.append(" "); + } + b.append(simdHookName).append("(threadStateData"); + arg = 1; + if (!staticMethod) { + b.append(", __cn1ThisObject"); + } + for (int i = 0; i < arguments.size(); i++) { + b.append(", __cn1Arg").append(arg++); + } + b.append(");\n"); + if (returnType.isVoid()) { + b.append(" return;\n"); + } + b.append(" }\n"); + b.append(" #endif\n"); + } + /** Emits the opt-in (CN1_ENABLE_SIMD_PRAGMAS, Clang on ARM/AArch64) "#pragma clang attribute push" that targets NEON. */ + private static void appendSimdTargetPragmaPush(StringBuilder b) { + b.append("#if defined(CN1_ENABLE_SIMD_PRAGMAS) && defined(__clang__) && (defined(__arm__) || defined(__aarch64__))\n"); + b.append("#pragma clang attribute push(__attribute__((target(\"neon\"))), apply_to=function)\n"); + b.append("#endif\n"); + } + /** Emits the matching "#pragma clang attribute pop" under the same preprocessor guard. */ + private static void appendSimdTargetPragmaPop(StringBuilder b) { + b.append("#if defined(CN1_ENABLE_SIMD_PRAGMAS) && defined(__clang__) && (defined(__arm__) || defined(__aarch64__))\n"); + b.append("#pragma clang attribute pop\n"); + b.append("#endif\n"); + } + /** Appends value to sb, inserting "; " between entries. */ + private static void appendReason(StringBuilder sb, String value) { + if (sb.length() > 0) { + sb.append("; "); + } + sb.append(value); + } + /** Returns true if any instruction is an array element load or store opcode. */ + private boolean hasArrayAccessOpcode() { + for (Instruction ins : instructions) { + switch (ins.getOpcode()) { + case Opcodes.IALOAD: + case Opcodes.LALOAD: + case Opcodes.FALOAD: + case Opcodes.DALOAD: + case Opcodes.AALOAD: + case Opcodes.BALOAD: + case Opcodes.CALOAD: + case Opcodes.SALOAD: + case Opcodes.IASTORE: + case Opcodes.LASTORE: + case 
Opcodes.FASTORE: + case Opcodes.DASTORE: + case Opcodes.AASTORE: + case Opcodes.BASTORE: + case Opcodes.CASTORE: + case Opcodes.SASTORE: + return true; + default: + break; + } + } + return false; + } + /** Returns true if any instruction is an add, subtract or multiply (int/long/float/double) or an int/long and/or/xor. NOTE(review): comparison-based min/max loops appear to contain none of these opcodes - confirm they are meant to be rejected. */ + private boolean hasReductionOpcode() { + for (Instruction ins : instructions) { + switch (ins.getOpcode()) { + case Opcodes.IADD: + case Opcodes.LADD: + case Opcodes.FADD: + case Opcodes.DADD: + case Opcodes.ISUB: + case Opcodes.LSUB: + case Opcodes.FSUB: + case Opcodes.DSUB: + case Opcodes.IMUL: + case Opcodes.LMUL: + case Opcodes.FMUL: + case Opcodes.DMUL: + case Opcodes.IAND: + case Opcodes.LAND: + case Opcodes.IOR: + case Opcodes.LOR: + case Opcodes.IXOR: + case Opcodes.LXOR: + return true; + default: + break; + } + } + return false; + } + @Override public SignatureSet nextSignature() { return null; diff --git a/vm/ByteCodeTranslator/src/com/codename1/tools/translator/Parser.java b/vm/ByteCodeTranslator/src/com/codename1/tools/translator/Parser.java index 591aa2bff1..c041bdf6b1 100644 --- a/vm/ByteCodeTranslator/src/com/codename1/tools/translator/Parser.java +++ b/vm/ByteCodeTranslator/src/com/codename1/tools/translator/Parser.java @@ -740,6 +740,9 @@ public void visit(int version, int access, String name, String signature, String } class MethodVisitorWrapper extends MethodVisitor { + private static final String SIMD_CANDIDATE_DESC = "Lcom/codename1/annotations/Simd$Candidate;"; /* type descriptors of the nested Simd annotations, compared against visitAnnotation's desc */ + private static final String SIMD_REDUCTION_DESC = "Lcom/codename1/annotations/Simd$Reduction;"; + private static final String SIMD_WIDTH_HINT_DESC = "Lcom/codename1/annotations/Simd$WidthHint;"; private final BytecodeMethod mtd; public MethodVisitorWrapper(MethodVisitor mv, BytecodeMethod mtd) { super(Opcodes.ASM9, mv); @@ -1198,7 +1201,30 @@ public AnnotationVisitor visitTypeAnnotation(int typeRef, TypePath typePath, Str @Override public AnnotationVisitor visitAnnotation(String desc, boolean visible) { if (mv == null) return null; - return new 
AnnotationVisitorWrapper(super.visitAnnotation(desc, visible)); + if (SIMD_CANDIDATE_DESC.equals(desc)) { /* the two marker annotations are recorded directly from the descriptor */ + mtd.setSimdCandidateHint(true); + } else if (SIMD_REDUCTION_DESC.equals(desc)) { + mtd.setSimdReductionHint(true); + } + + AnnotationVisitor base = super.visitAnnotation(desc, visible); + AnnotationVisitor wrapped = new AnnotationVisitorWrapper(base); + if (!SIMD_WIDTH_HINT_DESC.equals(desc)) { + return wrapped; + } + + return new AnnotationVisitor(Opcodes.ASM9, wrapped) { /* WidthHint carries a value, so intercept visit() while still delegating to the wrapper */ + @Override + public void visit(String name, Object value) { + if ("value".equals(name) && value instanceof Integer) { + int widthHint = ((Integer)value).intValue(); + if (widthHint > 0) { + mtd.setSimdWidthHint(widthHint); + } + } + super.visit(name, value); + } + }; } @Override diff --git a/vm/tests/src/test/java/com/codename1/tools/translator/BytecodeMethodSimdHintTest.java b/vm/tests/src/test/java/com/codename1/tools/translator/BytecodeMethodSimdHintTest.java new file mode 100644 index 0000000000..22db16e2b7 --- /dev/null +++ b/vm/tests/src/test/java/com/codename1/tools/translator/BytecodeMethodSimdHintTest.java @@ -0,0 +1,62 @@ +package com.codename1.tools.translator; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.objectweb.asm.Opcodes; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class BytecodeMethodSimdHintTest { + + @BeforeEach + void cleanParser() { + Parser.cleanup(); + } + /** A candidate whose only instruction is RETURN (no array access) must make appendMethodC throw IllegalStateException. */ + @Test + void failsForAnnotatedMethodsThatAreNotVectorizationCandidates() { + BytecodeMethod method = new BytecodeMethod( + "com_example_SimdCarrier", + Opcodes.ACC_PUBLIC | Opcodes.ACC_STATIC, + "scalarBody", + "()V", + null, + null + ); + method.setSimdCandidateHint(true); + method.setSimdWidthHint(16); + method.setMaxes(1, 0); + method.addInstruction(Opcodes.RETURN); + assertThrows(IllegalStateException.class, () -> 
method.appendMethodC(new StringBuilder()), + "SIMD-candidate methods with no array access opcodes should fail validation"); + } + /** A candidate containing BALOAD must translate without throwing and emit the hook name plus the guarded NEON pragma. */ + @Test + void allowsAnnotatedMethodsThatContainArrayAccessOpcodes() { + BytecodeMethod method = new BytecodeMethod( + "com_example_SimdCarrier", + Opcodes.ACC_PUBLIC | Opcodes.ACC_STATIC, + "vectorBody", + "()V", + null, + null + ); + method.setSimdCandidateHint(true); + method.setSimdWidthHint(16); + method.setMaxes(1, 0); + method.addInstruction(Opcodes.BALOAD); + method.addInstruction(Opcodes.RETURN); + StringBuilder out = new StringBuilder(); + assertDoesNotThrow(() -> method.appendMethodC(out), + "SIMD-candidate methods with array access opcodes should pass validation"); + String generated = out.toString(); + assertTrue(generated.contains("cn1_simd_com_example_SimdCarrier_vectorBody__"), + "SIMD-eligible methods should emit weak SIMD hook declarations"); + assertTrue(generated.contains("#if defined(CN1_ENABLE_SIMD_PRAGMAS) && defined(__clang__)"), + "SIMD-eligible methods should emit opt-in SIMD pragma guards"); + assertTrue(generated.contains("#pragma clang attribute push(__attribute__((target(\"neon\"))), apply_to=function)"), + "SIMD-eligible methods should include NEON targeting pragmas under the opt-in guard"); + } +}