Skip to content

Commit 93e3b2e

Browse files
msimacek authored and djoooooe committed
Add node for raising regex errors
1 parent 283fd66 commit 93e3b2e

File tree

2 files changed

+61
-26
lines changed

2 files changed

+61
-26
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SREModuleBuiltins.java

Lines changed: 59 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,12 @@
4040
*/
4141
package com.oracle.graal.python.builtins.modules;
4242

43+
import static com.oracle.graal.python.nodes.BuiltinNames.T__SRE;
4344
import static com.oracle.graal.python.nodes.ErrorMessages.BAD_CHAR_IN_GROUP_NAME;
4445
import static com.oracle.graal.python.nodes.ErrorMessages.BAD_ESCAPE_END_OF_STRING;
4546
import static com.oracle.graal.python.nodes.ErrorMessages.INVALID_GROUP_REFERENCE;
46-
import static com.oracle.graal.python.nodes.ErrorMessages.MISSING_S;
47+
import static com.oracle.graal.python.nodes.ErrorMessages.MISSING_GROUP_NAME;
48+
import static com.oracle.graal.python.nodes.ErrorMessages.MISSING_LEFT_ANGLE_BRACKET;
4749
import static com.oracle.graal.python.nodes.ErrorMessages.UNKNOWN_GROUP_NAME;
4850
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
4951
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
@@ -61,15 +63,13 @@
6163
import com.oracle.graal.python.annotations.Builtin;
6264
import com.oracle.graal.python.builtins.CoreFunctions;
6365
import com.oracle.graal.python.builtins.Python3Core;
64-
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
6566
import com.oracle.graal.python.builtins.PythonBuiltins;
6667
import com.oracle.graal.python.builtins.objects.PNone;
6768
import com.oracle.graal.python.builtins.objects.array.PArray;
6869
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAccessLibrary;
6970
import com.oracle.graal.python.builtins.objects.buffer.PythonBufferAcquireLibrary;
7071
import com.oracle.graal.python.builtins.objects.bytes.BytesNodes;
7172
import com.oracle.graal.python.builtins.objects.cext.common.NativePointer;
72-
import com.oracle.graal.python.builtins.objects.exception.PBaseException;
7373
import com.oracle.graal.python.builtins.objects.memoryview.PMemoryView;
7474
import com.oracle.graal.python.builtins.objects.mmap.PMMap;
7575
import com.oracle.graal.python.builtins.objects.module.PythonModule;
@@ -80,7 +80,6 @@
8080
import com.oracle.graal.python.lib.PyObjectLookupAttr;
8181
import com.oracle.graal.python.lib.PyObjectSizeNode;
8282
import com.oracle.graal.python.lib.PyUnicodeCheckNode;
83-
import com.oracle.graal.python.nodes.BuiltinNames;
8483
import com.oracle.graal.python.nodes.ErrorMessages;
8584
import com.oracle.graal.python.nodes.HiddenAttr;
8685
import com.oracle.graal.python.nodes.PNodeWithContext;
@@ -99,6 +98,7 @@
9998
import com.oracle.graal.python.runtime.PythonOptions;
10099
import com.oracle.graal.python.runtime.exception.PException;
101100
import com.oracle.graal.python.runtime.exception.PythonErrorType;
101+
import com.oracle.graal.python.runtime.formatting.ErrorMessageFormatter;
102102
import com.oracle.truffle.api.CompilerDirectives;
103103
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
104104
import com.oracle.truffle.api.dsl.Bind;
@@ -107,6 +107,7 @@
107107
import com.oracle.truffle.api.dsl.GenerateCached;
108108
import com.oracle.truffle.api.dsl.GenerateInline;
109109
import com.oracle.truffle.api.dsl.GenerateNodeFactory;
110+
import com.oracle.truffle.api.dsl.GenerateUncached;
110111
import com.oracle.truffle.api.dsl.Idempotent;
111112
import com.oracle.truffle.api.dsl.ImportStatic;
112113
import com.oracle.truffle.api.dsl.NeverDefault;
@@ -133,6 +134,8 @@
133134
@CoreFunctions(defineModule = "_sre")
134135
public final class SREModuleBuiltins extends PythonBuiltins {
135136

137+
private static final TruffleString T_ERROR = tsLiteral("error");
138+
136139
@Override
137140
protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFactories() {
138141
return SREModuleBuiltinsFactory.getFactories();
@@ -189,7 +192,6 @@ public static final class TRegexCache {
189192

190193
private static final String ENCODING_UTF_32 = "Encoding=UTF-32";
191194
private static final String ENCODING_LATIN_1 = "Encoding=LATIN-1";
192-
private static final TruffleString T_ERROR = tsLiteral("error");
193195
private static final TruffleString T_VALUE_ERROR_UNICODE_FLAG_BYTES_PATTERN = tsLiteral("cannot use UNICODE flag with a bytes pattern");
194196
private static final TruffleString T_VALUE_ERROR_LOCALE_FLAG_STR_PATTERN = tsLiteral("cannot use LOCALE flag with a str pattern");
195197
private static final TruffleString T_VALUE_ERROR_ASCII_UNICODE_INCOMPATIBLE = tsLiteral("ASCII and UNICODE flags are incompatible");
@@ -406,7 +408,7 @@ public Object compile(Node node, PythonContext context, PythonMethod method, boo
406408
regexp = compiledRegex;
407409
}
408410
} catch (RuntimeException e) {
409-
throw handleCompilationError(node, e, lib, context);
411+
throw handleCompilationError(node, e, lib);
410412
}
411413
if (isLocaleSensitive()) {
412414
setLocaleSensitiveRegexp(method, mustAdvance, locale, regexp);
@@ -418,7 +420,7 @@ public Object compile(Node node, PythonContext context, PythonMethod method, boo
418420

419421
// No BoundaryCallContext: lookups attribute on a builtin module; constructs builtin
420422
// exceptions
421-
private RuntimeException handleCompilationError(Node node, RuntimeException e, InteropLibrary lib, PythonContext context) {
423+
private RuntimeException handleCompilationError(Node node, RuntimeException e, InteropLibrary lib) {
422424
try {
423425
if (lib.isException(e)) {
424426
if (lib.getExceptionType(e) == ExceptionType.PARSE_ERROR) {
@@ -431,10 +433,7 @@ private RuntimeException handleCompilationError(Node node, RuntimeException e, I
431433
} else {
432434
SourceSection sourceSection = lib.getSourceLocation(e);
433435
int position = sourceSection.getCharIndex();
434-
PythonModule module = context.lookupBuiltinModule(BuiltinNames.T__SRE);
435-
Object errorConstructor = PyObjectLookupAttr.executeUncached(module, T_ERROR);
436-
PBaseException exception = (PBaseException) CallNode.executeUncached(errorConstructor, reason, originalPattern, position);
437-
return PRaiseNode.raiseExceptionObjectStatic(node, exception);
436+
throw RaiseRegexErrorNode.executeWithPatternAndPositionUncached(reason, originalPattern, position);
438437
}
439438
}
440439
}
@@ -549,7 +548,7 @@ Object localeSensitive(VirtualFrame frame, PythonObject pattern, PythonMethod me
549548
@TruffleBoundary
550549
@NeverDefault
551550
protected Object lookupGetLocaleFunction() {
552-
PythonModule module = getContext().lookupBuiltinModule(BuiltinNames.T__SRE);
551+
PythonModule module = getContext().lookupBuiltinModule(T__SRE);
553552
return PyObjectLookupAttr.executeUncached(module, T__GETLOCALE);
554553
}
555554

@@ -634,14 +633,14 @@ static Object doSingleContext(
634633
static Object doRead(
635634
@Bind PythonContext context,
636635
@Cached ReadAttributeFromModuleNode read) {
637-
PythonModule module = context.lookupBuiltinModule(BuiltinNames.T__SRE);
636+
PythonModule module = context.lookupBuiltinModule(T__SRE);
638637
return read.execute(module, T_MATCH_CONSTRUCTOR);
639638
}
640639

641640
@TruffleBoundary
642641
@NeverDefault
643642
protected static Object lookupMatchConstructor() {
644-
PythonModule module = PythonContext.get(null).lookupBuiltinModule(BuiltinNames.T__SRE);
643+
PythonModule module = PythonContext.get(null).lookupBuiltinModule(T__SRE);
645644
return PyObjectLookupAttr.executeUncached(module, T_MATCH_CONSTRUCTOR);
646645
}
647646
}
@@ -903,7 +902,7 @@ static Object doCached(VirtualFrame frame, TRegexCache pattern, TruffleString re
903902
}
904903
}
905904

906-
private static abstract sealed class ReplacementToken {
905+
private abstract static sealed class ReplacementToken {
907906
}
908907

909908
private static final class Codepoint extends ReplacementToken {
@@ -930,7 +929,7 @@ private GroupReference(int groupNumber) {
930929
}
931930
}
932931

933-
private static abstract sealed class ReplacementConsumer {
932+
private abstract static sealed class ReplacementConsumer {
934933
abstract void codepoint(int codepoint);
935934

936935
abstract void literal(TruffleString replacement, int fromIndex, int toIndex);
@@ -957,11 +956,44 @@ void groupReference(int groupNumber) {
957956
}
958957
}
959958

959+
@GenerateInline(false) // Only for errors
960+
@GenerateUncached
961+
abstract static class RaiseRegexErrorNode extends Node {
962+
public final PException execute(VirtualFrame frame, TruffleString message) {
963+
return executeWithPatternAndPosition(frame, message, PNone.NONE, PNone.NONE);
964+
}
965+
966+
public final PException executeFormatted(VirtualFrame frame, TruffleString message, Object... formatArgs) {
967+
return execute(frame, doFormat(message, formatArgs));
968+
}
969+
970+
@TruffleBoundary
971+
private static TruffleString doFormat(TruffleString message, Object[] formatArgs) {
972+
return TruffleString.fromJavaStringUncached(ErrorMessageFormatter.format(message, formatArgs), TS_ENCODING);
973+
}
974+
975+
public abstract PException executeWithPatternAndPosition(VirtualFrame frame, TruffleString message, Object pattern, Object position);
976+
977+
public static PException executeWithPatternAndPositionUncached(TruffleString message, Object pattern, Object position) {
978+
return SREModuleBuiltinsFactory.RaiseRegexErrorNodeGen.getUncached().executeWithPatternAndPosition(null, message, pattern, position);
979+
}
980+
981+
@Specialization
982+
static PException createAndRaise(VirtualFrame frame, TruffleString message, Object pattern, Object position,
983+
@Bind Node inliningTarget,
984+
@Bind PythonContext context,
985+
@Cached ReadAttributeFromModuleNode readAttribute,
986+
@Cached CallNode callNode) {
987+
PythonModule module = context.lookupBuiltinModule(T__SRE);
988+
Object errorType = readAttribute.execute(module, T_ERROR);
989+
Object exception = callNode.execute(frame, errorType, message, pattern, position);
990+
throw PRaiseNode.raiseExceptionObjectStatic(inliningTarget, exception);
991+
}
992+
}
993+
960994
private static final int CODEPOINT_LENGTH_ASCII = 4;
961-
// TODO
962-
private static final PythonBuiltinClassType PATTERN_ERROR = PythonErrorType.Exception;
963995

964-
private static void parseReplacement(Object tregexCompiledRegex, TruffleString replacement, ReplacementConsumer consumer,
996+
private static void parseReplacement(VirtualFrame frame, Object tregexCompiledRegex, TruffleString replacement, ReplacementConsumer consumer,
965997
Node inliningTarget,
966998
TruffleString.ByteIndexOfCodePointNode indexOfNode,
967999
TruffleString.CodePointAtByteIndexNode codePointAtByteIndexNode,
@@ -970,7 +1002,8 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
9701002
TRegexUtil.InteropReadMemberNode readGroupCountNode,
9711003
TRegexUtil.InteropReadMemberNode readNamedGroupsNode,
9721004
InteropLibrary genericInteropLib,
973-
PRaiseNode raiseNode) {
1005+
PRaiseNode raiseNode,
1006+
RaiseRegexErrorNode raiseRegexErrorNode) {
9741007
int length = replacement.byteLength(TS_ENCODING);
9751008
if (length == 0) {
9761009
return;
@@ -985,26 +1018,26 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
9851018
}
9861019
int nextCPPos = backslashPos + CODEPOINT_LENGTH_ASCII;
9871020
if (nextCPPos >= length) {
988-
throw raiseNode.raise(inliningTarget, PATTERN_ERROR, BAD_ESCAPE_END_OF_STRING);
1021+
throw raiseRegexErrorNode.execute(frame, BAD_ESCAPE_END_OF_STRING);
9891022
}
9901023
int firstCodepoint = codePointAtByteIndexNode.execute(replacement, nextCPPos, TS_ENCODING);
9911024
nextCPPos += CODEPOINT_LENGTH_ASCII;
9921025
int secondCodepoint = nextCPPos < length ? codePointAtByteIndexNode.execute(replacement, nextCPPos, TS_ENCODING) : -1;
9931026
nextCPPos += CODEPOINT_LENGTH_ASCII;
9941027
if (firstCodepoint == 'g') {
9951028
if (secondCodepoint != '<') {
996-
throw raiseNode.raise(inliningTarget, PATTERN_ERROR, MISSING_S, "<");
1029+
throw raiseRegexErrorNode.execute(frame, MISSING_LEFT_ANGLE_BRACKET);
9971030
}
9981031
int nameStartPos = nextCPPos;
9991032
int nameEndPos;
10001033
if (nameStartPos >= length || (nameEndPos = indexOfNode.execute(replacement, '>', nameStartPos, length, TS_ENCODING)) < 0 || nameEndPos == nameStartPos) {
1001-
throw raiseNode.raise(inliningTarget, PATTERN_ERROR, MISSING_S, "group name");
1034+
throw raiseRegexErrorNode.execute(frame, MISSING_GROUP_NAME);
10021035
}
10031036
int nameLength = nameEndPos - nameStartPos;
10041037
assert nameLength > 0;
10051038
TruffleString name = substringByteIndexNode.execute(replacement, nameStartPos, nameLength, TS_ENCODING, true);
10061039
if (getCodeRangeNode.execute(name, TS_ENCODING) != TruffleString.CodeRange.ASCII) {
1007-
throw raiseNode.raise(inliningTarget, PATTERN_ERROR, BAD_CHAR_IN_GROUP_NAME, name);
1040+
throw raiseRegexErrorNode.executeFormatted(frame, BAD_CHAR_IN_GROUP_NAME, name);
10081041
}
10091042
int groupNumber = 0;
10101043
for (int i = 0; i < nameLength; i += CODEPOINT_LENGTH_ASCII) {
@@ -1016,7 +1049,7 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
10161049
break;
10171050
}
10181051
if (groupNumber >= numberOfCaptureGroups) {
1019-
throw raiseNode.raise(inliningTarget, PATTERN_ERROR, INVALID_GROUP_REFERENCE, groupNumber);
1052+
throw raiseRegexErrorNode.executeFormatted(frame, INVALID_GROUP_REFERENCE, groupNumber);
10201053
}
10211054
}
10221055
if (groupNumber < 0) {
@@ -1048,7 +1081,7 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
10481081
consumer.codepoint(octalEscape);
10491082
lastPos = nextCPPos;
10501083
} else if (isDecimalDigit(firstCodepoint)) {
1051-
1084+
// TODO
10521085
}
10531086
}
10541087
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -497,6 +497,8 @@ public abstract class ErrorMessages {
497497
public static final TruffleString MISSING_D_REQUIRED_S_ARGUMENT_S_POS = tsLiteral("%s() missing required argument '%s' (pos %d)");
498498
public static final TruffleString MISSING_D_REQUIRED_S_ARGUMENT_S_S = tsLiteral("%s() missing %d required %s argument%s: '%s'");
499499
public static final TruffleString MISSING_S = tsLiteral("Missing %s");
500+
public static final TruffleString MISSING_GROUP_NAME = tsLiteral("Missing group name");
501+
public static final TruffleString MISSING_LEFT_ANGLE_BRACKET = tsLiteral("Missing <");
500502
public static final TruffleString MMAP_INDEX_OUT_OF_RANGE = tsLiteral("mmap index out of range");
501503
public static final TruffleString MODULE_FILENAME_MISSING = tsLiteral("module filename missing");
502504
public static final TruffleString MODULE_HAS_NO_ATTR_S = tsLiteral("module has no attribute '%s'");

0 commit comments

Comments
 (0)