4040 */
4141package com .oracle .graal .python .builtins .modules ;
4242
43+ import static com .oracle .graal .python .nodes .BuiltinNames .T__SRE ;
4344import static com .oracle .graal .python .nodes .ErrorMessages .BAD_CHAR_IN_GROUP_NAME ;
4445import static com .oracle .graal .python .nodes .ErrorMessages .BAD_ESCAPE_END_OF_STRING ;
4546import static com .oracle .graal .python .nodes .ErrorMessages .INVALID_GROUP_REFERENCE ;
46- import static com .oracle .graal .python .nodes .ErrorMessages .MISSING_S ;
47+ import static com .oracle .graal .python .nodes .ErrorMessages .MISSING_GROUP_NAME ;
48+ import static com .oracle .graal .python .nodes .ErrorMessages .MISSING_LEFT_ANGLE_BRACKET ;
4749import static com .oracle .graal .python .nodes .ErrorMessages .UNKNOWN_GROUP_NAME ;
4850import static com .oracle .graal .python .runtime .exception .PythonErrorType .TypeError ;
4951import static com .oracle .graal .python .runtime .exception .PythonErrorType .ValueError ;
6163import com .oracle .graal .python .annotations .Builtin ;
6264import com .oracle .graal .python .builtins .CoreFunctions ;
6365import com .oracle .graal .python .builtins .Python3Core ;
64- import com .oracle .graal .python .builtins .PythonBuiltinClassType ;
6566import com .oracle .graal .python .builtins .PythonBuiltins ;
6667import com .oracle .graal .python .builtins .objects .PNone ;
6768import com .oracle .graal .python .builtins .objects .array .PArray ;
6869import com .oracle .graal .python .builtins .objects .buffer .PythonBufferAccessLibrary ;
6970import com .oracle .graal .python .builtins .objects .buffer .PythonBufferAcquireLibrary ;
7071import com .oracle .graal .python .builtins .objects .bytes .BytesNodes ;
7172import com .oracle .graal .python .builtins .objects .cext .common .NativePointer ;
72- import com .oracle .graal .python .builtins .objects .exception .PBaseException ;
7373import com .oracle .graal .python .builtins .objects .memoryview .PMemoryView ;
7474import com .oracle .graal .python .builtins .objects .mmap .PMMap ;
7575import com .oracle .graal .python .builtins .objects .module .PythonModule ;
8080import com .oracle .graal .python .lib .PyObjectLookupAttr ;
8181import com .oracle .graal .python .lib .PyObjectSizeNode ;
8282import com .oracle .graal .python .lib .PyUnicodeCheckNode ;
83- import com .oracle .graal .python .nodes .BuiltinNames ;
8483import com .oracle .graal .python .nodes .ErrorMessages ;
8584import com .oracle .graal .python .nodes .HiddenAttr ;
8685import com .oracle .graal .python .nodes .PNodeWithContext ;
9998import com .oracle .graal .python .runtime .PythonOptions ;
10099import com .oracle .graal .python .runtime .exception .PException ;
101100import com .oracle .graal .python .runtime .exception .PythonErrorType ;
101+ import com .oracle .graal .python .runtime .formatting .ErrorMessageFormatter ;
102102import com .oracle .truffle .api .CompilerDirectives ;
103103import com .oracle .truffle .api .CompilerDirectives .TruffleBoundary ;
104104import com .oracle .truffle .api .dsl .Bind ;
107107import com .oracle .truffle .api .dsl .GenerateCached ;
108108import com .oracle .truffle .api .dsl .GenerateInline ;
109109import com .oracle .truffle .api .dsl .GenerateNodeFactory ;
110+ import com .oracle .truffle .api .dsl .GenerateUncached ;
110111import com .oracle .truffle .api .dsl .Idempotent ;
111112import com .oracle .truffle .api .dsl .ImportStatic ;
112113import com .oracle .truffle .api .dsl .NeverDefault ;
133134@ CoreFunctions (defineModule = "_sre" )
134135public final class SREModuleBuiltins extends PythonBuiltins {
135136
137+ private static final TruffleString T_ERROR = tsLiteral ("error" );
138+
136139 @ Override
137140 protected List <? extends NodeFactory <? extends PythonBuiltinBaseNode >> getNodeFactories () {
138141 return SREModuleBuiltinsFactory .getFactories ();
@@ -189,7 +192,6 @@ public static final class TRegexCache {
189192
190193 private static final String ENCODING_UTF_32 = "Encoding=UTF-32" ;
191194 private static final String ENCODING_LATIN_1 = "Encoding=LATIN-1" ;
192- private static final TruffleString T_ERROR = tsLiteral ("error" );
193195 private static final TruffleString T_VALUE_ERROR_UNICODE_FLAG_BYTES_PATTERN = tsLiteral ("cannot use UNICODE flag with a bytes pattern" );
194196 private static final TruffleString T_VALUE_ERROR_LOCALE_FLAG_STR_PATTERN = tsLiteral ("cannot use LOCALE flag with a str pattern" );
195197 private static final TruffleString T_VALUE_ERROR_ASCII_UNICODE_INCOMPATIBLE = tsLiteral ("ASCII and UNICODE flags are incompatible" );
@@ -406,7 +408,7 @@ public Object compile(Node node, PythonContext context, PythonMethod method, boo
406408 regexp = compiledRegex ;
407409 }
408410 } catch (RuntimeException e ) {
409- throw handleCompilationError (node , e , lib , context );
411+ throw handleCompilationError (node , e , lib );
410412 }
411413 if (isLocaleSensitive ()) {
412414 setLocaleSensitiveRegexp (method , mustAdvance , locale , regexp );
@@ -418,7 +420,7 @@ public Object compile(Node node, PythonContext context, PythonMethod method, boo
418420
419421 // No BoundaryCallContext: looks up an attribute on a builtin module; constructs builtin
420422 // exceptions
421- private RuntimeException handleCompilationError (Node node , RuntimeException e , InteropLibrary lib , PythonContext context ) {
423+ private RuntimeException handleCompilationError (Node node , RuntimeException e , InteropLibrary lib ) {
422424 try {
423425 if (lib .isException (e )) {
424426 if (lib .getExceptionType (e ) == ExceptionType .PARSE_ERROR ) {
@@ -431,10 +433,7 @@ private RuntimeException handleCompilationError(Node node, RuntimeException e, I
431433 } else {
432434 SourceSection sourceSection = lib .getSourceLocation (e );
433435 int position = sourceSection .getCharIndex ();
434- PythonModule module = context .lookupBuiltinModule (BuiltinNames .T__SRE );
435- Object errorConstructor = PyObjectLookupAttr .executeUncached (module , T_ERROR );
436- PBaseException exception = (PBaseException ) CallNode .executeUncached (errorConstructor , reason , originalPattern , position );
437- return PRaiseNode .raiseExceptionObjectStatic (node , exception );
436+ throw RaiseRegexErrorNode .executeWithPatternAndPositionUncached (reason , originalPattern , position );
438437 }
439438 }
440439 }
@@ -549,7 +548,7 @@ Object localeSensitive(VirtualFrame frame, PythonObject pattern, PythonMethod me
549548 @ TruffleBoundary
550549 @ NeverDefault
551550 protected Object lookupGetLocaleFunction () {
552- PythonModule module = getContext ().lookupBuiltinModule (BuiltinNames . T__SRE );
551+ PythonModule module = getContext ().lookupBuiltinModule (T__SRE );
553552 return PyObjectLookupAttr .executeUncached (module , T__GETLOCALE );
554553 }
555554
@@ -634,14 +633,14 @@ static Object doSingleContext(
634633 static Object doRead (
635634 @ Bind PythonContext context ,
636635 @ Cached ReadAttributeFromModuleNode read ) {
637- PythonModule module = context .lookupBuiltinModule (BuiltinNames . T__SRE );
636+ PythonModule module = context .lookupBuiltinModule (T__SRE );
638637 return read .execute (module , T_MATCH_CONSTRUCTOR );
639638 }
640639
641640 @ TruffleBoundary
642641 @ NeverDefault
643642 protected static Object lookupMatchConstructor () {
644- PythonModule module = PythonContext .get (null ).lookupBuiltinModule (BuiltinNames . T__SRE );
643+ PythonModule module = PythonContext .get (null ).lookupBuiltinModule (T__SRE );
645644 return PyObjectLookupAttr .executeUncached (module , T_MATCH_CONSTRUCTOR );
646645 }
647646 }
@@ -903,7 +902,7 @@ static Object doCached(VirtualFrame frame, TRegexCache pattern, TruffleString re
903902 }
904903 }
905904
906- private static abstract sealed class ReplacementToken {
905+ private abstract static sealed class ReplacementToken {
907906 }
908907
909908 private static final class Codepoint extends ReplacementToken {
@@ -930,7 +929,7 @@ private GroupReference(int groupNumber) {
930929 }
931930 }
932931
933- private static abstract sealed class ReplacementConsumer {
932+ private abstract static sealed class ReplacementConsumer {
934933 abstract void codepoint (int codepoint );
935934
936935 abstract void literal (TruffleString replacement , int fromIndex , int toIndex );
@@ -957,11 +956,44 @@ void groupReference(int groupNumber) {
957956 }
958957 }
959958
959+ @ GenerateInline (false ) // Only for errors
960+ @ GenerateUncached
961+ abstract static class RaiseRegexErrorNode extends Node {
962+ public final PException execute (VirtualFrame frame , TruffleString message ) {
963+ return executeWithPatternAndPosition (frame , message , PNone .NONE , PNone .NONE );
964+ }
965+
966+ public final PException executeFormatted (VirtualFrame frame , TruffleString message , Object ... formatArgs ) {
967+ return execute (frame , doFormat (message , formatArgs ));
968+ }
969+
970+ @ TruffleBoundary
971+ private static TruffleString doFormat (TruffleString message , Object [] formatArgs ) {
972+ return TruffleString .fromJavaStringUncached (ErrorMessageFormatter .format (message , formatArgs ), TS_ENCODING );
973+ }
974+
975+ public abstract PException executeWithPatternAndPosition (VirtualFrame frame , TruffleString message , Object pattern , Object position );
976+
977+ public static PException executeWithPatternAndPositionUncached (TruffleString message , Object pattern , Object position ) {
978+ return SREModuleBuiltinsFactory .RaiseRegexErrorNodeGen .getUncached ().executeWithPatternAndPosition (null , message , pattern , position );
979+ }
980+
981+ @ Specialization
982+ static PException createAndRaise (VirtualFrame frame , TruffleString message , Object pattern , Object position ,
983+ @ Bind Node inliningTarget ,
984+ @ Bind PythonContext context ,
985+ @ Cached ReadAttributeFromModuleNode readAttribute ,
986+ @ Cached CallNode callNode ) {
987+ PythonModule module = context .lookupBuiltinModule (T__SRE );
988+ Object errorType = readAttribute .execute (module , T_ERROR );
989+ Object exception = callNode .execute (frame , errorType , message , pattern , position );
990+ throw PRaiseNode .raiseExceptionObjectStatic (inliningTarget , exception );
991+ }
992+ }
993+
960994 private static final int CODEPOINT_LENGTH_ASCII = 4 ;
961- // TODO
962- private static final PythonBuiltinClassType PATTERN_ERROR = PythonErrorType .Exception ;
963995
964- private static void parseReplacement (Object tregexCompiledRegex , TruffleString replacement , ReplacementConsumer consumer ,
996+ private static void parseReplacement (VirtualFrame frame , Object tregexCompiledRegex , TruffleString replacement , ReplacementConsumer consumer ,
965997 Node inliningTarget ,
966998 TruffleString .ByteIndexOfCodePointNode indexOfNode ,
967999 TruffleString .CodePointAtByteIndexNode codePointAtByteIndexNode ,
@@ -970,7 +1002,8 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
9701002 TRegexUtil .InteropReadMemberNode readGroupCountNode ,
9711003 TRegexUtil .InteropReadMemberNode readNamedGroupsNode ,
9721004 InteropLibrary genericInteropLib ,
973- PRaiseNode raiseNode ) {
1005+ PRaiseNode raiseNode ,
1006+ RaiseRegexErrorNode raiseRegexErrorNode ) {
9741007 int length = replacement .byteLength (TS_ENCODING );
9751008 if (length == 0 ) {
9761009 return ;
@@ -985,26 +1018,26 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
9851018 }
9861019 int nextCPPos = backslashPos + CODEPOINT_LENGTH_ASCII ;
9871020 if (nextCPPos >= length ) {
988- throw raiseNode . raise ( inliningTarget , PATTERN_ERROR , BAD_ESCAPE_END_OF_STRING );
1021+ throw raiseRegexErrorNode . execute ( frame , BAD_ESCAPE_END_OF_STRING );
9891022 }
9901023 int firstCodepoint = codePointAtByteIndexNode .execute (replacement , nextCPPos , TS_ENCODING );
9911024 nextCPPos += CODEPOINT_LENGTH_ASCII ;
9921025 int secondCodepoint = nextCPPos < length ? codePointAtByteIndexNode .execute (replacement , nextCPPos , TS_ENCODING ) : -1 ;
9931026 nextCPPos += CODEPOINT_LENGTH_ASCII ;
9941027 if (firstCodepoint == 'g' ) {
9951028 if (secondCodepoint != '<' ) {
996- throw raiseNode . raise ( inliningTarget , PATTERN_ERROR , MISSING_S , "<" );
1029+ throw raiseRegexErrorNode . execute ( frame , MISSING_LEFT_ANGLE_BRACKET );
9971030 }
9981031 int nameStartPos = nextCPPos ;
9991032 int nameEndPos ;
10001033 if (nameStartPos >= length || (nameEndPos = indexOfNode .execute (replacement , '>' , nameStartPos , length , TS_ENCODING )) < 0 || nameEndPos == nameStartPos ) {
1001- throw raiseNode . raise ( inliningTarget , PATTERN_ERROR , MISSING_S , "group name" );
1034+ throw raiseRegexErrorNode . execute ( frame , MISSING_GROUP_NAME );
10021035 }
10031036 int nameLength = nameEndPos - nameStartPos ;
10041037 assert nameLength > 0 ;
10051038 TruffleString name = substringByteIndexNode .execute (replacement , nameStartPos , nameLength , TS_ENCODING , true );
10061039 if (getCodeRangeNode .execute (name , TS_ENCODING ) != TruffleString .CodeRange .ASCII ) {
1007- throw raiseNode . raise ( inliningTarget , PATTERN_ERROR , BAD_CHAR_IN_GROUP_NAME , name );
1040+ throw raiseRegexErrorNode . executeFormatted ( frame , BAD_CHAR_IN_GROUP_NAME , name );
10081041 }
10091042 int groupNumber = 0 ;
10101043 for (int i = 0 ; i < nameLength ; i += CODEPOINT_LENGTH_ASCII ) {
@@ -1016,7 +1049,7 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
10161049 break ;
10171050 }
10181051 if (groupNumber >= numberOfCaptureGroups ) {
1019- throw raiseNode . raise ( inliningTarget , PATTERN_ERROR , INVALID_GROUP_REFERENCE , groupNumber );
1052+ throw raiseRegexErrorNode . executeFormatted ( frame , INVALID_GROUP_REFERENCE , groupNumber );
10201053 }
10211054 }
10221055 if (groupNumber < 0 ) {
@@ -1048,7 +1081,7 @@ private static void parseReplacement(Object tregexCompiledRegex, TruffleString r
10481081 consumer .codepoint (octalEscape );
10491082 lastPos = nextCPPos ;
10501083 } else if (isDecimalDigit (firstCodepoint )) {
1051-
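1084+ // TODO: a backslash followed by a decimal digit (presumably a numeric group reference) is not handled yet; this branch is currently a no-op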
10521085 }
10531086 }
10541087 }
0 commit comments