@@ -536,9 +536,10 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
/* Evaluates to non-zero when c is a space, a horizontal tab or a form feed.
   The argument c appears three times in the expansion, so it should be free
   of side effects. */
536536#define IS_WHITESPACE (c ) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
537537
538538static int
539- extract_anchors_from_expr (const char * segment_str , expr_ty expr , int * left_anchor , int * right_anchor )
539+ extract_anchors_from_expr (const char * segment_str , expr_ty expr , Py_ssize_t * left_anchor , Py_ssize_t * right_anchor ,
540+ char * * primary_error_char , char * * secondary_error_char )
540541{
541- switch (expr -> kind ) {
542+ switch (expr -> kind ) {
542543 case BinOp_kind : {
543544 expr_ty left = expr -> v .BinOp .left ;
544545 expr_ty right = expr -> v .BinOp .right ;
@@ -554,13 +555,21 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
554555 if (i + 1 < right -> col_offset && !IS_WHITESPACE (segment_str [i + 1 ])) {
555556 ++ * right_anchor ;
556557 }
558+
559+ // Set the error characters
560+ * primary_error_char = "~" ;
561+ * secondary_error_char = "^" ;
557562 break ;
558563 }
559564 return 1 ;
560565 }
561566 case Subscript_kind : {
562567 * left_anchor = expr -> v .Subscript .value -> end_col_offset ;
563568 * right_anchor = expr -> v .Subscript .slice -> end_col_offset + 1 ;
569+
570+ // Set the error characters
571+ * primary_error_char = "~" ;
572+ * secondary_error_char = "^" ;
564573 return 1 ;
565574 }
566575 default :
@@ -569,11 +578,13 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
569578}
570579
571580static int
572- extract_anchors_from_stmt (const char * segment_str , stmt_ty statement , int * left_anchor , int * right_anchor )
581+ extract_anchors_from_stmt (const char * segment_str , stmt_ty statement , Py_ssize_t * left_anchor , Py_ssize_t * right_anchor ,
582+ char * * primary_error_char , char * * secondary_error_char )
573583{
574584 switch (statement -> kind ) {
575585 case Expr_kind : {
576- return extract_anchors_from_expr (segment_str , statement -> v .Expr .value , left_anchor , right_anchor );
586+ return extract_anchors_from_expr (segment_str , statement -> v .Expr .value , left_anchor , right_anchor ,
587+ primary_error_char , secondary_error_char );
577588 }
578589 default :
579590 return 0 ;
@@ -583,7 +594,8 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_
583594static int
584595extract_anchors_from_line (PyObject * filename , PyObject * line ,
585596 Py_ssize_t start_offset , Py_ssize_t end_offset ,
586- int * left_anchor , int * right_anchor )
597+ Py_ssize_t * left_anchor , Py_ssize_t * right_anchor ,
598+ char * * primary_error_char , char * * secondary_error_char )
587599{
588600 int res = -1 ;
589601 PyArena * arena = NULL ;
@@ -620,12 +632,17 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
620632 assert (module -> kind == Module_kind );
621633 if (asdl_seq_LEN (module -> v .Module .body ) == 1 ) {
622634 stmt_ty statement = asdl_seq_GET (module -> v .Module .body , 0 );
623- res = extract_anchors_from_stmt (segment_str , statement , left_anchor , right_anchor );
635+ res = extract_anchors_from_stmt (segment_str , statement , left_anchor , right_anchor ,
636+ primary_error_char , secondary_error_char );
624637 } else {
625638 res = 0 ;
626639 }
627640
628641done :
642+ if (res > 0 ) {
643+ * left_anchor += start_offset ;
644+ * right_anchor += start_offset ;
645+ }
629646 Py_XDECREF (segment );
630647 if (arena ) {
631648 _PyArena_Free (arena );
@@ -646,6 +663,25 @@ ignore_source_errors(void) {
646663 return 0 ;
647664}
648665
666+ static inline int
667+ print_error_location_carets (PyObject * f , int offset , Py_ssize_t start_offset , Py_ssize_t end_offset ,
668+ Py_ssize_t right_start_offset , Py_ssize_t left_end_offset ,
669+ const char * primary , const char * secondary ) {
670+ int err = 0 ;
671+ int special_chars = (left_end_offset != -1 || right_start_offset != -1 );
672+ while (++ offset <= end_offset ) {
673+ if (offset <= start_offset || offset > end_offset ) {
674+ err = PyFile_WriteString (" " , f );
675+ } else if (special_chars && left_end_offset < offset && offset <= right_start_offset ) {
676+ err = PyFile_WriteString (secondary , f );
677+ } else {
678+ err = PyFile_WriteString (primary , f );
679+ }
680+ }
681+ err = PyFile_WriteString ("\n" , f );
682+ return err ;
683+ }
684+
649685static int
650686tb_displayline (PyTracebackObject * tb , PyObject * f , PyObject * filename , int lineno ,
651687 PyFrameObject * frame , PyObject * name )
@@ -665,76 +701,71 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
665701 return err ;
666702 int truncation = _TRACEBACK_SOURCE_LINE_INDENT ;
667703 PyObject * source_line = NULL ;
668- /* ignore errors since we can't report them, can we? */
669- if (!_Py_DisplaySourceLine (f , filename , lineno , _TRACEBACK_SOURCE_LINE_INDENT ,
670- & truncation , & source_line )) {
671- int code_offset = tb -> tb_lasti ;
672- PyCodeObject * code = _PyFrame_GetCode (frame );
673-
674- int start_line ;
675- int end_line ;
676- int start_col_byte_offset ;
677- int end_col_byte_offset ;
678- if (!PyCode_Addr2Location (code , code_offset , & start_line , & start_col_byte_offset ,
679- & end_line , & end_col_byte_offset )) {
680- goto done ;
681- }
682- if (start_line != end_line ) {
683- goto done ;
684- }
685704
686- if (start_col_byte_offset < 0 || end_col_byte_offset < 0 ) {
687- goto done ;
688- }
689-
690- // Convert the utf-8 byte offset to the actual character offset so we
691- // print the right number of carets.
692- Py_ssize_t start_offset = (Py_ssize_t )start_col_byte_offset ;
693- Py_ssize_t end_offset = (Py_ssize_t )end_col_byte_offset ;
694-
695- if (source_line ) {
696- start_offset = _PyPegen_byte_offset_to_character_offset (source_line , start_col_byte_offset );
697- end_offset = _PyPegen_byte_offset_to_character_offset (source_line , end_col_byte_offset );
698- }
705+ if (_Py_DisplaySourceLine (f , filename , lineno , _TRACEBACK_SOURCE_LINE_INDENT ,
706+ & truncation , & source_line ) != 0 ) {
707+ /* ignore errors since we can't report them, can we? */
708+ err = ignore_source_errors ();
709+ goto done ;
710+ }
699711
700- const char * primary , * secondary ;
701- primary = secondary = "^" ;
712+ int code_offset = tb -> tb_lasti ;
713+ PyCodeObject * code = _PyFrame_GetCode ( frame ) ;
702714
703- int left_end_offset = Py_SAFE_DOWNCAST (end_offset , Py_ssize_t , int ) - Py_SAFE_DOWNCAST (start_offset , Py_ssize_t , int );
704- int right_start_offset = left_end_offset ;
715+ int start_line ;
716+ int end_line ;
717+ int start_col_byte_offset ;
718+ int end_col_byte_offset ;
719+ if (!PyCode_Addr2Location (code , code_offset , & start_line , & start_col_byte_offset ,
720+ & end_line , & end_col_byte_offset )) {
721+ goto done ;
722+ }
723+ if (start_line != end_line ) {
724+ goto done ;
725+ }
705726
706- if (source_line ) {
707- int res = extract_anchors_from_line (filename , source_line , start_offset , end_offset ,
708- & left_end_offset , & right_start_offset );
709- if (res < 0 ) {
710- err = ignore_source_errors ();
711- if (err < 0 ) {
712- goto done ;
713- }
714- } else if (res > 0 ) {
715- primary = "^" ;
716- secondary = "~" ;
717- }
718- }
727+ if (start_col_byte_offset < 0 || end_col_byte_offset < 0 ) {
728+ goto done ;
729+ }
719730
720- char offset = truncation ;
721- while (++ offset <= end_offset ) {
722- if (offset <= start_offset ) {
723- err = PyFile_WriteString (" " , f );
724- } else if (offset <= left_end_offset + start_offset ) {
725- err = PyFile_WriteString (secondary , f );
726- } else if (offset <= right_start_offset + start_offset ) {
727- err = PyFile_WriteString (primary , f );
728- } else {
729- err = PyFile_WriteString (secondary , f );
730- }
731+ // When displaying errors, we will use the following generic structure:
732+ //
733+ // ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
734+ //        ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
735+ //        |              |-> left_end_offset      |                 |-> end_offset
736+ //        |-> start_offset                        |-> right_start_offset
737+ //
738+ // In general we will only have (start_offset, end_offset) but we can gather more information
739+ // by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
740+ // we could get *left_end_offset* and *right_start_offset* and some selection of characters for
741+ // the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
742+ // AST information or we cannot identify special ranges within it, then left_end_offset and
743+ // right_start_offset will be set to -1.
744+
745+ // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
746+ Py_ssize_t start_offset = (Py_ssize_t )start_col_byte_offset ;
747+ Py_ssize_t end_offset = (Py_ssize_t )end_col_byte_offset ;
748+ Py_ssize_t left_end_offset = -1 ;
749+ Py_ssize_t right_start_offset = -1 ;
750+
751+ char * primary_error_char = "^" ;
752+ char * secondary_error_char = primary_error_char ;
753+
754+ if (source_line ) {
755+ start_offset = _PyPegen_byte_offset_to_character_offset (source_line , start_col_byte_offset );
756+ end_offset = _PyPegen_byte_offset_to_character_offset (source_line , end_col_byte_offset );
757+ int res = extract_anchors_from_line (filename , source_line , start_offset , end_offset ,
758+ & left_end_offset , & right_start_offset ,
759+ & primary_error_char , & secondary_error_char );
760+ if (res < 0 && ignore_source_errors () < 0 ) {
761+ goto done ;
731762 }
732- err = PyFile_WriteString ("\n" , f );
733763 }
734- else {
735- err = ignore_source_errors ();
736- }
737-
764+
765+ err = print_error_location_carets (f , truncation , start_offset , end_offset ,
766+ right_start_offset , left_end_offset ,
767+ primary_error_char , secondary_error_char );
768+
738769done :
739770 Py_XDECREF (source_line );
740771 return err ;
0 commit comments