@@ -883,31 +883,137 @@ def test_named_unicode_escapes(self):
883883 self .checkPatternError (br'\N{LESS-THAN SIGN}' , r'bad escape \N' , 0 )
884884 self .checkPatternError (br'[\N{LESS-THAN SIGN}]' , r'bad escape \N' , 1 )
885885
886- def test_string_boundaries (self ):
886+ def test_word_boundaries (self ):
887887 # See http://bugs.python.org/issue10713
888- self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ),
889- "abc" )
888+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ), "abc" )
889+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" , re .ASCII ).group (1 ), "abc" )
890+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" ).group (1 ), b"abc" )
891+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" , re .LOCALE ).group (1 ), b"abc" )
892+ self .assertEqual (re .search (r"\b(ьюя)\b" , "ьюя" ).group (1 ), "ьюя" )
893+ self .assertIsNone (re .search (r"\b(ьюя)\b" , "ьюя" , re .ASCII ))
894+ # There's a word boundary between a word and a non-word.
895+ self .assertTrue (re .match (r".\b" , "a=" ))
896+ self .assertTrue (re .match (r".\b" , "a=" , re .ASCII ))
897+ self .assertTrue (re .match (br".\b" , b"a=" ))
898+ self .assertTrue (re .match (br".\b" , b"a=" , re .LOCALE ))
899+ self .assertTrue (re .match (r".\b" , "я=" ))
900+ self .assertIsNone (re .match (r".\b" , "я=" , re .ASCII ))
901+ # There's a word boundary between a non-word and a word.
902+ self .assertTrue (re .match (r".\b" , "=a" ))
903+ self .assertTrue (re .match (r".\b" , "=a" , re .ASCII ))
904+ self .assertTrue (re .match (br".\b" , b"=a" ))
905+ self .assertTrue (re .match (br".\b" , b"=a" , re .LOCALE ))
906+ self .assertTrue (re .match (r".\b" , "=я" ))
907+ self .assertIsNone (re .match (r".\b" , "=я" , re .ASCII ))
908+ # There is no word boundary inside a word.
909+ self .assertIsNone (re .match (r".\b" , "ab" ))
910+ self .assertIsNone (re .match (r".\b" , "ab" , re .ASCII ))
911+ self .assertIsNone (re .match (br".\b" , b"ab" ))
912+ self .assertIsNone (re .match (br".\b" , b"ab" , re .LOCALE ))
913+ self .assertIsNone (re .match (r".\b" , "юя" ))
914+ self .assertIsNone (re .match (r".\b" , "юя" , re .ASCII ))
915+ # There is no word boundary between non-word characters.
916+ self .assertIsNone (re .match (r".\b" , "=-" ))
917+ self .assertIsNone (re .match (r".\b" , "=-" , re .ASCII ))
918+ self .assertIsNone (re .match (br".\b" , b"=-" ))
919+ self .assertIsNone (re .match (br".\b" , b"=-" , re .LOCALE ))
920+ # There is no non-boundary match between a word and a non-word.
921+ self .assertIsNone (re .match (r".\B" , "a=" ))
922+ self .assertIsNone (re .match (r".\B" , "a=" , re .ASCII ))
923+ self .assertIsNone (re .match (br".\B" , b"a=" ))
924+ self .assertIsNone (re .match (br".\B" , b"a=" , re .LOCALE ))
925+ self .assertIsNone (re .match (r".\B" , "я=" ))
926+ self .assertTrue (re .match (r".\B" , "я=" , re .ASCII ))
927+ # There is no non-boundary match between a non-word and a word.
928+ self .assertIsNone (re .match (r".\B" , "=a" ))
929+ self .assertIsNone (re .match (r".\B" , "=a" , re .ASCII ))
930+ self .assertIsNone (re .match (br".\B" , b"=a" ))
931+ self .assertIsNone (re .match (br".\B" , b"=a" , re .LOCALE ))
932+ self .assertIsNone (re .match (r".\B" , "=я" ))
933+ self .assertTrue (re .match (r".\B" , "=я" , re .ASCII ))
934+ # There's a non-boundary match inside a word.
935+ self .assertTrue (re .match (r".\B" , "ab" ))
936+ self .assertTrue (re .match (r".\B" , "ab" , re .ASCII ))
937+ self .assertTrue (re .match (br".\B" , b"ab" ))
938+ self .assertTrue (re .match (br".\B" , b"ab" , re .LOCALE ))
939+ self .assertTrue (re .match (r".\B" , "юя" ))
940+ self .assertTrue (re .match (r".\B" , "юя" , re .ASCII ))
941+ # There's a non-boundary match between non-word characters.
942+ self .assertTrue (re .match (r".\B" , "=-" ))
943+ self .assertTrue (re .match (r".\B" , "=-" , re .ASCII ))
944+ self .assertTrue (re .match (br".\B" , b"=-" ))
945+ self .assertTrue (re .match (br".\B" , b"=-" , re .LOCALE ))
890946 # There's a word boundary at the start of a string.
891947 self .assertTrue (re .match (r"\b" , "abc" ))
948+ self .assertTrue (re .match (r"\b" , "abc" , re .ASCII ))
949+ self .assertTrue (re .match (br"\b" , b"abc" ))
950+ self .assertTrue (re .match (br"\b" , b"abc" , re .LOCALE ))
951+ self .assertTrue (re .match (r"\b" , "ьюя" ))
952+ self .assertIsNone (re .match (r"\b" , "ьюя" , re .ASCII ))
953+ # There's a word boundary at the end of a string.
954+ self .assertTrue (re .fullmatch (r".+\b" , "abc" ))
955+ self .assertTrue (re .fullmatch (r".+\b" , "abc" , re .ASCII ))
956+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" ))
957+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" , re .LOCALE ))
958+ self .assertTrue (re .fullmatch (r".+\b" , "ьюя" ))
959+ self .assertIsNone (re .search (r"\b" , "ьюя" , re .ASCII ))
892960 # A non-empty string includes a non-boundary zero-length match.
893- self .assertTrue (re .search (r"\B" , "abc" ))
961+ self .assertEqual (re .search (r"\B" , "abc" ).span (), (1 , 1 ))
962+ self .assertEqual (re .search (r"\B" , "abc" , re .ASCII ).span (), (1 , 1 ))
963+ self .assertEqual (re .search (br"\B" , b"abc" ).span (), (1 , 1 ))
964+ self .assertEqual (re .search (br"\B" , b"abc" , re .LOCALE ).span (), (1 , 1 ))
965+ self .assertEqual (re .search (r"\B" , "ьюя" ).span (), (1 , 1 ))
966+ self .assertEqual (re .search (r"\B" , "ьюя" , re .ASCII ).span (), (0 , 0 ))
894967 # There is no non-boundary match at the start of a string.
895- self .assertFalse (re .match (r"\B" , "abc" ))
968+ self .assertIsNone (re .match (r"\B" , "abc" ))
969+ self .assertIsNone (re .match (r"\B" , "abc" , re .ASCII ))
970+ self .assertIsNone (re .match (br"\B" , b"abc" ))
971+ self .assertIsNone (re .match (br"\B" , b"abc" , re .LOCALE ))
972+ self .assertIsNone (re .match (r"\B" , "ьюя" ))
973+ self .assertTrue (re .match (r"\B" , "ьюя" , re .ASCII ))
974+ # There is no non-boundary match at the end of a string.
975+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" ))
976+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" , re .ASCII ))
977+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" ))
978+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" , re .LOCALE ))
979+ self .assertIsNone (re .fullmatch (r".+\B" , "ьюя" ))
980+ self .assertTrue (re .fullmatch (r".+\B" , "ьюя" , re .ASCII ))
896981 # However, an empty string contains no word boundaries, and also no
897982 # non-boundaries.
898- self .assertIsNone (re .search (r"\B" , "" ))
983+ self .assertIsNone (re .search (r"\b" , "" ))
984+ self .assertIsNone (re .search (r"\b" , "" , re .ASCII ))
985+ self .assertIsNone (re .search (br"\b" , b"" ))
986+ self .assertIsNone (re .search (br"\b" , b"" , re .LOCALE ))
899987 # This one is questionable and different from the perlre behaviour,
900988 # but describes current behavior.
901- self .assertIsNone (re .search (r"\b" , "" ))
989+ self .assertIsNone (re .search (r"\B" , "" ))
990+ self .assertIsNone (re .search (r"\B" , "" , re .ASCII ))
991+ self .assertIsNone (re .search (br"\B" , b"" ))
992+ self .assertIsNone (re .search (br"\B" , b"" , re .LOCALE ))
902993 # A single word-character string has two boundaries, but no
903994 # non-boundary gaps.
904995 self .assertEqual (len (re .findall (r"\b" , "a" )), 2 )
996+ self .assertEqual (len (re .findall (r"\b" , "a" , re .ASCII )), 2 )
997+ self .assertEqual (len (re .findall (br"\b" , b"a" )), 2 )
998+ self .assertEqual (len (re .findall (br"\b" , b"a" , re .LOCALE )), 2 )
905999 self .assertEqual (len (re .findall (r"\B" , "a" )), 0 )
1000+ self .assertEqual (len (re .findall (r"\B" , "a" , re .ASCII )), 0 )
1001+ self .assertEqual (len (re .findall (br"\B" , b"a" )), 0 )
1002+ self .assertEqual (len (re .findall (br"\B" , b"a" , re .LOCALE )), 0 )
9061003 # If there are no words, there are no boundaries
9071004 self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
1005+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
1006+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
1007+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
9081008 self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
1009+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
1010+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
1011+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
9091012 # Can match around the whitespace.
9101013 self .assertEqual (len (re .findall (r"\B" , " " )), 2 )
1014+ self .assertEqual (len (re .findall (r"\B" , " " , re .ASCII )), 2 )
1015+ self .assertEqual (len (re .findall (br"\B" , b" " )), 2 )
1016+ self .assertEqual (len (re .findall (br"\B" , b" " , re .LOCALE )), 2 )
9111017
9121018 def test_bigcharset (self ):
9131019 self .assertEqual (re .match ("([\u2222 \u2223 ])" ,
0 commit comments