re
import re

# re.search returns a Match object when the pattern occurs anywhere in the
# text, and None when it does not.
match = re.search("abc", "aabca")
print(match.group())  # abc is found in text

match = re.search("xyz", "aabca")
print(match is None)  # True: no match
re.search
Match
A
B
AB
BA
AAB
ABB
A((B)C)
Mustrit saab kasutada:
sub in text
Näide: "abc"
Sisalduv tekst: "aabca"
Mittesisalduv tekst: "ababac"
.
Näide: "a.bc"
Sisalduv tekst: "a2bca" Sisalduv tekst: "aaubca"
Mittesisalduv tekst: "abc"
^
Näide: "^abc"
Sisalduv tekst: "abcd"
Mittesisalduv tekst: "aabc"
$
Näide: "abc$"
Sisalduv tekst: "aaabc"
Mittesisalduv tekst: "abca"
?
Näide: "ab?c"
Sisalduv tekst: "aaca" Sisalduv tekst: "aabca"
Mittesisalduv tekst: "aabbcc"
*
Näide: "ab*c"
Sisalduv tekst: "aaca" Sisalduv tekst: "aabca" Sisalduv tekst: "aabbbca"
Mittesisalduv tekst: "abab" Mittesisalduv tekst: "bbc"
+
Näide: "ab+c"
Sisalduv tekst: "aabca" Sisalduv tekst: "aabbbca"
Mittesisalduv tekst: "aac" Mittesisalduv tekst: "abbba"
\
Näide: "a\.bc"
Sisalduv tekst: "a.bc"
Mittesisalduv tekst: "a2bc"
m
{m}
Näide: "ab{2}c"
Sisalduv tekst: "aabbcc"
Mittesisalduv tekst: "aacc" Mittesisalduv tekst: "aabcc" Mittesisalduv tekst: "aabbbcc"
n
{m,n}
Näide: "ab{2,3}c"
Sisalduv tekst: "aabbcc" Sisalduv tekst: "aabbbcc"
Mittesisalduv tekst: "aacc" Mittesisalduv tekst: "aabcc" Mittesisalduv tekst: "aabbbbcc"
[..]
Näide: "[ab]cd"
Sisalduv tekst: "acd" Sisalduv tekst: "bcd"
Mittesisalduv tekst: "cd" Mittesisalduv tekst: "ccd"
[.-.]
Näide: "[a-c]de"
Sisalduv tekst: "ade" Sisalduv tekst: "bde" Sisalduv tekst: "cde"
Mittesisalduv tekst: "de" Mittesisalduv tekst: "dde"
Näide: "[a\-c]cd"
Sisalduv tekst: "acd" Sisalduv tekst: "ccd" Sisalduv tekst: "-cd"
Mittesisalduv tekst: "cd" Mittesisalduv tekst: "bcd"
Näide: "[ac-]cd"
(...)
Näide: "(ab)?cd"
Sisalduv tekst: "acde" Sisalduv tekst: "abcde" Sisalduv tekst: "abbcd"
Mittesisalduv tekst: "ab" Mittesisalduv tekst: "dde"
|
Näide: "(ab|cd)+ef"
Sisalduv tekst: "abefg" Sisalduv tekst: "cdefg" Sisalduv tekst: "acdabefg"
Mittesisalduv tekst: "acef" Mittesisalduv tekst: "abbef"
(?=...)
(?!...)
(?<=...)
(?<!...)
"a(?=b)"
"ab"
Näide: "a(?=b)"
Sisalduv tekst: "aab"
Mittesisalduv tekst: "ac" Mittesisalduv tekst: "aa"
Näide: "a(?!b)"
Sisalduv tekst: "ac" Sisalduv tekst: "a"
Mittesisalduv tekst: "ab" Mittesisalduv tekst: "b"
Näide: "(?<=a)b"
Sisalduv tekst: "aabc"
Mittesisalduv tekst: "b" Mittesisalduv tekst: "acb"
Näide: "(?<!a)b"
Sisalduv tekst: "bc" Sisalduv tekst: "acb"
Mittesisalduv tekst: "ab" Mittesisalduv tekst: "aab"
\1, \2
Näide: "(.+) \1"
Sisalduv tekst: "a ac" Sisalduv tekst: "11 12" Sisalduv tekst: "cc ccc" Sisalduv tekst: "ab abc"
Mittesisalduv tekst: "aa" Mittesisalduv tekst: "ab cd"
\w
Näide: "\w+a" ("[a-zA-Z0-9_]+a")
Sisalduv tekst: "1a" Sisalduv tekst: "aa" Sisalduv tekst: "babac" Sisalduv tekst: "hi babac"
Mittesisalduv tekst: "a" Mittesisalduv tekst: "a,a" Mittesisalduv tekst: " a"
\s
Näide: "a\s{2}b"
Sisalduv tekst: "a  b" Sisalduv tekst: "a\t bc"
Mittesisalduv tekst: "ab" Mittesisalduv tekst: "a b" Mittesisalduv tekst: "a   b"
\d
Näide: "\d{2}"
Sama: "[0-9]{2}"
Sisalduv tekst: "12" Sisalduv tekst: "a34c" Sisalduv tekst: "a123"
Mittesisalduv tekst: "ab" Mittesisalduv tekst: "1 2" Mittesisalduv tekst: "a2"
[^..]
Näide: "[^a-c1-3]x"
Sisalduv tekst: "4x" Sisalduv tekst: "ayxc"
Mittesisalduv tekst: "ax" Mittesisalduv tekst: "1x" Mittesisalduv tekst: "cx"
"\\\\"
r"\\"
r""
"\\"
"d"
import re

# Matching one literal backslash: the regex engine needs the two-character
# pattern \\ , which is written "\\\\" in a normal string or r"\\" in a raw
# string.
text = "backslash: \\"
print(text)  # backslash: \
print("match" if re.search("\\\\", text) else "no match")  # match
print("match" if re.search(r"\\", text) else "no match")  # match
# Intentional failure: "\\" is a single backslash, which is an incomplete
# escape for the regex engine, so re.search raises re.error here.
print("match" if re.search("\\", text) else "no match")  # error
\\
import re

# In a normal string literal "\1" is the character with code 1, not the
# backreference \1, so the first pattern never sees a backreference.
print("match" if re.search("(.)\1", "aa") else "no match")  # no match, why?
# Escaping the backslash or using a raw string passes \1 through to re.
print("match" if re.search("(.)\\1", "aa") else "no match")  # match
print("match" if re.search(r"(.)\1", "aa") else "no match")  # match
\1
import re

# group() and group(0) return the whole match; group(n) returns the text
# captured by the n-th parenthesised group.
match = re.search("xk(ab)?(cd)", "xkcde")
print(match.group())  # xkcd
print(match.group(0))  # xkcd
# The optional group "(ab)?" did not take part in the match, so it is None.
print(match.group(1))  # None. why? for "(ab)"?
print(match.group(2))  # cd, for "(cd)"
group
group(0)
group(1)
import re

# (?:...) groups a sub-pattern without capturing it, so it does not appear
# in groups().
match = re.search("(?:ab)?(cd)e", "acde")  # (?:...)
print(match.group())  # cde
print(match.groups())  # ('cd',)

# With a capturing group the unmatched optional "(ab)?" shows up as None.
match = re.search("(ab)?(cd)e", "acde")
print(match.group())  # cde
print(match.groups())  # (None, 'cd')
(?:...)
.groups()
match()
search()
import re

# re.match only tries the pattern at the start of the text, while re.search
# looks for it anywhere in the text.
m1 = re.match("c", "abcdef")
m2 = re.search("c", "abcdef")
print("Match" if m1 is not None else "No match")  # No match
print("Match" if m2 is not None else "No match")  # Match
findall()
finditer()
import re

text = "tere minu@email.ee, sõbra email on guido@baggins.com ja guits@bag.com"

# findall returns the matched substrings as a list of strings.
emails = re.findall(r"[\w.-]+@[\w.-]+", text)
for email in emails:
    print(email)

# finditer yields Match objects instead, so the text is read via group().
for match in re.finditer(r"[\w.-]+@[\w.-]+", text):
    print(match.group())
emails
finditer
(ab|cd)?cd