Python 문자열 및 파일시스템

------------------------------------문자열 다루기--------------------------------------
"str".capitalize() -첫 문자를 대문자로 나머지는 소문자로
"python is powerful".count('p', 0, -1) -문자열의 처음부터 끝까지 p가 몇개인지 리턴
"가나다".encode('cp949') -윈도우에서 사용하는 cp949로 변환 utf-8
"가나다".encode('latin1', 'ignore') - 에러난 부분 무시 replace, xmlcharrefreplace, backslashreplace
"python is powerful".endswith('ful', 5, -1) - 5에서 -1로 슬라이싱 후 비교
"python is powerful".endswith(('m','l')) - 튜플사용가능
"python\tis\tpowerful".expandtabs(3) - \t 텝문자를 3칸 공백으로 만든다
"str".find('key', start, end) - 문자열찾기(못찾으면 -1)
"str".index('key', start, end) - 문자열찾기(못찾으면 에러)
"str".isalnum() - 특수문자포함되면 false 문자,숫자만 있으면 true
"str".isalpha() - 알파벳으로만 이루어졌나?
"str".islower() - 모두 소문자인가?
"str".isspace() - 모두 공백,공백문자인가?
"str".istital() - 단어마다 맨앞에 대문자?
"str".isupper() - 모두 대문자?
"str".isdecimal() , isdigit() - 10진수로 되어있으면 true
"str".isnumeric() - 숫자면 true
"str".isidentifier() - 변수사용가능? chr(0xc0), chr(0xc0).isidentifier()
"str".isprintable() - 출력가능 문자인가?
"\t".join(["python","is","powerful"]) -사이에 텝으로 연결
"PYTHON".lower() - 소문자로
--------------------------------------정규표현식 re모듈
re.search('[0-9]*th','35th') -0~9까지 숫자가 0번이상 반복되고 th가 연결된 문장을 확인
re.split('[:. ]+', 'apple Orange:banana tomato', 2) -구분자로 : . ' '이 사용됨 2번만 분리
re.split('\n+', text) -행구분자 분리
re.findall(r"app\w*", "application orange apple") -app로 시작하는 모든 단어
re.sub(r"[:,|\s]", ", " , "Apple:Orange Banana|Tomato", 2) -필드구분자를 , 로 변경하여 한문장으로 변경
re.sub(r"\b(\d{4}-\d{4})\b", r"\1", "Copyright Derick 1990-2009") - 연도부분을 이텔릭체 로변경
re.sub(r"\b(?P<yaer>\d{4}-\d{4})\b", r"\g<year>", "Copyright Derick 1990-2009")
def Upper(m):
    return m.group().upper()
re.sub("[T|t]he", Upper, "The time is the money") -The나 The를 Upper함수로 보내어 변경
c = re.compile(r"app\w*")
c.findall("application orange apple") -정규표현식C를 이용하여 검색
c = re.compile('apple', re.I) -대소문자 구분 X
c= re.compile('^.+') -첫라인만 매칭 c=re.compile('^.+', re.M) -멀티라인
----------------------------------title테그 내용출력
import urllib.request, re
web = urllib.request.urlopen("http://www.example.com")
html = web.read()
web.close()
code = str(html).encode('utf-8').decode('cp949')
c = re.compile(r""".*? #<title>앞의 모든 문자 무시
<title.*?> #<title>태그, 옵션은 무시
(.*) #내용저장
</title> #</title>태그
""", re.I|re.S|re.X) #대소문자무시, 개행포함
c.findall(code)
--------------------------------------------------match
telChecker = re.compile(r"(\d{2,3})-(\d{3,4})-(\d{4})")
bool(telChecker.match("02-123-4532")) -전화번호 검사
m = telChecker.match("02-123-3243")
m.groups() -튜플로 반환
m.group() - 전체문자열반환
m.group(1) -첫번째 문자열반환
m.group(2,3) -2,3번째 문자열 튜플로 반환
m.start() -시작 문자열 인덱스 m.start(2) - "123"의 시작인덱스
m.end() -종료인덱스 (문자열길이) m.end(2)- "123"의 종료인덱스
m.string[m.start(2):m.end(3)] -지역번호를 제외한 전화번호출력
m = re.match(r"(?P<area_code>\d+)-(?P<exchange_number>\d+)-(?P<user_number>\d+)", "02-123-2342") -문자열에 이름부여
m.group("user_number") -2342출력
m.groupdict() -모든 문자열 사전형태로 반환

-----------------------------------------파일시스템 from os.path import *
abspath('tmp') -tmp의 절대경로를 반환
basename('C:\\Python30\\tmp')   -파일이름을 반환 tmp   dirname('path') -폴더만반환
commonprefix(['path1', 'path2']) -path들의 공통경로반환
exists('path') -파일이나 디렉토리의 존재유무
expanduser('~\\test') -~를 사용자 홈 디렉토리의 절대경로로 변경
expandvars('$HOME\\temp') -환경변수를 경로로 변경
getatime('path') -최근접근시간 import time    time.gmtime(getatime('path'))
time.struct_time(tm_year=2009, ...)
getmtime('path') -최근 변경시간
getctime('path') -생성시간
getsize('path') -파일크기
isabs('path')- path가 절대경로인가?
isfile('path') - 파일인가? - isdir('path')
join('C:\\Python30', 'D:\\Test', 'test.py') -D:\\Test\\test.py로 변경 (뒤에 절대경로가 오면 앞 경로는 버려진다)
normcase('path') - OS에 맞도록 경로 문자열 조정
normpath('C:/Python30/../python.exe') -'C:\\python.exe' 정규화
split('path') - path를 디렉토리와 파일부분으로 나눈다.
splitdrive('path') - path를 드라이브 부분과 나머지로 나눈다.
splitext('path') -확장자부분을 나눈다.
import glob---------------------dir기능
glob.glob('canon?.*')     glob.glob('canon[0-9].*')    - canon2.mp3 검색
glob.glob('*.exe')     glob.glob(abspath('.') + '\\*.exe') -현재경로의 모든 exe파일검색
glob.iglob('*') - for i in glob.iglob('*'):   print(i)    -결과가 많은경우에

저작자표시

'프로그램 > Python' 카테고리의 다른 글

python timeout (only linux) (0)	2018.12.18
error : UnicodeEncodeError: 'cp949' codec can't encode character '\u2764' in position 19: illegal multibyte sequence (0)	2018.11.26
python - OptionParser (3)	2013.05.21
python – os.glob 모듈 (0)	2013.05.21
python split() (0)	2013.05.21

'프로그램 > Python' 카테고리의 다른 글

티스토리툴바