Huffman 압축 및 해제 프로그램

https://needneo.tistory.com/95

[Python] 파이썬 명령 인자값 받는 방법 (sys.argv)

파이썬으로 작성된 파일을 실행할 때 인수(argument, 인자값)를 받아서 처리를 해야 되는 경우가 있다. 예를 들어, 로컬과 개발 등의 환경이 서로 달라서 인자값을 줘야 한다던지 같은 파일을 다른

needneo.tistory.com

huffman.py라는 파이썬 프로그램에서 3개의 인자값을 받아 이 값을 해석해 허프만 압축 및 복호화하는 프로그램을 만들고자 한다.

3개의 입력인자

1. -z(압축) / -x(압축해제) : 2개 중 선택

2. 입력 파일명

3. 출력 파일명

위의 세 개의 인자값을 입력받았을 때 첫 번째로 입력받은 인자가

"-z" 면, "입력 파일명"에 해당하는 파일을 읽어서 Huffman 코드로 변환하여 "출력 파일명"으로 저장

ex) python huffman.py -z data.txt data_huffman.txt (data.txt라는 파일을 읽어서 huffman 코드로 변환하여 data_huffman.txt파일로 저장)

"-x" 면, Huffman 코드로 저장된 "입력 파일명"에 해당하는 파일을 읽어서 Huffman 코드를 일반 텍스트로 복호화하여 "출력 파일명"으로 저장

ex) python huffman.py -x data_huffman.txt data.txt (data_huffman.txt라는 파일을 읽어서 복호화하여 data.txt파일로 저장)

(될 때까지 해보기~)

# 단어의 빈도수가 있는 텍스트파일이 있음
# heap에서 pop두번해서 꺼낸 두개의 요소를 합쳐서(merge) 다시 넣고 또 두개요소를 뽑아서 다시 넣고를 반복 - 최종적으로 1개만 남을 때까지
# 제일 작은 빈도수를 가진 두개의 요소가 뽑힐 것
# heap : 제일 위의 노드가 최솟값을 가짐
# heap에서 pop을 하면 제일 위 노드가 꺼내짐 => 최솟값이 꺼내지는 것
# 첫번째 pop은 왼쪽에 두번째 pop은 오른쪽에 두기로 일단 정함
# 1. 트리만들기
# 2. 만든 트리를 탐색하면서 0(왼쪽으로 내려가면), 1(오른쪽으로 내려가면) 번호 붙이기


import heapq
import sys


class Entry:
    """Node of a Huffman tree that can also be stored directly in a heap.

    Attributes (set in ``__init__``):
        frequency: Occurrence count used as the ordering key.
        word: The character of a leaf, or the concatenated characters of
            the children for an internal node.
        left, right: Child nodes, or None.
        code: Huffman code assigned to this node so far.
    """

    def __init__(self, freq, value, l, r, code):
        # Ordering key, symbol text and the code assigned so far.
        self.frequency, self.word, self.code = freq, value, code
        # Child links.
        self.left, self.right = l, r

    def __lt__(self, other):
        """Return True when this entry's frequency is lower than ``other``'s.

        heapq compares items with ``<``, so this is what makes Entry
        objects usable as heap elements.
        """
        return self.frequency < other.frequency

    # --- read accessors -------------------------------------------------

    def get_key(self):
        """Return the frequency."""
        return self.frequency

    def get_value(self):
        """Return the character(s) stored in this node."""
        return self.word

    def get_left(self):
        """Return the left child."""
        return self.left

    def get_right(self):
        """Return the right child."""
        return self.right

    def get_code(self):
        """Return the Huffman code currently assigned to this node."""
        return self.code

    # --- mutator --------------------------------------------------------

    def set_code(self, code):
        """Replace the Huffman code of this node."""
        self.code = code


def create_huffman_tree(a):
    """Collapse the heap ``a`` into a single tree and return its root.

    While at least two items remain, the two smallest are popped and
    replaced by a new Entry whose frequency is their sum, whose value is
    their values concatenated, and whose left/right children are the first
    and second popped item. The heap is emptied in the process.
    """
    while len(a) >= 2:
        first = heapq.heappop(a)    # smallest item -> left child
        second = heapq.heappop(a)   # next smallest -> right child
        merged = Entry(
            first.get_key() + second.get_key(),
            first.get_value() + second.get_value(),
            first,
            second,
            '',
        )
        heapq.heappush(a, merged)

    # Exactly one item is left at this point: the root.
    root = heapq.heappop(a)
    return root


# 허프만압축을 통해 문자별 부여되는 이진코드
def update_code(node, dict=None):
    """Assign Huffman codes below ``node`` and collect them in a table.

    The tree is walked depth-first. Each child's code is its parent's code
    plus '0' (left) or '1' (right). A node whose value is exactly one
    character long is treated as a leaf and recorded in the table.

    Args:
        node: Tree node exposing get_left/get_right/get_code/set_code/
            get_value.
        dict: Table to fill (character -> code). A new one is created when
            omitted. The name shadows the builtin but is kept so existing
            keyword callers keep working.

    Returns:
        The table that was filled.
    """
    if dict is None:
        dict = {}

    left = node.get_left()
    right = node.get_right()

    # A childless node without a code can only be the root of a one-symbol
    # tree (children always receive a non-empty code below). Give it a
    # one-bit code so the text can still be encoded and decoded.
    if left is None and right is None and not node.get_code():
        node.set_code('0')

    # Treat a missing root code as the empty prefix.
    prefix = node.get_code() or ''

    if left is not None:
        left.set_code(prefix + '0')
        update_code(left, dict)

    if right is not None:
        right.set_code(prefix + '1')
        update_code(right, dict)

    if len(node.get_value()) == 1:  # leaf: record its final code
        dict[node.get_value()] = node.get_code()

    return dict


def print_heap(a):
    """Print every item of ``a`` as "[frequency, value] " on one line."""
    for entry in a:
        print(f"[{entry.get_key()}, {entry.get_value()}] ", end="")
    print()  # terminate the line

# text를 허프만압축을 통해 이진코드로 변환

def encode_text(code_table, text):
    """Encode ``text`` by concatenating the code of each character.

    Args:
        code_table: Mapping from a character to its code string.
        text: Characters to encode, processed in order.

    Returns:
        The codes of all characters joined into one string ("" for empty
        text).

    Raises:
        KeyError: If a character of ``text`` is missing from ``code_table``.
    """
    # join() builds the result in one pass; repeated ``+=`` on a string can
    # degrade to quadratic time on long inputs.
    return "".join(code_table[ch] for ch in text)


def make_heapq(text: str):
    """Return a min-heap holding one leaf Entry per distinct character.

    Each Entry carries the character's occurrence count as its frequency
    and has no children and no code yet.
    """
    frequency = {}
    for symbol in text:
        if symbol in frequency:
            frequency[symbol] += 1
        else:
            frequency[symbol] = 1

    heap = []
    for symbol, count in frequency.items():
        heap.append(Entry(count, symbol, None, None, None))

    # heapify orders the list in place using Entry.__lt__.
    heapq.heapify(heap)
    return heap


def test():
    """Demo run: print the heap and the code table for a fixed sentence."""
    sample = "Huffman coding is a data compression algorithm."
    heap = make_heapq(sample)

    print("최소 힙:")
    print_heap(heap)

    print("허프만 코드:")
    code_table = update_code(create_huffman_tree(heap))
    print(code_table)

    print()
    # The encoded string is computed but not used.
    encode_text(code_table, sample)


def decode_test(dict, text):
    """Decode the bit string ``text`` using the code table ``dict``.

    Args:
        dict: Mapping from a character to its code (the encoder's table).
        text: Encoded string to decode.

    Returns:
        The decoded characters joined together. Trailing bits that never
        complete a known code are dropped.
    """
    # Decoding looks characters up by code, so invert the table.
    by_code = {bits: symbol for symbol, bits in dict.items()}

    pieces = []
    begin = 0
    # Grow the window [begin:stop) one bit at a time; once it matches a
    # code, emit the character and start a new window right after it.
    for stop in range(1, len(text) + 1):
        candidate = str(text[begin:stop])
        if candidate in by_code:
            pieces.append(by_code[candidate])
            begin = stop

    return "".join(pieces)


def test_file():
    """Debug helper: round-trip a fixed text file through Huffman coding.

    Reads C:/workspace_vscode/새파일.txt, builds the code table, prints the
    encoded and the decoded string, then prints the command-line arguments.
    """
    print("!!! test_file() 함수 실행 !!!")
    # Read the whole file. The earlier line-by-line loop overwrote ``text``
    # on every iteration, so only the last line was processed, and the
    # handle was not closed if reading raised.
    with open("C:/workspace_vscode/새파일.txt", 'r') as f:
        text = f.read()

    heap = make_heapq(text)
    tree = create_huffman_tree(heap)
    code_table = update_code(tree)

    encodeString = encode_text(code_table, text)
    print(f"encodeString: {encodeString}")

    decodeString = decode_test(code_table, encodeString)
    print(f"decodeString: {decodeString}")

    print("------------------------------인자 확인")
    print("인자 개수: ", len(sys.argv))
    for v in sys.argv:
        print(f"인자확인 => {v}")


def encoding(inputFile, outputFile):
    """Huffman-encode a text file and store the bits plus the code table.

    Reads C:/workspace_vscode/<inputFile> as UTF-8 and writes
    ./<outputFile> with three parts: the encoded bit string, a blank line,
    and the code table as a dict literal (needed later for decoding).

    Args:
        inputFile: File name of the plain text, relative to
            C:/workspace_vscode/.
        outputFile: File name of the result, relative to the current
            directory.
    """
    # Read the complete file. The earlier loop kept only the last line, so
    # multi-line inputs lost everything before it.
    with open(f"C:/workspace_vscode/{inputFile}", "r", encoding="UTF8") as f:
        text = f.read()

    heap = make_heapq(text)
    print("최소 힙:")
    print_heap(heap)

    print("허프만 코드:")
    # An empty input has no symbols, hence no tree; use an empty table
    # instead of popping from an empty heap.
    code_table = update_code(create_huffman_tree(heap)) if heap else {}
    print(code_table)

    encodeString = encode_text(code_table, text)
    print(f"{text}=> {encodeString}")

    # decoding() opens this file as CP949, so write it with that encoding
    # explicitly rather than relying on the platform default.
    with open(f"./{outputFile}", "w", encoding="CP949") as f:
        f.write(encodeString)          # Huffman bit string
        f.write(f"\n\n{code_table}")   # blank line, then the code table


def decoding(inputFile, outputFile):
    """Decode a Huffman file written by encoding() back into plain text.

    ./<inputFile> is read as CP949 and must contain the bit string on the
    first line, one blank line, and then the code table as a dict literal.
    The decoded text is written to ./<outputFile>.

    Args:
        inputFile: Name of the encoded file, relative to the current
            directory.
        outputFile: Name of the file to create, relative to the current
            directory.

    Raises:
        ValueError: If the file does not have that layout or the code
            table is not a dict literal.
    """
    from ast import literal_eval  # local import keeps the block self-contained

    with open(f"./{inputFile}", "r", encoding="CP949") as f:
        content = f.read()

    # Split at the blank line. The earlier readline loop never terminated
    # when the second line was not blank and raised IndexError at EOF when
    # the first line had no newline; report both as a format error instead.
    text, separator, table_source = content.partition("\n\n")
    if not separator or "\n" in text or not table_source.strip():
        raise ValueError(
            f"{inputFile}: expected '<bits>', a blank line, then the code table")

    # SECURITY: the table used to be parsed with eval(), which executes any
    # expression found in the file. literal_eval accepts literals only.
    try:
        code_table = literal_eval(table_source.strip())
    except (ValueError, SyntaxError) as err:
        raise ValueError(f"{inputFile}: code table is not a valid dict literal") from err
    if not isinstance(code_table, dict):
        raise ValueError(f"{inputFile}: code table is not a dict")

    # Decoding looks characters up by code, so invert the table.
    decodeDict = {bits: symbol for symbol, bits in code_table.items()}

    # Grow the window [start:end) bit by bit; once it equals a known code,
    # emit the character and begin a new window after it.
    pieces = []
    start = 0
    for end in range(1, len(text) + 1):
        chunk = text[start:end]
        if chunk in decodeDict:
            pieces.append(decodeDict[chunk])
            start = end
    decodeString = "".join(pieces)

    with open(f"./{outputFile}", "w") as f:
        f.write(decodeString)


def fileToHuffman(zOrx, inputFile, outputFile):
    """Debug stub: print the command-line arguments.

    The three parameters are accepted but not used.
    """
    print("fileToHuffman 함수 실행")
    arguments = sys.argv
    print("인자 개수: ", len(arguments))
    for argument in arguments:
        print(argument)


# 실행
if __name__ == "__main__":
    # Command line: python huffman.py (-z | -x) <input file> <output file>
    #   -z  compress <input file> into <output file>
    #   -x  decompress <input file> into <output file>
    # (The sample ``data`` heap that used to be built here was never used
    # and has been dropped.)
    print("----------------허프만 압축(-z) 먼저 실행 후 복호화 실행-----------------------")
    print("세가지 인자 입력")

    # Map each flag to its banner and handler so that an unknown flag is
    # rejected instead of silently being treated as "decompress".
    actions = {
        "-z": ("-------------------------------------------허프만 압축", encoding),
        "-x": ("---------------------------------------------복호화", decoding),
    }

    if len(sys.argv) != 4 or sys.argv[1] not in actions:
        print(f"usage: python {sys.argv[0]} (-z | -x) <input file> <output file>")
        sys.exit(2)

    zOrx, inputFile, outputFile = sys.argv[1:4]
    banner, action = actions[zOrx]
    print(banner)
    action(inputFile, outputFile)

    # fileToHuffman("-z", "새파일.txt", "허프만번역.txt")

    # zOrx = sys.argv[1]
    # input = sys.argv[2]
    # output = sys.argv[3]
    # print(f"압축및해제: {zOrx} | 입력파일명: {input} | 출력파일명: {output}")


    # -------------------------------------------------------------------

    # text = "abbbaaadaaaaafffedf"
    # # text = "hi"
    # # print(len(text))
    # # print(text[0:len(text)])
    # list = make_heapq(text)

    # print('최소 힙:')
    # print_heap(list)

    # print('허프만 코드:')
    # tree = create_huffman_tree(list)
    # dict = update_code(tree)
    # print(dict)

    # print()
    # encodeString = encode_text(dict, text)
    # print(encodeString)
    # print(encodeString[0:1])

    # # 이진코드를 키값으로 바꾸기
    # decodeDict = {v:k for k,v in dict.items()}

    # bool = str(encodeString[0:1]) in decodeDict
    # print(f"True or False: {bool}")

    # # 읽어들인 이진코드가 decodeDict에 있으면 그 키에 해당하는 value를 출력
    # print(f"{str(encodeString[0:1])} => {decodeDict[str(encodeString[0:1])]}")
    
    # 파일 읽어들인 한줄에서 한글자 추출
    # str = "Hello"
    # print(str[0])
    # print(str[len(str)-1])
    
    # f= open("C:/workspace_vscode/data.txt", "r", encoding="UTF8")
    # text = ""
    # while True:
    #     line = f.readline()
    #     if not line: break
    #     print(line)
    #     text = line
    
    # print("while문에 쓴 변수를 쓸 수 있나? ", text)
    # print(text[0])
    # print(text[len(text)-1])   
    # f.close()
    
    
    # dict 실험
    # f= open("C:/workspace_vscode/dict.txt", "r", encoding="UTF8")
    # dict = {}
    # while True:
    #     line = f.readline()
    #     if not line: break
    #     print(f"메모장에서 읽어들인 text: {line}")
    #     dict = eval(line)   # string -> dict
        
    # print(f"읽어들인 string을 dict로 변환: {dict}")

완성!!!!

import heapq
import sys

class Entry:
    """Node of a Huffman tree that can also be stored directly in a heap.

    Attributes (set in ``__init__``):
        frequency: Occurrence count used as the ordering key.
        word: The character of a leaf, or the concatenated characters of
            the children for an internal node.
        left, right: Child nodes, or None.
        code: Huffman code assigned to this node so far.
    """

    def __init__(self, freq, value, l, r, code):
        # Ordering key, symbol text and the code assigned so far.
        self.frequency, self.word, self.code = freq, value, code
        # Child links.
        self.left, self.right = l, r

    def __lt__(self, other):
        """Return True when this entry's frequency is lower than ``other``'s.

        heapq compares items with ``<``, so this is what makes Entry
        objects usable as heap elements.
        """
        return self.frequency < other.frequency

    # --- read accessors -------------------------------------------------

    def get_key(self):
        """Return the frequency."""
        return self.frequency

    def get_value(self):
        """Return the character(s) stored in this node."""
        return self.word

    def get_left(self):
        """Return the left child."""
        return self.left

    def get_right(self):
        """Return the right child."""
        return self.right

    def get_code(self):
        """Return the Huffman code currently assigned to this node."""
        return self.code

    # --- mutator --------------------------------------------------------

    def set_code(self, code):
        """Replace the Huffman code of this node."""
        self.code = code


def create_huffman_tree(a):
    """Collapse the heap ``a`` into a single tree and return its root.

    While at least two items remain, the two smallest are popped and
    replaced by a new Entry whose frequency is their sum, whose value is
    their values concatenated, and whose left/right children are the first
    and second popped item. The heap is emptied in the process.
    """
    while len(a) >= 2:
        first = heapq.heappop(a)    # smallest item -> left child
        second = heapq.heappop(a)   # next smallest -> right child
        merged = Entry(
            first.get_key() + second.get_key(),
            first.get_value() + second.get_value(),
            first,
            second,
            '',
        )
        heapq.heappush(a, merged)

    # Exactly one item is left at this point: the root.
    root = heapq.heappop(a)
    return root


# 허프만압축을 통해 문자별 부여되는 이진코드
def update_code(node, dict=None):
    """Assign Huffman codes below ``node`` and collect them in a table.

    The tree is walked depth-first. Each child's code is its parent's code
    plus '0' (left) or '1' (right). A node whose value is exactly one
    character long is treated as a leaf and recorded in the table.

    Args:
        node: Tree node exposing get_left/get_right/get_code/set_code/
            get_value.
        dict: Table to fill (character -> code). A new one is created when
            omitted. The name shadows the builtin but is kept so existing
            keyword callers keep working.

    Returns:
        The table that was filled.
    """
    if dict is None:
        dict = {}

    left = node.get_left()
    right = node.get_right()

    # A childless node without a code can only be the root of a one-symbol
    # tree (children always receive a non-empty code below). Give it a
    # one-bit code so the text can still be encoded and decoded.
    if left is None and right is None and not node.get_code():
        node.set_code('0')

    # Treat a missing root code as the empty prefix.
    prefix = node.get_code() or ''

    if left is not None:
        left.set_code(prefix + '0')
        update_code(left, dict)

    if right is not None:
        right.set_code(prefix + '1')
        update_code(right, dict)

    if len(node.get_value()) == 1:  # leaf: record its final code
        dict[node.get_value()] = node.get_code()

    return dict


def print_heap(a):
    """Print every item of ``a`` as "[frequency, value] " on one line."""
    for entry in a:
        print(f"[{entry.get_key()}, {entry.get_value()}] ", end="")
    print()  # terminate the line


def encode_text(code_table, text):
    """Encode ``text`` by concatenating the code of each character.

    Args:
        code_table: Mapping from a character to its code string.
        text: Characters to encode, processed in order.

    Returns:
        The codes of all characters joined into one string ("" for empty
        text).

    Raises:
        KeyError: If a character of ``text`` is missing from ``code_table``.
    """
    # join() builds the result in one pass; repeated ``+=`` on a string can
    # degrade to quadratic time on long inputs.
    return "".join(code_table[ch] for ch in text)


def make_heapq(text: str):
    """Return a min-heap holding one leaf Entry per distinct character.

    Each Entry carries the character's occurrence count as its frequency
    and has no children and no code yet.
    """
    frequency = {}
    for symbol in text:
        if symbol in frequency:
            frequency[symbol] += 1
        else:
            frequency[symbol] = 1

    heap = []
    for symbol, count in frequency.items():
        heap.append(Entry(count, symbol, None, None, None))

    # heapify turns the list into a heap in place, ordered by Entry.__lt__.
    heapq.heapify(heap)
    return heap


# "입력 파일명"에 해당하는 파일을 읽어서 huffman 코드로 변환하여 "출력 파일명"으로 저장
def encoding(inputFile, outputFile):
    """Huffman-encode a text file and store the bits plus the code table.

    Reads C:/Users/user/Desktop/<inputFile> as UTF-8 and writes
    ./<outputFile> with three parts: the encoded bit string, a blank line,
    and the code table as a dict literal (needed later for decoding).

    Args:
        inputFile: File name of the plain text, relative to
            C:/Users/user/Desktop/.
        outputFile: File name of the result, relative to the current
            directory.
    """
    # Read the complete file. The earlier loop kept only the last line, so
    # multi-line inputs lost everything before it.
    with open(f"C:/Users/user/Desktop/{inputFile}", "r", encoding="UTF8") as f:
        text = f.read()

    heap = make_heapq(text)
    # An empty input has no symbols, hence no tree; use an empty table
    # instead of popping from an empty heap.
    code_table = update_code(create_huffman_tree(heap)) if heap else {}

    encodedString = encode_text(code_table, text)
    print(f"{text}=> {encodedString}")

    # decoding() opens this file as CP949, so write it with that encoding
    # explicitly rather than relying on the platform default.
    with open(f"./{outputFile}", "w", encoding="CP949") as f:
        f.write(encodedString)         # Huffman bit string
        f.write(f"\n\n{code_table}")   # blank line, then the code table


# huffman 코드로 저장된 "입력 파일명"에 해당하는 파일을 읽어서 복호화하여 "출력 파일명"으로 저장
def decoding(inputFile, outputFile):
    """Decode a Huffman file written by encoding() back into plain text.

    ./<inputFile> is read as CP949 and must contain the bit string on the
    first line, one blank line, and then the code table as a dict literal.
    The bit string and the decoded text are printed, and the decoded text
    is written to ./<outputFile>.

    Args:
        inputFile: Name of the encoded file, relative to the current
            directory.
        outputFile: Name of the file to create, relative to the current
            directory.

    Raises:
        ValueError: If the file does not have that layout or the code
            table is not a dict literal.
    """
    from ast import literal_eval  # local import keeps the block self-contained

    with open(f"./{inputFile}", "r", encoding="CP949") as f:
        content = f.read()

    # Split at the blank line. The earlier readline loop never terminated
    # when the second line was not blank and raised IndexError at EOF when
    # the first line had no newline; report both as a format error instead.
    text, separator, table_source = content.partition("\n\n")
    if not separator or "\n" in text or not table_source.strip():
        raise ValueError(
            f"{inputFile}: expected '<bits>', a blank line, then the code table")

    # SECURITY: the table used to be parsed with eval(), which executes any
    # expression found in the file. literal_eval accepts literals only.
    try:
        code_table = literal_eval(table_source.strip())
    except (ValueError, SyntaxError) as err:
        raise ValueError(f"{inputFile}: code table is not a valid dict literal") from err
    if not isinstance(code_table, dict):
        raise ValueError(f"{inputFile}: code table is not a dict")

    # Decoding looks characters up by code, so invert the table,
    # e.g. {'b': '000'} becomes {'000': 'b'}.
    decodeDict = {bits: symbol for symbol, bits in code_table.items()}

    # Grow the window [start:end) bit by bit; once it equals a known code,
    # emit the character and begin a new window after it.
    pieces = []
    start = 0
    for end in range(1, len(text) + 1):
        chunk = text[start:end]
        if chunk in decodeDict:
            pieces.append(decodeDict[chunk])
            start = end
    decodedString = "".join(pieces)

    print(f"{text}=> {decodedString}")

    with open(f"./{outputFile}", "w") as f:
        f.write(decodedString)


# 실행
if __name__ == "__main__":
    # Command line: python huffman.py (-z | -x) <input file> <output file>
    #   -z  compress <input file> into <output file>
    #   -x  decompress <input file> into <output file>
    print("--------------------------3가지 인자 입력(처음 data.txt는 바탕화면에 위치한다고 가정)--------------------")

    # Reject a wrong argument count or an unknown flag up front; before,
    # any flag other than -z silently ran decompression and a wrong count
    # did nothing at all.
    if len(sys.argv) != 4 or sys.argv[1] not in ("-z", "-x"):
        print(f"usage: python {sys.argv[0]} (-z | -x) <input file> <output file>")
        sys.exit(2)

    zOrx, inputFile, outputFile = sys.argv[1:4]

    if zOrx == "-z":    # compress: writes the bit string and the code table
        print(f"------------------------------------허프만 압축({inputFile})-----------------------------------")
        encoding(inputFile, outputFile)
        print(f"-----------------------------------허프만 압축 완료({outputFile})------------------------------")
    else:               # "-x": decompress
        print(f"------------------------------------복호화({inputFile})----------------------------------------")
        decoding(inputFile, outputFile)
        print(f"-------------------------------------복호화 완료({outputFile})-----------------------------------")

'파이썬 알고리즘 > 그리디 알고리즘' 카테고리의 다른 글

허프만 압축 (0)	2023.04.24
최단 경로 찾기 (다익스트라 알고리즘) (0)	2023.04.24
최소 신장 트리 (Prim MST) (0)	2023.04.18
최소 신장 트리 (Kruskal MST) (0)	2023.04.13

어쩌겠어 해야지

Huffman 압축 및 해제 프로그램

'파이썬 알고리즘 > 그리디 알고리즘' 카테고리의 다른 글

티스토리툴바

Huffman 압축 및 해제 프로그램

'파이썬 알고리즘 > 그리디 알고리즘' 카테고리의 다른 글

'파이썬 알고리즘/그리디 알고리즘' Related Articles

티스토리툴바