자연어 처리(NLP): 중국어 단어 분할 - 역방향 최대 일치 알고리즘

7968 단어
"""
     
"""
# Module-level dictionary word list; init() appends the entries read from the dictionary file.
dict_words = []

# Load the dictionary file into the module-level word list.
def init():
    """Append every line of ``dict/dict.txt`` (UTF-8), stripped of surrounding
    whitespace, to the module-level ``dict_words`` list."""
    with open("dict/dict.txt", encoding="utf-8") as dict_file:
        dict_words.extend(entry.strip() for entry in dict_file)

# Segment a sentence with the reverse (backward) maximum matching algorithm.
def cut_words(words_input, dict_words):
    """Split ``words_input`` into words by reverse maximum matching.

    Starting from the end of the (whitespace-stripped) input, the longest
    suffix that appears in ``dict_words`` is cut off as one word; when no
    suffix of length >= 2 matches, the last single character is cut off as
    a word of its own. The process repeats on the remaining prefix.

    Args:
        words_input: The text to segment.
        dict_words: Iterable of known words (the dictionary).

    Returns:
        The list of segments in their original left-to-right order. An empty
        (or whitespace-only) input yields an empty list.
    """
    text = words_input.strip()

    # Build a hash set once so every membership probe is O(1) instead of a
    # linear scan of the dictionary list; reuse the caller's set if given one.
    if isinstance(dict_words, (set, frozenset)):
        vocabulary = dict_words
    else:
        vocabulary = set(dict_words)

    # Widest window worth trying. Floor it at 1 so that an empty dictionary
    # (or one holding only empty strings) degrades to one-character cuts
    # instead of raising ValueError or looping forever on a zero-width window.
    longest_word = max((len(word) for word in vocabulary), default=0)
    longest_word = max(longest_word, 1)

    segments = []
    end = len(text)  # exclusive end of the not-yet-segmented prefix
    while end > 0:
        # Try the widest suffix first and shrink it one character at a time.
        for size in range(min(end, longest_word), 0, -1):
            candidate = text[end - size:end]
            # A single character is always accepted as a fallback.
            if size == 1 or candidate in vocabulary:
                segments.append(candidate)
                end -= size
                break

    # Segments were collected back-to-front; restore reading order.
    segments.reverse()
    return segments

# Interactive entry point: load the dictionary, then segment each line typed by the user.
def main():
    """Load the dictionary and segment lines read from standard input.

    Each line is prompted for, segmented with ``cut_words`` and printed with
    the segments joined by ``/``. An empty line ends the loop.
    """
    init()

    def read_line():
        # Show the prompt, then block for one line of user input.
        print(" :")
        return input()

    # iter(callable, sentinel) keeps calling read_line() until it returns "".
    for sentence in iter(read_line, ""):
        segmented = "/".join(cut_words(sentence, dict_words))
        print(" :")
        print(segmented)

# Run the interactive loop only when this file is executed as a script.
if __name__ == "__main__":
    main()

좋은 웹페이지 즐겨찾기