Golang을 사용하여 StarDict 사전 idx 파일을 읽고 줄별로 내보내기

4212 단어 golang
StarDict 사전 idx 파일 형식:
단어 목록의 각 항목은 세 개의 필드로 구성됩니다.
word_str;                 //a utf-8 string terminated by '\0'.
//'\0' 종료 문자로 끝나는 utf-8 인코딩 문자열입니다. word_str의 길이는 256보다 작아야 합니다.
word_data_offset;//word data's offset in .dict file
//.dict 파일 안에서 단어 데이터가 시작되는 오프셋,
                                 //If the version is "3.0.0"and "idxoffsetbits=64", 
                                  //word_data_offset will be 64-bits unsigned number in network byte order. 
word_data_size;   //word data's total size in .dict file
//.dict 파일 안에서 단어 데이터가 차지하는 총 크기, word_data_size should be 32-bits unsigned number
                                 // in network byte order.
package main


import (
//    "bufio"
    "io"
    "os"
    "fmt"
     "strconv"
)
// main converts the StarDict index file "gaojihanyudacidian_fix.idx" in the
// current directory into "output.txt". Every index entry becomes one line of
// the form
//
//	word<TAB>word_data_offset<TAB>word_data_size<LF>
//
// with both numbers printed in decimal. On failure the error is reported on
// stderr and the process exits with status 1.
func main() {
	if err := exportIndex("gaojihanyudacidian_fix.idx", "output.txt"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("\nfinish read\n")
}

// exportIndex reads the StarDict .idx file at idxPath and writes one
// tab-separated text line per entry to outPath.
//
// Bytes at the end of the input that do not form a complete entry are not
// exported; a warning naming their position is printed to stderr instead of
// dropping them silently.
func exportIndex(idxPath, outPath string) (err error) {
	data, err := readIndexFile(idxPath)
	if err != nil {
		return err
	}

	fo, err := os.Create(outPath)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost output, so surface it unless an
		// earlier error is already being returned.
		if cerr := fo.Close(); err == nil {
			err = cerr
		}
	}()

	// Lines are collected in buf and written in large chunks so that the
	// export does not issue several tiny writes per entry.
	const flushThreshold = 64 << 10
	buf := make([]byte, 0, flushThreshold+512)

	pos := 0
	for pos < len(data) {
		word, offset, size, n, ok := nextEntry(data[pos:])
		if !ok {
			fmt.Fprintf(os.Stderr,
				"warning: ignoring %d trailing byte(s) at offset %d: incomplete entry\n",
				len(data)-pos, pos)
			break
		}

		buf = append(buf, word...)
		buf = append(buf, '\t')
		buf = strconv.AppendUint(buf, offset, 10)
		buf = append(buf, '\t')
		buf = strconv.AppendUint(buf, size, 10)
		buf = append(buf, '\n')
		pos += n

		if len(buf) >= flushThreshold {
			if _, werr := fo.Write(buf); werr != nil {
				return werr
			}
			buf = buf[:0]
		}
	}

	_, err = fo.Write(buf)
	return err
}

// readIndexFile returns the complete contents of the file at path.
func readIndexFile(path string) ([]byte, error) {
	fi, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer fi.Close()

	info, err := fi.Stat()
	if err != nil {
		return nil, err
	}
	data := make([]byte, info.Size())
	if _, err := io.ReadFull(fi, data); err != nil {
		return nil, fmt.Errorf("read %s: %w", path, err)
	}
	return data, nil
}

// nextEntry decodes the index entry at the start of b.
//
// An entry is the word's bytes, a NUL terminator, and two 32-bit unsigned
// big-endian (network byte order) numbers: the word data's offset and its
// size. The word is returned as the raw bytes before the terminator, so
// UTF-8 sequences of any length pass through unchanged. Indexes written with
// 64-bit offsets are not handled.
//
// n is the number of bytes the entry occupies. ok is false when b does not
// hold a terminator followed by the eight bytes of numeric fields.
func nextEntry(b []byte) (word []byte, offset, size uint64, n int, ok bool) {
	end := 0
	for end < len(b) && b[end] != 0 {
		end++
	}
	const tail = 1 + 4 + 4 // terminator + offset + size
	if len(b)-end < tail {
		return nil, 0, 0, 0, false
	}
	fields := b[end+1 : end+tail]
	return b[:end], bigEndian32(fields[:4]), bigEndian32(fields[4:]), end + tail, true
}

// bigEndian32 decodes the first four bytes of b as a big-endian unsigned
// 32-bit number, widened to uint64.
func bigEndian32(b []byte) uint64 {
	return uint64(b[0])<<24 | uint64(b[1])<<16 | uint64(b[2])<<8 | uint64(b[3])
}

좋은 웹페이지 즐겨찾기