[도구 코드] MNIST와 CIFAR-10 데이터 세트를 NumPy 형식으로 변환

11754 단어 도구 코드
Cifar-10:
import pickle
import glob
import numpy as np


def Dataloader():
    """Merge all ``data_batch_*`` pickle files into two ``.npy`` files.

    Every file in the current working directory whose name matches
    ``data_batch_*`` is unpickled (with ``encoding='bytes'``), and its
    ``b'data'`` and ``b'labels'`` entries are converted to NumPy arrays.
    The per-batch arrays are concatenated along the first axis and saved to
    ``data/data.npy`` and ``data/label.npy``.

    Batches are processed in sorted filename order so the sample order in
    the output is reproducible between runs.

    Returns:
        None. The results are written to disk.

    Raises:
        FileNotFoundError: If no ``data_batch_*`` file is found.
    """
    # Function-scope import so this block does not depend on a file-level
    # ``import os`` being present.
    import os

    batch_paths = sorted(glob.glob("data_batch_*"))
    if not batch_paths:
        raise FileNotFoundError(
            "no 'data_batch_*' files found in the current directory"
        )

    data_parts = []
    label_parts = []
    for batch_path in batch_paths:
        # NOTE(security): pickle.load can execute arbitrary code; only run
        # this on batch files obtained from a trusted source.
        with open(batch_path, 'rb') as batch_file:
            batch = pickle.load(batch_file, encoding='bytes')
        data_parts.append(np.array(batch[b'data']))
        label_parts.append(np.array(batch[b'labels']))

    # One concatenation at the end instead of growing the arrays batch by
    # batch, which re-copied everything accumulated so far on each step.
    Data = np.concatenate(data_parts)
    Labels = np.concatenate(label_parts)

    # np.save does not create missing parent directories.
    os.makedirs("data", exist_ok=True)
    np.save("data/data.npy", Data)
    np.save("data/label.npy", Labels)
			
if __name__ == "__main__":
    # Script entry point: convert the CIFAR-10 batches found in the current
    # directory when this file is executed directly.
    Dataloader()

Mnist:
import numpy as np
import struct
 
def loadImageSet(filename):
    """Read an image-set file and return its pixels as a 2-D integer array.

    The file is parsed as four big-endian unsigned 32-bit header fields
    followed by one unsigned byte per pixel. Header fields 1, 2 and 3 are
    used as the image count and the two per-image dimensions; field 0 is
    returned but not interpreted.

    Args:
        filename: Path of the image file to read.

    Returns:
        A tuple ``(imgs, head)``. ``imgs`` is an integer array of shape
        ``[image count, dim0 * dim1]`` (one flattened image per row) and
        ``head`` is the 4-tuple of header fields.

    Raises:
        struct.error: If the file is shorter than the header, or holds fewer
            pixel bytes than the header announces.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    head = struct.unpack_from('>IIII', buffers, 0)
    offset = struct.calcsize('>IIII')  # pixel data starts after the header
    imgNum, dim0, dim1 = head[1:]
    pixels_per_image = dim0 * dim1
    total = imgNum * pixels_per_image

    # Keep the exception type the struct-based parser raised on truncation.
    if len(buffers) - offset < total:
        raise struct.error(
            "image file too short: expected %d pixel bytes, found %d"
            % (total, max(len(buffers) - offset, 0))
        )

    # Interpret the bytes in place rather than unpacking them into a tuple
    # with one Python int per pixel; astype(int) then yields the same
    # default-integer, writable array the tuple-based path produced.
    raw = np.frombuffer(buffers, dtype=np.uint8, count=total, offset=offset)
    imgs = raw.astype(int).reshape(imgNum, pixels_per_image)

    return imgs, head
 
 
def loadLabelSet(filename):
    """Read a label-set file and return its labels as a 1-D integer array.

    The file is parsed as two big-endian unsigned 32-bit header fields
    followed by one unsigned byte per label. Header field 1 is used as the
    label count; field 0 is returned but not interpreted.

    Args:
        filename: Path of the label file to read.

    Returns:
        A tuple ``(labels, head)``. ``labels`` is an integer array of shape
        ``[label count]`` and ``head`` is the 2-tuple of header fields.

    Raises:
        struct.error: If the file is shorter than the header, or holds fewer
            label bytes than the header announces.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    head = struct.unpack_from('>II', buffers, 0)
    labelNum = head[1]
    offset = struct.calcsize('>II')  # label data starts after the header

    # Keep the exception type the struct-based parser raised on truncation.
    if len(buffers) - offset < labelNum:
        raise struct.error(
            "label file too short: expected %d label bytes, found %d"
            % (labelNum, max(len(buffers) - offset, 0))
        )

    # Interpret the bytes in place rather than unpacking them into a tuple
    # of Python ints; astype(int) then yields the same default-integer,
    # writable array the tuple-based path produced.
    raw = np.frombuffer(buffers, dtype=np.uint8, count=labelNum, offset=offset)
    labels = raw.astype(int).reshape(labelNum)

    return labels, head
 
 
if __name__ == "__main__":
    # Script entry point: parse the training image and label files from the
    # current directory, then store both arrays under data/ as .npy files.
    file1 = './train-images.idx3-ubyte'
    file2 = './train-labels.idx1-ubyte'

    imgs, data_head = loadImageSet(file1)
    labels, labels_head = loadLabelSet(file2)

    for target, array in (("data/data.npy", imgs), ("data/label.npy", labels)):
        np.save(target, array)

좋은 웹페이지 즐겨찾기