2. Neural Network From Scratch
A tutorial for beginners: using PyTorch and fast.ai, you can create your own classifier.
- Neural Network From Scratch Using PyTorch and fast.ai Capabilities
- Import Packages
- Download Dataset
- Point Path.BASE_PATH to the Dataset Directory
- Let's See What's Inside Path
- Different Methods to View a Picture
- See an Image as an Array
- Tensors Are Arrays in PyTorch
- Let's See the Image in a Fancy Way
- Images as a Tensor List
- Let's Store It as a Tensor Stack
- Compare the Types Above
- Put It All Together for Validation
- Prepare Training Data
- First Batch
from fastai.vision.all import *   # provides untar_data, URLs, tensor, DataLoader, Learner, SGD, Image, ...
import pandas as pd
import string

path = untar_data(URLs.MNIST_SAMPLE)
path
Path.BASE_PATH = path
path.ls()
(path/'train').ls()
sevens = (path/'train'/'7').ls().sorted()
threes = (path/'train'/'3').ls().sorted()
sevens
Let's do the following:
- Read/get the path of an image
- Pass it to the image opener from the Python imaging library (PIL)
im3_path = threes[1]
im3 = Image.open(im3_path)
im3
array(im3)[4:10,4:10]
tensor(im3)[4:10,4:10]
pd.DataFrame(tensor(im3)[4:15,3:22]).style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')
sevens_list = [tensor(Image.open(o)) for o in sevens]
threes_list = [tensor(Image.open(o)) for o in threes]
type(sevens_list), type(sevens_list[0])
show_image(sevens_list[0])
sevens_stack = torch.stack(sevens_list).float()/255
threes_stack = torch.stack(threes_list).float()/255
type(sevens_stack)
type(sevens_list), type(sevens_list[0])
valid_3_tens = torch.stack([tensor(Image.open(o))
                            for o in (path/'valid'/'3').ls()])
valid_3_tens = valid_3_tens.float()/255
valid_7_tens = torch.stack([tensor(Image.open(o))
                            for o in (path/'valid'/'7').ls()])
valid_7_tens = valid_7_tens.float()/255
valid_3_tens.shape,valid_7_tens.shape
type([sevens_stack, threes_stack]), len([sevens_stack, threes_stack])
train_x = torch.cat([sevens_stack, threes_stack]).view(-1,28*28)
type(train_x),type(train_x[0]), train_x.shape
train_y = tensor([0]*len(sevens_stack) + [1]*len(threes_stack)).unsqueeze(1)
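unsqueeze(1) adds a trailing axis so each label becomes a one-element row, matching the [N, 1] shape of the predictions the model will produce. A quick illustration with made-up labels:

# hypothetical labels, just to show the shape change
tensor([0, 1, 1]).shape               # torch.Size([3])
tensor([0, 1, 1]).unsqueeze(1).shape  # torch.Size([3, 1])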
dset = list(zip(train_x,train_y))
x,y = dset[0]
x.shape,y.shape
valid_x = torch.cat([valid_7_tens, valid_3_tens]).view(-1,28*28)
valid_y = tensor([0]*len(valid_7_tens) + [1]*len(valid_3_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x,valid_y))
valid_x.shape,valid_y.shape
dl = DataLoader(range(15), batch_size=5, shuffle=True)
list(dl)
ds = DataLoader(L(enumerate(string.ascii_lowercase)), batch_size=5, shuffle=True)
list(ds)
dl = DataLoader(dset, batch_size=256, shuffle=True)
xb, yb = first(dl)
xb.shape, yb.shape
type(valid_dset),len(valid_dset[0])
valid_dl = DataLoader(valid_dset, batch_size=256, shuffle=True)
v_xb, v_yb = first(valid_dl)
def init_params(size,std=1.0): return (torch.randn(size)*std).requires_grad_()
weights = init_params((28*28,1))
bias = init_params(1)
def linear1(x): return x@weights + bias
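As a quick shape check: weights is [784, 1] and bias is [1], so for a batch of flattened images of shape [batch, 784], x@weights produces one raw score per image. The random batch below is purely illustrative:

# illustrative only: a fake batch of 256 flattened 28x28 "images"
fake_batch = torch.randn(256, 28*28)
linear1(fake_batch).shape   # expected: torch.Size([256, 1])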
xb, yb = first(dl)
preds = linear1(train_x)
preds = linear1(xb)
yb.shape,train_y.shape
train_y
yb
corrects = (preds>0.0).float() == yb   # raw scores are unbounded, so threshold at 0 (sigmoid(0) = 0.5)
corrects.float().mean().item()
def mnist_loss(predictions,targets):
    predictions = predictions.sigmoid()
    return torch.where(targets==1, (1-predictions), predictions).mean()
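Before applying the loss to real predictions, a quick sanity check with made-up raw scores (illustrative values, not from the dataset): predictions that mostly agree with their targets should give a smaller loss.

# illustrative raw scores for targets [1, 0, 1]
mnist_loss(tensor([ 2.0, -1.0,  0.5]), tensor([1, 0, 1]))   # mostly right -> smaller loss
mnist_loss(tensor([-2.0,  1.0, -0.5]), tensor([1, 0, 1]))   # mostly wrong -> larger loss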
loss = mnist_loss(preds,yb)
def calc_grad(x,y,model):
    preds = model(x)
    loss = mnist_loss(preds,y)
    loss.backward()
calc_grad(xb,yb,linear1)
weights.grad.shape,weights.grad.mean(),bias.grad
Why zero the gradients?
Calling loss.backward does not replace the stored gradients; it adds the newly computed gradients to whatever is already in .grad, so we must reset them to zero before the next batch. Our only remaining step is to update the weights and biases based on the gradient and learning rate. When we do so, we have to tell PyTorch not to take the gradient of this step too, otherwise things will get very confusing when we try to compute the derivative at the next batch! If we assign to the data attribute of a tensor, PyTorch will not take the gradient of that step. Here's our basic training loop for an epoch:
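A minimal standalone sketch (separate from the model code above) showing this accumulation behaviour:

w = tensor(3.0).requires_grad_()
(w*w).backward()
w.grad          # tensor(6.) -- d(w**2)/dw = 2*w
(w*w).backward()
w.grad          # tensor(12.) -- the new gradient was added, not replaced
w.grad.zero_()  # reset before the next step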
weights.grad.zero_(), bias.grad.zero_();
def train_epoch(model,lr,params):
    for x,y in dl:
        calc_grad(x,y,model)
        for p in params:
            p.data -= p.grad*lr
            p.grad.zero_()
Batch accuracy is used to evaluate the model:
def batch_accuracy(xb, yb):
    preds = xb.sigmoid()
    correct = (preds>0.5) == yb
    return correct.float().mean()
batch_accuracy(linear1(xb), yb)
def validate_epoch(model):
    accs = [batch_accuracy(model(v_xb), v_yb) for v_xb,v_yb in valid_dl]
    return round(torch.stack(accs).mean().item(), 4)
validate_epoch(linear1)
lr = 1e-1
params = weights,bias
train_epoch(linear1, lr, params)
validate_epoch(linear1)
for i in range(400):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')
linear_model = nn.Linear(28*28,1)
w,b = linear_model.parameters()
w.shape, b.shape
class BasicOpt:
    # holds the parameters to optimize (weights, bias) and the learning rate
    def __init__(self, params,lr): self.params, self.lr = list(params),lr
    # update each parameter in place using its gradient and the learning rate
    def step(self, *args, **kwargs):
        for p in self.params:
            p.data -= p.grad.data * self.lr
    # reset the gradients so they don't accumulate across batches
    def zero_grad(self, *args, **kwargs):
        for p in self.params:
            p.grad = None
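Assigning through p.data in step keeps the update out of the autograd graph, just like the manual training loop above, and setting p.grad = None in zero_grad has the same effect here as zeroing the gradients in place.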
opt = BasicOpt(linear_model.parameters(), lr=1e-1)
def train_epoch(model):
    for x,y in dl:
        calc_grad(x,y,model)
        opt.step()
        opt.zero_grad()
validate_epoch(linear_model)
def train_model(model,epochs):
    for i in range(epochs):
        train_epoch(model)
        print(validate_epoch(model), end=' ')
train_model(model=linear_model, epochs=10)
linear_model = nn.Linear(28*28,1)
opt = SGD(linear_model.parameters(), lr=1e-1)
train_model(linear_model,20)
dls = DataLoaders(dl, valid_dl)
To create a Learner without using an application (such as cnn_learner) we need to pass in all the elements that we've created in this chapter: the DataLoaders, the model, the optimization function (which will be passed the parameters), the loss function, and optionally any metrics to print:
learn = Learner(dls, nn.Linear(28*28,1), opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)
Now we can call fit:
learn.fit(10, lr=lr)