-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_launch.py
More file actions
158 lines (129 loc) · 5.57 KB
/
word_launch.py
File metadata and controls
158 lines (129 loc) · 5.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
"""
Fixed Neural Chatbot - With Beam Search + Retrieval Fallback
"""
import torch
import pickle
import sys
import os
from difflib import SequenceMatcher
# Run on GPU when available; every tensor and the model are placed here.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Maximum number of words kept when encoding a question (also the pad length).
MAX_LEN = 20
class WordVocab:
    """Word-level vocabulary with reserved ids: <PAD> = 0, <UNK> = 1."""

    def __init__(self):
        # Reserved entries come first; any new word would start at index 2.
        self.word2idx = {'<PAD>': 0, '<UNK>': 1}
        self.idx2word = {0: '<PAD>', 1: '<UNK>'}
        self.next_idx = 2

    def encode(self, text):
        """Lowercase *text*, split on whitespace, and map each word to its id.

        Unknown words map to 1 (<UNK>); the result is capped at MAX_LEN tokens.
        """
        return [self.word2idx.get(word, 1) for word in text.lower().split()[:MAX_LEN]]

    def decode(self, tokens):
        """Map token ids back to a space-joined string, dropping PAD/UNK ids."""
        words = (self.idx2word.get(tok, '<UNK>') for tok in tokens
                 if tok != 0 and tok != 1)
        return ' '.join(words)
class Seq2Seq(torch.nn.Module):
    """Two-layer LSTM encoder/decoder sharing one embedding table.

    Token conventions (must match WordVocab): 0 = <PAD>, which also doubles
    as the decoder start token, and 1 = <UNK>; both act as stop tokens
    during beam-search decoding.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.vocab_size = vocab_size
        self.embed = torch.nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.encoder = torch.nn.LSTM(embed_dim, hidden_dim, num_layers=2, batch_first=True)
        self.decoder = torch.nn.LSTM(embed_dim, hidden_dim, num_layers=2, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, vocab_size)

    def forward(self, questions, answers):
        """Teacher-forced training pass.

        Args:
            questions: LongTensor (batch, q_len) of question token ids.
            answers: LongTensor (batch, a_len) of answer token ids.

        Returns:
            Tensor (batch, a_len, vocab_size) of unnormalized logits.
        """
        q_embed = self.embed(questions)
        _, (hidden, cell) = self.encoder(q_embed)
        a_embed = self.embed(answers)
        decoder_out, _ = self.decoder(a_embed, (hidden, cell))
        return self.fc(decoder_out)

    def generate_beam(self, question_tokens, max_len=8, beam_width=3):
        """Beam-search decode a reply for a single encoded question.

        Args:
            question_tokens: list[int] of token ids for the question.
            max_len: maximum number of tokens to generate.
            beam_width: number of hypotheses kept per step.

        Returns:
            list[int]: generated token ids with the start token stripped.
        """
        self.eval()  # NOTE: intentionally leaves the module in eval mode, as before
        # Derive the device from the model itself instead of a module global,
        # so the method works wherever the model lives.
        device = next(self.parameters()).device
        with torch.no_grad():
            q_input = torch.tensor([question_tokens], dtype=torch.long, device=device)
            _, (hidden, cell) = self.encoder(self.embed(q_input))
            # Each beam: (tokens, cumulative log-prob, decoder hidden, decoder cell).
            # Token 0 doubles as the start-of-sequence marker.
            beams = [([0], 0.0, hidden, cell)]
            for _ in range(max_len):
                candidates = []
                for tokens, score, h, c in beams:
                    # BUG FIX: the original stop check `tokens[-1] in [0, 1]`
                    # matched the start token on the very first step, so every
                    # beam stalled immediately and generation always returned [].
                    # Only treat 0/1 as stop once something has been generated.
                    if len(tokens) > 1 and tokens[-1] in (0, 1):
                        candidates.append((tokens, score, h, c))
                        continue
                    last = torch.tensor([[tokens[-1]]], dtype=torch.long, device=device)
                    decoder_out, (h_new, c_new) = self.decoder(self.embed(last), (h, c))
                    log_probs = torch.log_softmax(self.fc(decoder_out[0, 0, :]), dim=0)
                    top_probs, top_indices = torch.topk(
                        log_probs, min(beam_width, self.vocab_size))
                    for prob, idx in zip(top_probs, top_indices):
                        candidates.append(
                            (tokens + [idx.item()], score + prob.item(), h_new, c_new))
                # Keep the highest-scoring hypotheses.
                beams = sorted(candidates, key=lambda b: b[1], reverse=True)[:beam_width]
            return beams[0][0][1:]  # strip the start token
def similarity(a, b):
    """Return a case-insensitive similarity ratio in [0, 1] for two strings."""
    matcher = SequenceMatcher(None, a.lower(), b.lower())
    return matcher.ratio()
def main():
    """Interactive chat loop: neural beam-search reply with retrieval fallback.

    Loads `word_model.pt` / `word_vocab.pkl` from the working directory and
    exits with status 1 when either is missing.
    """
    if not os.path.exists('word_model.pt') or not os.path.exists('word_vocab.pkl'):
        print("Model not found! Train first: python word_train.py")
        sys.exit(1)
    print("="*60)
    print("Loading Neural Chatbot (Beam Search + Fallback)...")
    # SECURITY: weights_only=False deserializes arbitrary pickled objects and
    # can execute code -- only load checkpoints from a trusted source.
    ckpt = torch.load('word_model.pt', map_location=DEVICE, weights_only=False)
    with open('word_vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)  # SECURITY: same unpickling caveat as above
    cfg = ckpt['config']
    model = Seq2Seq(cfg['vocab_size'], cfg['embed_dim'], cfg['hidden_dim']).to(DEVICE)
    model.load_state_dict(ckpt['model'])
    # (question, answer) pairs used for the retrieval fallback when the
    # neural path produces nothing.
    training_data = [
        ("hello", "hi"),
        ("hi", "hello"),
        ("how are you", "great"),
        ("what is your name", "chatbot"),
        ("goodbye", "bye"),
        ("thanks", "welcome"),
        ("help", "help you"),
        ("good morning", "morning"),
        ("how is it going", "good"),
        ("tell me a joke", "joke"),
        ("are you smart", "yes"),
        ("who are you", "ai"),
        ("what can you do", "chat"),
        ("bye", "later"),
        ("cool", "thanks"),
        ("like you", "like you too"),
    ]
    print("Ready!")
    print("="*60)
    print("Chat (type 'quit' to exit)")
    print("="*60)
    while True:
        user_input = input("\nYou: ").strip()
        if user_input.lower() in ['quit', 'exit']:
            print("Goodbye!")
            break
        if not user_input:  # ignore empty lines
            continue
        # Try the neural model first.
        q_tokens = vocab.encode(user_input)
        q_padded = q_tokens + [0] * (MAX_LEN - len(q_tokens))
        try:
            response_tokens = model.generate_beam(q_padded[:MAX_LEN], max_len=6, beam_width=3)
            response = vocab.decode(response_tokens).strip()
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and made Ctrl-C unreliable here.
            response = ""
        # Fallback: retrieve the answer for the most similar known question.
        # (response is already stripped, so emptiness is the only failure case.)
        if not response:
            best_match = max(training_data, key=lambda pair: similarity(user_input, pair[0]))
            response = best_match[1]
        print(f"Bot: {response}")


if __name__ == "__main__":
    main()