diff --git a/README.md b/README.md
index 6bdae9b..319fab8 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@
 
 # Silero VAD
 
-![image](https://user-images.githubusercontent.com/12515440/106419932-a7d50a80-646a-11eb-8f2b-00b454ed9b98.png)
+![image](https://user-images.githubusercontent.com/36505480/107667211-06cf2680-6c98-11eb-9ee5-37eb4596260f.png)
 
 **Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD), Number Detector and Language Classifier.**
 Enterprise-grade Speech Products made refreshingly simple (see our [STT](https://github.com/snakers4/silero-models) models).
@@ -60,6 +60,7 @@ The models are small enough to be included directly into this repository. Newer
 
 | model= | Params | Model type | Streaming | Languages | PyTorch | ONNX | Colab |
 |--------------------------------|--------|---------------------|--------------------|----------------|---------|------|-------|
 | `'silero_vad'` | 1.1M | VAD | Yes | `ru`, `en`, `de`, `es` (*) | :heavy_check_mark: | :heavy_check_mark: | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-vad/blob/master/silero-vad.ipynb) |
+| `'silero_vad_micro'` | 10K | VAD | Yes | `ru`, `en`, `de`, `es` (*) | :heavy_check_mark: | :heavy_check_mark: | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-vad/blob/master/silero-vad.ipynb) |
 | `'silero_number_detector'` | 1.1M | Number Detector | No | `ru`, `en`, `de`, `es` | :heavy_check_mark: | :heavy_check_mark: | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-vad/blob/master/silero-vad.ipynb) |
 | `'silero_lang_detector'` | 1.1M | Language Classifier | No | `ru`, `en`, `de`, `es` | :heavy_check_mark: | :heavy_check_mark: | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snakers4/silero-vad/blob/master/silero-vad.ipynb) |
@@ -79,6 +80,7 @@ What models do:
 | `v1.1` | 2020-12-24 | better VAD models compatible with chunks shorter than 250 ms |
 | `v1.2` | 2020-12-30 | Number Detector added |
 | `v2` | 2021-01-11 | Add Language Classifier heads (en, ru, de, es) |
+| `v2.1` | 2021-02-11 | Add micro (10k params) VAD models |
 
 ### PyTorch
@@ -333,7 +335,7 @@ Since our VAD (only VAD, other networks are more flexible) was trained on chunks
 [Auditok](https://github.com/amsehili/auditok) - logic same as Webrtc, but we use 50 ms frames.
 
-![image](https://user-images.githubusercontent.com/12515440/106419932-a7d50a80-646a-11eb-8f2b-00b454ed9b98.png)
+![image](https://user-images.githubusercontent.com/36505480/107667211-06cf2680-6c98-11eb-9ee5-37eb4596260f.png)
 
 ## FAQ
@@ -346,6 +348,7 @@ Since our VAD (only VAD, other networks are more flexible) was trained on chunks
 - `num_steps` - number of overlapping windows to split an audio chunk into (we recommend 4 or 8)
 - `num_samples_per_window` - number of samples in each window; our models were trained using `4000` samples (250 ms) per window, so this is the preferable value (lesser values reduce [quality](https://github.com/snakers4/silero-vad/issues/2#issuecomment-750840434));
 - `min_speech_samples` - minimum speech chunk duration in samples
+- `min_silence_samples` - minimum silence duration in samples between two separate speech chunks
 
 Optimal parameters may vary per domain, but we provide a tiny tool to learn the best parameters.
 You can invoke `speech_timestamps` with `visualize_probs=True` (`pandas` required):
diff --git a/files/model_micro.jit b/files/model_micro.jit
new file mode 100644
index 0000000..ec76fca
Binary files /dev/null and b/files/model_micro.jit differ
diff --git a/files/model_micro.onnx b/files/model_micro.onnx
new file mode 100644
index 0000000..1f36b09
Binary files /dev/null and b/files/model_micro.onnx differ
diff --git a/hubconf.py b/hubconf.py
index 6b8f056..5a515ca 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -29,6 +29,23 @@ def silero_vad(**kwargs):
     return model, utils
 
 
+def silero_vad_micro(**kwargs):
+    """Silero Voice Activity Detector (micro, 10k-param version)
+    Returns a model with a set of utils
+    Please see https://github.com/snakers4/silero-vad for usage examples
+    """
+    hub_dir = torch.hub.get_dir()
+    model = init_jit_model(model_path=f'{hub_dir}/snakers4_silero-vad_master/files/model_micro.jit')
+    utils = (get_speech_ts,
+             save_audio,
+             read_audio,
+             state_generator,
+             single_audio_stream,
+             collect_chunks)
+
+    return model, utils
+
+
 def silero_number_detector(**kwargs):
     """Silero Number Detector
     Returns a model with a set of utils
diff --git a/utils_vad.py b/utils_vad.py
index a8f8c60..0f2b46d 100644
--- a/utils_vad.py
+++ b/utils_vad.py
@@ -60,6 +60,7 @@ def get_speech_ts(wav: torch.Tensor,
                   batch_size: int = 200,
                   num_samples_per_window: int = 4000,
                   min_speech_samples: int = 10000,  # samples
+                  min_silence_samples: int = 500,
                   run_function=validate,
                   visualize_probs=False):
 
@@ -95,20 +96,31 @@ def get_speech_ts(wav: torch.Tensor,
     smoothed_probs = []
     speech_probs = outs[:, 1]  # this is very misleading
+    temp_end = 0  # candidate end of the current speech chunk, 0 means no candidate
     for i, predict in enumerate(speech_probs):
         buffer.append(predict)
         smoothed_prob = (sum(buffer) / len(buffer))
         if visualize_probs:
             smoothed_probs.append(float(smoothed_prob))
+        if (smoothed_prob >= trig_sum) and temp_end:
+            temp_end = 0  # speech resumed before the silence grew long enough
         if (smoothed_prob >= trig_sum) and not triggered:
             triggered = True
             current_speech['start'] = step * max(0, i - num_steps)
+            continue
         if (smoothed_prob < neg_trig_sum) and triggered:
-            current_speech['end'] = step * i
-            if (current_speech['end'] - current_speech['start']) > min_speech_samples:
-                speeches.append(current_speech)
-            current_speech = {}
-            triggered = False
+            if not temp_end:
+                temp_end = step * i  # remember where the silence started
+            if step * i - temp_end < min_silence_samples:
+                continue  # silence still too short to close the chunk
+            else:
+                current_speech['end'] = temp_end
+                if (current_speech['end'] - current_speech['start']) > min_speech_samples:
+                    speeches.append(current_speech)
+                temp_end = 0
+                current_speech = {}
+                triggered = False
+                continue
 
     if current_speech:
         current_speech['end'] = len(wav)
         speeches.append(current_speech)
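
Usage note for reviewers: a minimal sketch of how the new `silero_vad_micro` entry point and the new `min_silence_samples` argument fit the usual torch.hub workflow. The file name `test.wav` is a placeholder, and passing the model as the second positional argument of `get_speech_ts` is assumed from the existing `silero_vad` examples; treat this as illustration, not part of the patch.

```python
import torch

# Load the new 10k-param model through the hub entry point added in hubconf.py.
model, utils = torch.hub.load('snakers4/silero-vad', 'silero_vad_micro')
(get_speech_ts, save_audio, read_audio,
 state_generator, single_audio_stream, collect_chunks) = utils

wav = read_audio('test.wav')  # placeholder path; 16 kHz mono audio expected

# min_silence_samples is the new knob: pauses shorter than this no longer
# split one utterance into two chunks.
speech_timestamps = get_speech_ts(wav, model,
                                  num_samples_per_window=4000,
                                  min_speech_samples=10000,
                                  min_silence_samples=500)
print(speech_timestamps)  # [{'start': <sample>, 'end': <sample>}, ...]
```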
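The behavioral change in `utils_vad.py` is a hysteresis on chunk endings: a dip below `neg_trig_sum` only records a candidate end (`temp_end`), and the chunk is committed once silence has lasted at least `min_silence_samples`; if speech resumes earlier, the candidate is discarded. Below is a self-contained toy re-implementation of just that state machine, with the smoothing buffer and model calls omitted; all names and numbers are illustrative, not taken from the patch.

```python
def toy_speech_chunks(probs, step=500, trig=0.25, neg_trig=0.07,
                      min_speech=2000, min_silence=1000):
    """Toy version of the patched loop: a chunk closes only after the
    silence candidate has persisted for at least `min_silence` samples."""
    speeches, current, triggered, temp_end = [], {}, False, 0
    for i, p in enumerate(probs):
        if p >= trig and temp_end:
            temp_end = 0                   # speech resumed, drop the candidate end
        if p >= trig and not triggered:
            triggered, current = True, {'start': step * i}
            continue
        if p < neg_trig and triggered:
            if not temp_end:
                temp_end = step * i        # silence just started here
            if step * i - temp_end >= min_silence:
                current['end'] = temp_end  # silence long enough: commit the end
                if current['end'] - current['start'] > min_speech:
                    speeches.append(current)
                current, triggered, temp_end = {}, False, 0
    if current:                            # stream ended mid-speech
        current['end'] = step * len(probs)
        speeches.append(current)
    return speeches

# A one-window dip (0.05 at index 10) is bridged; only the long tail of
# silence closes the chunk, at the first sample of that silence (8000).
probs = [0.9] * 10 + [0.05] + [0.9] * 5 + [0.0] * 10
print(toy_speech_chunks(probs))  # [{'start': 0, 'end': 8000}]
```

This also shows why the patch resets `temp_end` on renewed speech: without that reset, a stale candidate from an earlier brief dip would end the chunk too early once silence finally arrives.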