diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..2006ccd
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+*.bin
+example_test/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 5a48dd5..032b052 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /target
-*.bin
\ No newline at end of file
+*.bin
+/example_test
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 63e0b9b..5be9d88 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -105,7 +105,7 @@ dependencies = [
 
 [[package]]
 name = "llama_cpp_rs"
-version = "0.1.2"
+version = "0.2.0"
 dependencies = [
  "bindgen",
  "cc",
diff --git a/Cargo.toml b/Cargo.toml
index 2def871..fb6b762 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,12 +2,24 @@
 authors = ["mdrokz "]
 name = "llama_cpp_rs"
 description = "Rust bindings for LLAMA.CPP inference"
-categories = ["api-bindings","development-tools::ffi","development-tools::build-utils","science"]
-keywords = ["machine-learning","api-bindings","llama","llama-cpp","inference"]
+categories = [
+    "api-bindings",
+    "development-tools::ffi",
+    "development-tools::build-utils",
+    "science",
+]
+keywords = [
+    "machine-learning",
+    "api-bindings",
+    "llama",
+    "llama-cpp",
+    "inference",
+]
+exclude = ["examples", "example_test"]
 license-file = "LICENSE"
 readme = "README.md"
 repository = "https://github.com/mdrokz/rust-llama.cpp"
-version = "0.1.2"
+version = "0.2.0"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -21,4 +33,10 @@ bindgen = "0.66.1"
 
 [lib]
 name = "llama_cpp_rs"
-path = "src/lib.rs"
\ No newline at end of file
+path = "src/lib.rs"
+
+[features]
+opencl = []
+cuda = []
+openblas = []
+blis = []
\ No newline at end of file
diff --git a/README.md b/README.md
index dcf615d..3fec9b6 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ fn main() {
     )
     .unwrap();
 
-    let mut predict_options = PredictOptions {
+    let predict_options = PredictOptions {
         token_callback: Some(Box::new(|token| {
             println!("token1: {}", token);
 
@@ -61,12 +61,20 @@ fn main() {
 ```
 
+## Examples
+
+The examples contain Dockerfiles to run them.
+
+See [examples](https://github.com/mdrokz/rust-llama.cpp/examples/README.md).
+
 ## TODO
 
-- [ ] Implement support for cublas,openBLAS & OpenCL
+- [x] Implement support for cublas, openBLAS & OpenCL
 - [ ] Implement support for GPU (Metal)
 - [ ] Add some test cases
-- [ ] Add some proper examples
+- [ ] Support for fetching models through HTTP & S3
+- [ ] Sync with latest master & support GGUF
+- [x] Add some proper examples
 
 ## LICENSE
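The README change above drops the needless `mut` and streams tokens through `token_callback`, which returns a `bool`. A minimal sketch of using that return value to cap generation, assuming `false` tells the bindings to stop (the diff itself does not spell this out):

```rust
use std::sync::{Arc, Mutex};

use llama_cpp_rs::options::PredictOptions;

// Sketch only: stream tokens into a shared buffer and stop once a byte budget is hit.
// Assumes the callback's bool acts as a "keep going" flag (false = stop).
fn capped_options(budget: usize) -> (PredictOptions, Arc<Mutex<String>>) {
    let collected = Arc::new(Mutex::new(String::new()));
    let sink = collected.clone();

    let options = PredictOptions {
        token_callback: Some(Box::new(move |token| {
            let mut buf = sink.lock().unwrap();
            buf.push_str(&token);
            buf.len() < budget // request more tokens only while under the budget
        })),
        ..Default::default()
    };

    (options, collected)
}
```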
println!("cargo:rustc-link-lib=framework=OpenCL"); + println!("cargo:rustc-link-lib=clblast"); + } + + cxx.file("./llama.cpp/ggml-opencl.cpp"); +} + +fn compile_openblas(cx: &mut Build) { + cx.flag("-DGGML_USE_OPENBLAS") + .include("/usr/local/include/openblas") + .include("/usr/local/include/openblas"); + println!("cargo:rustc-link-lib=openblas"); +} + +fn compile_blis(cx: &mut Build) { + cx.flag("-DGGML_USE_OPENBLAS") + .include("/usr/local/include/blis") + .include("/usr/local/include/blis"); + println!("cargo:rustc-link-search=native=/usr/local/lib"); + println!("cargo:rustc-link-lib=blis"); +} + +fn compile_cuda(cxx_flags: &str) { + println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64"); + println!("cargo:rustc-link-search=native=/opt/cuda/lib64"); + + if let Ok(cuda_path) = std::env::var("CUDA_PATH") { + println!( + "cargo:rustc-link-search=native={}/targets/x86_64-linux/lib", + cuda_path + ); + } + + let libs = "cublas culibos cudart cublasLt pthread dl rt"; + + for lib in libs.split_whitespace() { + println!("cargo:rustc-link-lib={}", lib); + } + + let mut nvcc = cc::Build::new(); + + let env_flags = vec![ + ("LLAMA_CUDA_DMMV_X=32", "-DGGML_CUDA_DMMV_X"), + ("LLAMA_CUDA_DMMV_Y=1", "-DGGML_CUDA_DMMV_Y"), + ("LLAMA_CUDA_KQUANTS_ITER=2", "-DK_QUANTS_PER_ITERATION"), + ]; + + let nvcc_flags = "--forward-unknown-to-host-compiler -arch=native "; + + for nvcc_flag in nvcc_flags.split_whitespace() { + nvcc.flag(nvcc_flag); + } + + for cxx_flag in cxx_flags.split_whitespace() { + nvcc.flag(cxx_flag); + } + + for env_flag in env_flags { + let mut flag_split = env_flag.0.split("="); + if let Ok(val) = std::env::var(flag_split.next().unwrap()) { + nvcc.flag(&format!("{}={}", env_flag.1, val)); + } else { + nvcc.flag(&format!("{}={}", env_flag.1, flag_split.next().unwrap())); + } + } + + nvcc.compiler("nvcc") + .file("./llama.cpp/ggml-cuda.cu") + .flag("-Wno-pedantic") + .include("./llama.cpp/ggml-cuda.h") + .compile("ggml-cuda"); +} + +fn compile_ggml(cx: &mut Build, cx_flags: &str) { + for cx_flag in cx_flags.split_whitespace() { + cx.flag(cx_flag); + } + + cx.include("./llama.cpp") + .file("./llama.cpp/ggml.c") + .cpp(false) + .compile("ggml"); +} + +fn compile_llama(cxx: &mut Build, cxx_flags: &str, out_path: &PathBuf, ggml_type: &str) { + for cxx_flag in cxx_flags.split_whitespace() { + cxx.flag(cxx_flag); + } + + let ggml_obj = PathBuf::from(&out_path).join("llama.cpp/ggml.o"); + + cxx.object(ggml_obj); + + if !ggml_type.is_empty() { + let ggml_feature_obj = + PathBuf::from(&out_path).join(format!("llama.cpp/ggml-{}.o", ggml_type)); + cxx.object(ggml_feature_obj); + } + + cxx.shared_flag(true) + .file("./llama.cpp/examples/common.cpp") + .file("./llama.cpp/llama.cpp") + .file("./binding.cpp") + .cpp(true) + .compile("binding"); +} + +fn main() { + let out_path = PathBuf::from(env::var("OUT_DIR").expect("No out dir found")); + + compile_bindings(&out_path); let mut cx_flags = String::from("-Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -march=native -mtune=native"); let mut cxx_flags = String::from("-Wall -Wdeprecated-declarations -Wunused-but-set-variable -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar -march=native -mtune=native"); - + // check if os is linux // if so, add -fPIC to cxx_flags if cfg!(target_os = "linux") { cx_flags.push_str(" -pthread"); cxx_flags.push_str(" -fPIC -pthread"); } - - let mut cbuild = &mut cc::Build::new(); - - let mut ccbuild = &mut cc::Build::new(); - - for cx_flag in 
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..6b670af
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,64 @@
+> [!IMPORTANT]
+> This was tested with Docker running on Linux & Windows, on my Linux PC with an RX 6700 XT GPU and my brother's Windows PC with an NVIDIA RTX 2060 SUPER.
+> If you can test it on other GPUs & platforms, please update this `README.md` with a PR!
+
+## Supported / Tested
+
+- AMD RX 6700 XT / Fedora 37
+- NVIDIA RTX 2060 Super / Windows 11 Docker
+
+# Examples
+
+There are three examples: basic, cuda and opencl. Each has its own Dockerfile except the basic example.
+
+# basic
+
+A simple example that runs inference with the default options:
+
+```
+cargo run --release
+```
+
+# cuda
+
+An example that uses NVIDIA GPUs via the cuda feature:
+
+First, build the image from the root of the repository:
+
+```
+docker build -f examples/cuda/Dockerfile . -t llama_cuda
+```
+
+Then you can run it:
+
+### Linux
+
+```
+docker run --device=/dev/dri:/dev/dri --volume=
+:/models llama_cuda
+```
+
+### Windows
+
+```
+docker run --volume=:/models --gpus all llama_cuda
+```
+
+
+# opencl
+
+An example that runs on CLBlast-supported GPUs:
+
+First, build the image from the root of the repository:
+
+```
+docker build -f examples/opencl/Dockerfile . -t llama_opencl
+```
+
+Then you can run it:
+
+### Linux
+
+```
+docker run --device=/dev/dri:/dev/dri --volume=
+:/models llama_opencl
+```
\ No newline at end of file
diff --git a/examples/basic/Cargo.toml b/examples/basic/Cargo.toml
new file mode 100644
index 0000000..cb00112
--- /dev/null
+++ b/examples/basic/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+authors = ["mdrokz "]
+name = "llama_basic"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+llama_cpp_rs = {path = "../../"}
\ No newline at end of file
diff --git a/examples/basic/src/main.rs b/examples/basic/src/main.rs
new file mode 100644
index 0000000..1ec4ff3
--- /dev/null
+++ b/examples/basic/src/main.rs
@@ -0,0 +1,34 @@
+use llama_cpp_rs::{
+    options::{ModelOptions, PredictOptions},
+    LLama,
+};
+
+fn main() {
+    let model_options = ModelOptions::default();
+
+    let llama = LLama::new(
+        "./.bin".into(),
+        &model_options,
+    )
+    .unwrap();
+
+    let predict_options = PredictOptions {
+        tokens: 0,
+        threads: 14,
+        top_k: 90,
+        top_p: 0.86,
+        token_callback: Some(Box::new(|token| {
+            println!("token1: {}", token);
+
+            true
+        })),
+        ..Default::default()
+    };
+
+    llama
+        .predict(
+            "what are the national animals of india".into(),
+            predict_options,
+        )
+        .unwrap();
+}
\ No newline at end of file
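The basic example hard-codes `threads: 14`, which only suits machines with at least that many cores. A small sketch of sizing it from the host instead, assuming `threads` is the plain integer field the literal above suggests:

```rust
use std::thread;

use llama_cpp_rs::options::PredictOptions;

// Sketch: use the host's reported parallelism, falling back to 4 threads.
fn sized_predict_options() -> PredictOptions {
    let threads = thread::available_parallelism()
        .map(|n| n.get() as i32)
        .unwrap_or(4);

    PredictOptions {
        threads,
        ..Default::default()
    }
}
```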
diff --git a/examples/cuda/Cargo.toml b/examples/cuda/Cargo.toml
new file mode 100644
index 0000000..331aa2e
--- /dev/null
+++ b/examples/cuda/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+authors = ["mdrokz "]
+name = "llama_cuda"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+llama_cpp_rs = {path = "../../", features = ["cuda"]}
\ No newline at end of file
diff --git a/examples/cuda/Dockerfile b/examples/cuda/Dockerfile
new file mode 100644
index 0000000..d18c588
--- /dev/null
+++ b/examples/cuda/Dockerfile
@@ -0,0 +1,24 @@
+FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    git \
+    cmake \
+    clang \
+    cargo \
+    nano \
+    software-properties-common
+
+
+RUN apt-get clean && \
+rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY . .
+
+RUN cd examples/cuda && \
+    cargo build --release
+
+CMD ["./examples/cuda/target/release/llama_cuda"]
\ No newline at end of file
diff --git a/examples/cuda/src/main.rs b/examples/cuda/src/main.rs
new file mode 100644
index 0000000..fbe508a
--- /dev/null
+++ b/examples/cuda/src/main.rs
@@ -0,0 +1,33 @@
+use llama_cpp_rs::{
+    options::{ModelOptions, PredictOptions},
+    LLama,
+};
+
+fn main() {
+    let model_options = ModelOptions {
+        n_gpu_layers: 12,
+        ..Default::default()
+    };
+
+    let llama = LLama::new("/models/.bin".into(), &model_options).unwrap();
+
+    let predict_options = PredictOptions {
+        tokens: 0,
+        threads: 14,
+        top_k: 90,
+        top_p: 0.86,
+        token_callback: Some(Box::new(|token| {
+            println!("token1: {}", token);
+
+            true
+        })),
+        ..Default::default()
+    };
+
+    llama
+        .predict(
+            "what are the national animals of india".into(),
+            predict_options,
+        )
+        .unwrap();
+}
diff --git a/examples/opencl/Cargo.toml b/examples/opencl/Cargo.toml
new file mode 100644
index 0000000..49e5ef0
--- /dev/null
+++ b/examples/opencl/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+authors = ["mdrokz "]
+name = "llama_opencl"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+llama_cpp_rs = {path = "../../", features = ["opencl"]}
\ No newline at end of file
diff --git a/examples/opencl/Dockerfile b/examples/opencl/Dockerfile
new file mode 100644
index 0000000..0b51118
--- /dev/null
+++ b/examples/opencl/Dockerfile
@@ -0,0 +1,29 @@
+FROM cebxan/amdgpu-opencl
+
+# install common dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    cmake \
+    clang \
+    cargo \
+    nano \
+    clinfo \
+    software-properties-common \
+    opencl-headers \
+    ocl-icd-libopencl1 \
+    ocl-icd-opencl-dev
+
+
+# install clblast
+RUN add-apt-repository ppa:cnugteren/clblast && \
+    apt-get update && \
+    apt-get install -y libclblast-dev
+
+WORKDIR /app
+
+COPY . .
+
+RUN cd examples/opencl && \
+    cargo build --release
+
+CMD ["./examples/opencl/target/release/llama_opencl"]
\ No newline at end of file
diff --git a/examples/opencl/src/main.rs b/examples/opencl/src/main.rs
new file mode 100644
index 0000000..fbe508a
--- /dev/null
+++ b/examples/opencl/src/main.rs
@@ -0,0 +1,33 @@
+use llama_cpp_rs::{
+    options::{ModelOptions, PredictOptions},
+    LLama,
+};
+
+fn main() {
+    let model_options = ModelOptions {
+        n_gpu_layers: 12,
+        ..Default::default()
+    };
+
+    let llama = LLama::new("/models/.bin".into(), &model_options).unwrap();
+
+    let predict_options = PredictOptions {
+        tokens: 0,
+        threads: 14,
+        top_k: 90,
+        top_p: 0.86,
+        token_callback: Some(Box::new(|token| {
+            println!("token1: {}", token);
+
+            true
+        })),
+        ..Default::default()
+    };
+
+    llama
+        .predict(
+            "what are the national animals of india".into(),
+            predict_options,
+        )
+        .unwrap();
+}
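Both GPU examples pin `n_gpu_layers: 12`, which will not be the right offload depth for every card or model. A sketch of making it configurable at run time; `N_GPU_LAYERS` is a made-up environment variable, and the field is assumed to be the same integer type the examples use:

```rust
use std::env;

use llama_cpp_rs::options::ModelOptions;

// Sketch: read the offload depth from a hypothetical N_GPU_LAYERS variable,
// falling back to the 12 layers the examples hard-code.
fn model_options_from_env() -> ModelOptions {
    let n_gpu_layers = env::var("N_GPU_LAYERS")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(12);

    ModelOptions {
        n_gpu_layers,
        ..Default::default()
    }
}
```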