@@ -39,40 +39,11 @@ vllm serve qwen/Qwen1.5-0.5B-Chat --port 8001
 
-Use the script: <gh-file:examples/online_serving/retrieval_augmented_generation_with_langchain.py>
-
-```python
-python retrieval_augmented_generation_with_langchain.py --help
-usage: retrieval_augmented_generation_with_langchain.py [-h] [--vllm-api-key VLLM_API_KEY]
-                                                        [--vllm-embedding-endpoint VLLM_EMBEDDING_ENDPOINT]
-                                                        [--vllm-chat-endpoint VLLM_CHAT_ENDPOINT]
-                                                        [--uri URI] [--url URL]
-                                                        [--embedding-model EMBEDDING_MODEL]
-                                                        [--chat-model CHAT_MODEL] [-i] [-k TOP_K]
-                                                        [-c CHUNK_SIZE] [-o CHUNK_OVERLAP]
-
-RAG Demo with vLLM and langchain
-
-options:
-  -h, --help            show this help message and exit
-  --vllm-api-key VLLM_API_KEY
-                        API key for vLLM compatible services
-  --vllm-embedding-endpoint VLLM_EMBEDDING_ENDPOINT
-                        Base URL for embedding service
-  --vllm-chat-endpoint VLLM_CHAT_ENDPOINT
-                        Base URL for chat service
-  --uri URI             URI for Milvus database
-  --url URL             URL of the document to process
-  --embedding-model EMBEDDING_MODEL
-                        Model name for embeddings
-  --chat-model CHAT_MODEL
-                        Model name for chat
-  -i, --interactive     Enable interactive Q&A mode
-  -k TOP_K, --top-k TOP_K
-                        Number of top results to retrieve
-  -c CHUNK_SIZE, --chunk-size CHUNK_SIZE
-                        Chunk size for document splitting
-  -o CHUNK_OVERLAP, --chunk-overlap CHUNK_OVERLAP
-                        Chunk overlap for document splitting
-```
+:::{argparse}
+:module: examples.online_serving.retrieval_augmented_generation_with_langchain
+:func: get_parser
+:prog: retrieval_augmented_generation_with_langchain.py
+:::
 
-Run the script
 
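The `{argparse}` directive generates the options table at build time by importing the named module and calling its `get_parser()` function, so the rendered docs can no longer drift from the script. As a reference for what that hook looks like, here is a minimal sketch reconstructed from the `--help` output removed above; the option names and help strings come from that output, while defaults and types are assumptions and the real example script may differ:

```python
# Hypothetical sketch of the get_parser() hook the {argparse} directive
# imports. Option names and help strings mirror the deleted --help output;
# defaults are omitted because they are not visible in the diff.
import argparse


def get_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="RAG Demo with vLLM and langchain")
    parser.add_argument("--vllm-api-key",
                        help="API key for vLLM compatible services")
    parser.add_argument("--vllm-embedding-endpoint",
                        help="Base URL for embedding service")
    parser.add_argument("--vllm-chat-endpoint",
                        help="Base URL for chat service")
    parser.add_argument("--uri", help="URI for Milvus database")
    parser.add_argument("--url", help="URL of the document to process")
    parser.add_argument("--embedding-model", help="Model name for embeddings")
    parser.add_argument("--chat-model", help="Model name for chat")
    parser.add_argument("-i", "--interactive", action="store_true",
                        help="Enable interactive Q&A mode")
    parser.add_argument("-k", "--top-k", type=int,
                        help="Number of top results to retrieve")
    parser.add_argument("-c", "--chunk-size", type=int,
                        help="Chunk size for document splitting")
    parser.add_argument("-o", "--chunk-overlap", type=int,
                        help="Chunk overlap for document splitting")
    return parser
```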
@@ -112,34 +83,11 @@ vllm serve qwen/Qwen1.5-0.5B-Chat --port 8001
 
-Use the script: <gh-file:examples/online_serving/retrieval_augmented_generation_with_llamaindex.py>
-
-```python
-python retrieval_augmented_generation_with_llamaindex.py --help
-usage: retrieval_augmented_generation_with_llamaindex.py [-h] [--url URL]
-                                                         [--embedding-model EMBEDDING_MODEL]
-                                                         [--chat-model CHAT_MODEL]
-                                                         [--vllm-api-key VLLM_API_KEY]
-                                                         [--embedding-endpoint EMBEDDING_ENDPOINT]
-                                                         [--chat-endpoint CHAT_ENDPOINT]
-                                                         [--db-path DB_PATH] [-i]
-
-RAG with vLLM and LlamaIndex
-
-options:
-  -h, --help            show this help message and exit
-  --url URL             URL of the document to process
-  --embedding-model EMBEDDING_MODEL
-                        Model name for embeddings
-  --chat-model CHAT_MODEL
-                        Model name for chat
-  --vllm-api-key VLLM_API_KEY
-                        API key for vLLM compatible services
-  --embedding-endpoint EMBEDDING_ENDPOINT
-                        Base URL for embedding service
-  --chat-endpoint CHAT_ENDPOINT
-                        Base URL for chat service
-  --db-path DB_PATH     Path to Milvus database
-  -i, --interactive     Enable interactive Q&A mode
-```
+:::{argparse}
+:module: examples.online_serving.retrieval_augmented_generation_with_llamaindex
+:func: get_parser
+:prog: retrieval_augmented_generation_with_llamaindex.py
+:::
 
-Run the script
 
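The second directive assumes the same `get_parser()` entry point in the LlamaIndex example module. A comparable sketch, with the option set taken from the deleted help text (again, types and defaults are assumptions):

```python
# Hypothetical get_parser() for the LlamaIndex example; option names and
# help strings mirror the --help output removed above.
import argparse


def get_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="RAG with vLLM and LlamaIndex")
    parser.add_argument("--url", help="URL of the document to process")
    parser.add_argument("--embedding-model", help="Model name for embeddings")
    parser.add_argument("--chat-model", help="Model name for chat")
    parser.add_argument("--vllm-api-key",
                        help="API key for vLLM compatible services")
    parser.add_argument("--embedding-endpoint",
                        help="Base URL for embedding service")
    parser.add_argument("--chat-endpoint", help="Base URL for chat service")
    parser.add_argument("--db-path", help="Path to Milvus database")
    parser.add_argument("-i", "--interactive", action="store_true",
                        help="Enable interactive Q&A mode")
    return parser
```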