I am creating a django API where it takes a pdf doc and using RAG, a query is made to the doc and the output is generated via LLM. I want the output as json and I am using jsonoutputparser but I am getting an error while parsing to json. Currently, for testing purpose, I am using a query where it takes a candidate resume and generates output. Here is my code:
import os
import io
import tempfile
from typing import List
from rest_framework.decorators import api_view, parser_classes
from rest_framework.parsers import MultiPartParser, FormParser, JSONParser
from rest_framework.response import Response
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.retrieval import create_retrieval_chain
from langchain_openai import OpenAI
from langchain.vectorstores import FAISS
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
@api_view(['POST'])
@parser_classes([MultiPartParser, FormParser, JSONParser])
def generateThroughAI(request):
uploaded_file = request.data.get("file")
file_content = uploaded_file.read()
bytesio_object = io.BytesIO(file_content)
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as temp_file:
temp_file.write(bytesio_object.getvalue())
temp_file_path = temp_file.name
loader = PyPDFLoader(file_path=temp_file_path)
documents = loader.load()
text_splitter = CharacterTextSplitter(
chunk_size=500, chunk_overlap=20, separator="\n"
)
docs = text_splitter.split_documents(documents=documents)
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(docs, embeddings)
class Output(BaseModel):
summary: str = Field(description="short summary about the candidate")
skills: List[str] = Field(description="list containing the skills of the candidate")
parser = JsonOutputParser(pydantic_object=Output)
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
combine_docs_chain = create_stuff_documents_chain(
OpenAI(), retrieval_qa_chat_prompt, output_parser=parser
)
retrieval_chain = create_retrieval_chain(
vectorstore.as_retriever(), combine_docs_chain
)
res = retrieval_chain.invoke({"input": "Provide a list of skills the candidate has along with a short summary about the candidate"})
print(res)
os.remove(temp_file_path)
return Response({"result": "done!"})
And here is the error:
Internal Server Error: /documents/generate-content/
Traceback (most recent call last):
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\output_parsers\json.py", line 66, in parse_result
return parse_json_markdown(text)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\utils\json.py", line 147, in parse_json_markdown
return _parse_json(json_str, parser=parser)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\utils\json.py", line 160, in _parse_json
return parser(json_str)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\utils\json.py", line 120, in parse_partial_json
return json.loads(s, strict=strict)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\json\__init__.py", line 359, in loads
return cls(**kw).decode(s)
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\Admin\Envs\docchat\lib\site-packages\django\core\handlers\exception.py", line 55, in inner
response = get_response(request)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\django\core\handlers\base.py", line 197, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\django\views\decorators\csrf.py", line 56, in wrapper_view
return view_func(*args, **kwargs)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\django\views\generic\base.py", line 104, in view
return self.dispatch(request, *args, **kwargs)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\rest_framework\views.py", line 509, in dispatch
response = self.handle_exception(exc)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\rest_framework\views.py", line 469, in handle_exception
self.raise_uncaught_exception(exc)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\rest_framework\views.py", line 480, in raise_uncaught_exception
raise exc
File "C:\Users\Admin\Envs\docchat\lib\site-packages\rest_framework\views.py", line 506, in dispatch
response = handler(request, *args, **kwargs)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\rest_framework\decorators.py", line 50, in handler
return func(*args, **kwargs)
File "C:\Users\Admin\Desktop\docchat\documents\views.py", line 146, in generateThroughAI
res = retrieval_chain.invoke({"input": "Provide a list of skills the candidate has along with a short summary about the candidate"})
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 4573, in invoke
return self.bound.invoke(
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 2504, in invoke
input = step.invoke(input, config)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\passthrough.py", line 469, in invoke
return self._call_with_config(self._invoke, input, config, **kwargs)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 1598, in _call_with_config
context.run(
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\config.py", line 380, in call_func_with_variable_args
return func(input, **kwargs) # type: ignore[call-arg]
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\passthrough.py", line 456, in _invoke
**self.mapper.invoke(
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 3149, in invoke
output = {key: future.result() for key, future in zip(steps, futures)}
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 3149, in <dictcomp>
output = {key: future.result() for key, future in zip(steps, futures)}
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\concurrent\futures\_base.py", line 445, in result
return self.__get_result()
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\concurrent\futures\_base.py", line 390, in __get_result
raise self._exception
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\concurrent\futures\thread.py", line 52, in run
result = self.fn(*self.args, **self.kwargs)
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 4573, in invoke
return self.bound.invoke(
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 2504, in invoke
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\output_parsers\base.py", line 178, in invoke
return self._call_with_config(
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\base.py", line 1598, in _call_with_config
context.run(
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\runnables\config.py", line 380, in call_func_with_variable_args
return func(input, **kwargs) # type: ignore[call-arg]
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\output_parsers\base.py", line 179, in <lambda>
lambda inner_input: self.parse_result([Generation(text=inner_input)]),
File "C:\Users\Admin\Envs\docchat\lib\site-packages\langchain_core\output_parsers\json.py", line 69, in parse_result
raise OutputParserException(msg, llm_output=text) from e
langchain_core.exceptions.OutputParserException: Invalid json output: Skills:
1. ReactJS - experienced in creating user-friendly web apps using ReactJS.
2. NextJS - proficient in NextJS for frontend development.
3. Redux - familiar with using Redux for state management in web applications.
4. AWS - knowledgeable in using AWS for cloud computing and storage.
5. Django - skilled in developing web applications using Django.
6. Python - proficient in Python programming language.
7. REST - experienced in building REST APIs for web applications.
8. Postgres - familiar with using Postgres as a database management system.
9. Flutter - skilled in using Flutter for cross-platform mobile app development.
10. GraphQL - familiar with using GraphQL for efficient data querying.
11. MongoDB - knowledgeable in using MongoDB for database management.
12. ExpressJS - experienced in using ExpressJS for backend development.
13. JavaScript - proficient in JavaScript programming language.
14. TypeScript - familiar with using TypeScript for frontend development.
15. Java - skilled in Java programming language.
16. Deep Learning - knowledgeable in implementing deep learning algorithms.
17. AI & ML - familiar with using AI and ML techniques for data analysis and prediction.
18. Web Design & Development - experienced in designing and developing web applications.
19. VueJS - proficient in using VueJS for
[14/Jun/2024 18:14:57] "POST /documents/generate-content/ HTTP/1.1" 500 308536
I tried using JsonOutputParser provided by langchain, but could not get the expected output.