Parse JSON files using llama_index JSONReader (#1176)
Patch llama_index's default mapping of file readers to add support for JSON files. Ingesting `.json` documents through the dedicated JSONReader, which parses their structure instead of reading them as plain text, should improve comprehension of their content.
This commit is contained in:
parent
0c40cfb115
commit
23cd3fea10
|
@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, AnyStr
|
||||||
from injector import inject, singleton
|
from injector import inject, singleton
|
||||||
from llama_index import (
|
from llama_index import (
|
||||||
Document,
|
Document,
|
||||||
|
JSONReader,
|
||||||
ServiceContext,
|
ServiceContext,
|
||||||
StorageContext,
|
StorageContext,
|
||||||
StringIterableReader,
|
StringIterableReader,
|
||||||
|
@ -27,6 +28,14 @@ from private_gpt.paths import local_data_path
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from llama_index.readers.base import BaseReader
|
from llama_index.readers.base import BaseReader
|
||||||
|
|
||||||
|
# Patching the default file reader to support other file types
|
||||||
|
FILE_READER_CLS = DEFAULT_FILE_READER_CLS.copy()
|
||||||
|
FILE_READER_CLS.update(
|
||||||
|
{
|
||||||
|
".json": JSONReader,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,9 +85,13 @@ class IngestService:
|
||||||
def ingest(self, file_name: str, file_data: AnyStr | Path) -> list[IngestedDoc]:
|
def ingest(self, file_name: str, file_data: AnyStr | Path) -> list[IngestedDoc]:
|
||||||
logger.info("Ingesting file_name=%s", file_name)
|
logger.info("Ingesting file_name=%s", file_name)
|
||||||
extension = Path(file_name).suffix
|
extension = Path(file_name).suffix
|
||||||
reader_cls = DEFAULT_FILE_READER_CLS.get(extension)
|
reader_cls = FILE_READER_CLS.get(extension)
|
||||||
documents: list[Document]
|
documents: list[Document]
|
||||||
if reader_cls is None:
|
if reader_cls is None:
|
||||||
|
logger.debug(
|
||||||
|
"No reader found for extension=%s, using default string reader",
|
||||||
|
extension,
|
||||||
|
)
|
||||||
# Read as a plain text
|
# Read as a plain text
|
||||||
string_reader = StringIterableReader()
|
string_reader = StringIterableReader()
|
||||||
if isinstance(file_data, Path):
|
if isinstance(file_data, Path):
|
||||||
|
@ -91,6 +104,7 @@ class IngestService:
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported data type {type(file_data)}")
|
raise ValueError(f"Unsupported data type {type(file_data)}")
|
||||||
else:
|
else:
|
||||||
|
logger.debug("Specific reader found for extension=%s", extension)
|
||||||
reader: BaseReader = reader_cls()
|
reader: BaseReader = reader_cls()
|
||||||
if isinstance(file_data, Path):
|
if isinstance(file_data, Path):
|
||||||
# Already a path, nothing to do
|
# Already a path, nothing to do
|
||||||
|
|
Loading…
Reference in New Issue