Parse JSON files using llama_index JSONReader (#1176)
Patch the default list of llama_index to support JSON files. This injection of JSON documents should improve the comprehension in JSON files, as there is a parsing of JSON files.
This commit is contained in:
		
							parent
							
								
									0c40cfb115
								
							
						
					
					
						commit
						23cd3fea10
					
				|  | @ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, AnyStr | |||
| from injector import inject, singleton | ||||
| from llama_index import ( | ||||
|     Document, | ||||
|     JSONReader, | ||||
|     ServiceContext, | ||||
|     StorageContext, | ||||
|     StringIterableReader, | ||||
|  | @ -27,6 +28,14 @@ from private_gpt.paths import local_data_path | |||
| if TYPE_CHECKING: | ||||
|     from llama_index.readers.base import BaseReader | ||||
| 
 | ||||
| # Patching the default file reader to support other file types | ||||
| FILE_READER_CLS = DEFAULT_FILE_READER_CLS.copy() | ||||
| FILE_READER_CLS.update( | ||||
|     { | ||||
|         ".json": JSONReader, | ||||
|     } | ||||
| ) | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -76,9 +85,13 @@ class IngestService: | |||
|     def ingest(self, file_name: str, file_data: AnyStr | Path) -> list[IngestedDoc]: | ||||
|         logger.info("Ingesting file_name=%s", file_name) | ||||
|         extension = Path(file_name).suffix | ||||
|         reader_cls = DEFAULT_FILE_READER_CLS.get(extension) | ||||
|         reader_cls = FILE_READER_CLS.get(extension) | ||||
|         documents: list[Document] | ||||
|         if reader_cls is None: | ||||
|             logger.debug( | ||||
|                 "No reader found for extension=%s, using default string reader", | ||||
|                 extension, | ||||
|             ) | ||||
|             # Read as a plain text | ||||
|             string_reader = StringIterableReader() | ||||
|             if isinstance(file_data, Path): | ||||
|  | @ -91,6 +104,7 @@ class IngestService: | |||
|             else: | ||||
|                 raise ValueError(f"Unsupported data type {type(file_data)}") | ||||
|         else: | ||||
|             logger.debug("Specific reader found for extension=%s", extension) | ||||
|             reader: BaseReader = reader_cls() | ||||
|             if isinstance(file_data, Path): | ||||
|                 # Already a path, nothing to do | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue