Parse JSON files using llama_index JSONReader (#1176)
Patch llama_index's default file-reader mapping to support JSON files. Ingesting `.json` files through JSONReader, which parses them as JSON instead of falling back to the plain-text string reader, should improve comprehension of their contents.
This commit is contained in:
		
							parent
							
								
									0c40cfb115
								
							
						
					
					
						commit
						23cd3fea10
					
				|  | @ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, AnyStr | ||||||
| from injector import inject, singleton | from injector import inject, singleton | ||||||
| from llama_index import ( | from llama_index import ( | ||||||
|     Document, |     Document, | ||||||
|  |     JSONReader, | ||||||
|     ServiceContext, |     ServiceContext, | ||||||
|     StorageContext, |     StorageContext, | ||||||
|     StringIterableReader, |     StringIterableReader, | ||||||
|  | @ -27,6 +28,14 @@ from private_gpt.paths import local_data_path | ||||||
| if TYPE_CHECKING: | if TYPE_CHECKING: | ||||||
|     from llama_index.readers.base import BaseReader |     from llama_index.readers.base import BaseReader | ||||||
| 
 | 
 | ||||||
|  | # Patching the default file reader to support other file types | ||||||
|  | FILE_READER_CLS = DEFAULT_FILE_READER_CLS.copy() | ||||||
|  | FILE_READER_CLS.update( | ||||||
|  |     { | ||||||
|  |         ".json": JSONReader, | ||||||
|  |     } | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -76,9 +85,13 @@ class IngestService: | ||||||
|     def ingest(self, file_name: str, file_data: AnyStr | Path) -> list[IngestedDoc]: |     def ingest(self, file_name: str, file_data: AnyStr | Path) -> list[IngestedDoc]: | ||||||
|         logger.info("Ingesting file_name=%s", file_name) |         logger.info("Ingesting file_name=%s", file_name) | ||||||
|         extension = Path(file_name).suffix |         extension = Path(file_name).suffix | ||||||
|         reader_cls = DEFAULT_FILE_READER_CLS.get(extension) |         reader_cls = FILE_READER_CLS.get(extension) | ||||||
|         documents: list[Document] |         documents: list[Document] | ||||||
|         if reader_cls is None: |         if reader_cls is None: | ||||||
|  |             logger.debug( | ||||||
|  |                 "No reader found for extension=%s, using default string reader", | ||||||
|  |                 extension, | ||||||
|  |             ) | ||||||
|             # Read as a plain text |             # Read as a plain text | ||||||
|             string_reader = StringIterableReader() |             string_reader = StringIterableReader() | ||||||
|             if isinstance(file_data, Path): |             if isinstance(file_data, Path): | ||||||
|  | @ -91,6 +104,7 @@ class IngestService: | ||||||
|             else: |             else: | ||||||
|                 raise ValueError(f"Unsupported data type {type(file_data)}") |                 raise ValueError(f"Unsupported data type {type(file_data)}") | ||||||
|         else: |         else: | ||||||
|  |             logger.debug("Specific reader found for extension=%s", extension) | ||||||
|             reader: BaseReader = reader_cls() |             reader: BaseReader = reader_cls() | ||||||
|             if isinstance(file_data, Path): |             if isinstance(file_data, Path): | ||||||
|                 # Already a path, nothing to do |                 # Already a path, nothing to do | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue