Rag Properties
constructor(indexPath: String = "./rag-index", storeChunkText: Boolean = true, uploadMaxBytes: Long = 10 * 1024 * 1024, uploadAllowedContentTypes: List<String> = listOf(
"text/plain",
"application/pdf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
), sourceLoadTimeoutMillis: Long, sourceLoadAllowHosts: List<String> = emptyList(), sourceLoadAuthHeaders: Map<String, String> = emptyMap(), sourceLoadInsecureSkipTlsVerify: Boolean = false, sourceLoadCustomCaCertPath: String? = null, sourceLoadDefaultProfile: String? = null, sourceLoadProfiles: Map<String, SourceLoadProfile> = emptyMap(), statsCacheTtlMillis: Long, statsCacheMaxEntries: Int = 32, statsCacheMaxEntriesPerTenant: Int = 4, contextualRetrievalEnabled: Boolean = true, contextualIncludeDocId: Boolean = true, contextualIncludeMetadataContext: Boolean = true, contextualMaxDocumentSummaryChars: Int = 240, rerankerEnabled: Boolean = true, rerankerType: String = "heuristic", rerankerTopN: Int = 24, rerankerAlpha: Double = 0.65, rerankerApiBaseUrl: String = "https://api.cohere.com", rerankerApiKey: String? = null, rerankerModel: String = "rerank-v3.5", rerankerRequestTimeoutMillis: Long, rerankerOnnxQueryInputName: String? = null, rerankerOnnxDocumentInputName: String? = null, rerankerOnnxOutputName: String? = null, rerankerOnnxExpectedDimensions: Int = 0, rerankerOnnxExpectedInputSchema: String? = null, rerankerOnnxExpectedTokenizer: String? = null, rerankerOnnxExpectedScoreSemantics: String? = null, rerankerOnnxExpectedTokenizerVocabChecksum: String? = null, rerankerOnnxTokenizerSchemaChecksum: String? = null, rerankerOnnxModelContractChecksum: String? = null, correctiveRetrievalEnabled: Boolean = true, correctiveMinConfidence: Double = 0.08, correctiveMinResultsBeforeSkip: Int = 3, correctiveExpandedCandidateMultiplier: Int = 2, queryRewriteEnabled: Boolean = true, queryRewriterType: String = "heuristic", queryRewriteApiBaseUrl: String = "https://api.openai.com/v1", queryRewriteApiKey: String? = null,
queryRewriteModel: String = "gpt-4o-mini", queryRewriteRequestTimeoutMillis: Long, hierarchicalSummariesEnabled: Boolean = true, hierarchicalMaxSectionSummaries: Int = 4, hierarchicalMaxSummaryChars: Int = 280, hierarchicalTargetChunksPerSection: Int = 3, summarizerType: String = "rule-based", summarizerApiBaseUrl: String = "https://api.openai.com/v1", summarizerApiKey: String? = null, summarizerModel: String = "gpt-4o-mini", summarizerRequestTimeoutMillis: Long, statsCacheStoreType: String = "memory", statsCacheFilePath: String? = null, statsCacheFileMaxBytes: Long = 0, statsCacheFileRotateCount: Int = 2, statsCacheFileCleanupOnStart: Boolean = false, providerHealthAutoExportPath: String? = null, providerHealthAutoExportIntervalMillis: Long = 0, providerHealthAutoExportWindowMillis: Long? = null, providerHealthAutoExportFormat: String = "json", providerHealthAutoExportRetainCount: Int = 5, providerHealthAutoExportIncludeScopeSuffix: Boolean = false, providerHealthAutoExportPushUrl: String? = null, providerHealthAutoExportPushFormat: String = "json", providerHealthAutoExportPushTimeoutMillis: Long, providerHealthAutoExportPushHeaders: Map<String, String> = emptyMap(), providerHealthAutoExportPushMaxRetries: Int = 2, providerHealthAutoExportPushRetryBackoffMillis: Long = 250, providerHealthAutoExportPushDeadLetterPath: String? = null, providerHealthAutoExportPushDeadLetterRetainCount: Int = 5, providerHealthAutoExportPushAsyncEnabled: Boolean = false, providerHealthAutoExportPushQueueCapacity: Int = 32, providerHealthAutoExportPushDropOldestOnOverflow: Boolean = true, providerHealthAutoExportPushHmacSecret: String? = null,
providerHealthAutoExportPushHmacHeaderName: String = "X-Rag-Signature", providerHealthAutoExportPushTimestampHeaderName: String = "X-Rag-Timestamp", chunkerType: String = "basic", basicMaxChars: Int = 1800, basicOverlapChars: Int = 200, slidingWindowSize: Int = 240, slidingOverlap: Int = 40, adaptiveSectionRegex: String = com.ainsoft.rag.chunking.Chunkers.DEFAULT_ADAPTIVE_SECTION_REGEX, adaptiveMinChunkSize: Int = 200, adaptiveMaxChunkSize: Int = 1000, regexSplitPattern: String = "(?m)^##?", regexGroupByPattern: String? = null, embeddingDimensions: Int = 256, embeddingProvider: String = "hash", openAiApiKey: String? = null, openAiModel: String = "text-embedding-3-small", openAiBaseUrl: String = "https://api.openai.com/v1")