Added some javadoc (langchain4j#52)
langchain4j authored Jul 29, 2023
1 parent d4fca65 commit 78465c3
Showing 25 changed files with 215 additions and 23 deletions.
@@ -4,6 +4,11 @@

import java.util.Objects;

/**
* Represents an unstructured piece of text that usually corresponds to the content of a single file.
* This text could originate from various sources such as a text file, PDF, DOCX, or a web page (HTML).
* Each document may have associated metadata including its source, owner, creation date, etc.
*/
public class Document {

private final String text;
@@ -2,7 +2,20 @@

import java.io.InputStream;

/**
* Defines the interface for parsing an InputStream into a Document.
* Different document types require specialized parsing logic.
*/
public interface DocumentParser {

String DOCUMENT_TYPE = "document_type";

/**
* Parses an InputStream into a Document.
* The specific implementation of this method will depend on the type of the document being parsed.
*
* @param inputStream The InputStream that contains the content of the document.
* @return The parsed Document.
*/
Document parse(InputStream inputStream);
}
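For illustration, a plain-text parser implementing this interface might look roughly like the sketch below. The package names and the Document(String) constructor are assumptions based on the rest of the library, not something shown in this diff.

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;

// Hypothetical plain-text parser; assumes Document can be created from a String,
// which is not shown in this diff.
public class TextDocumentParser implements DocumentParser {

    @Override
    public Document parse(InputStream inputStream) {
        try {
            // Read the whole stream into memory and decode it as UTF-8 text.
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = inputStream.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            String text = new String(buffer.toByteArray(), StandardCharsets.UTF_8);
            return new Document(text); // assumption: a text-only constructor exists
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}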
@@ -3,9 +3,26 @@
import java.io.IOException;
import java.io.InputStream;

/**
* Defines the interface for a Document source.
* Documents can be loaded from various sources such as the file system, HTTP, FTP, etc.
*/
public interface DocumentSource {

/**
* Provides an InputStream to read the content of the document.
* This method can be implemented to read from various sources like a local file or a network connection.
*
* @return An InputStream from which the document content can be read.
* @throws IOException If an I/O error occurs while creating the InputStream.
*/
InputStream inputStream() throws IOException;

Metadata sourceMetadata();
/**
* Returns the metadata associated with the source of the document.
* This could include details such as the source location, date of creation, owner, etc.
*
* @return A Metadata object containing information associated with the source of the document.
*/
Metadata metadata();
}
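A file-system source could be implemented along these lines. The class name and the "source" metadata key are illustrative choices; Metadata.from(key, value) is one of the factories added in this commit.

import dev.langchain4j.data.document.DocumentSource;
import dev.langchain4j.data.document.Metadata;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;

// Hypothetical file-system source; "source" as a metadata key is an illustrative choice.
public class FileSystemSource implements DocumentSource {

    private final Path path;

    public FileSystemSource(Path path) {
        this.path = path;
    }

    @Override
    public InputStream inputStream() throws IOException {
        return Files.newInputStream(path);
    }

    @Override
    public Metadata metadata() {
        // Metadata.from(key, value) is added in this commit; the value is stored via toString().
        return Metadata.from("source", path);
    }
}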
@@ -6,10 +6,30 @@

import static java.util.stream.Collectors.toList;

/**
* Defines the interface for splitting a document into text segments.
* This is necessary as LLMs have a limited context window, making it impossible to send the entire document at once.
* Therefore, the document should first be split into segments, and only the relevant segments should be sent to the LLM.
*/
public interface DocumentSplitter {

/**
* Splits a single Document into a list of TextSegment objects.
* The metadata is typically copied from the document and enriched with segment-specific information,
* such as position in the document, page number, etc.
*
* @param document The Document to be split.
* @return A list of TextSegment objects derived from the input Document.
*/
List<TextSegment> split(Document document);

/**
* Splits a list of Documents into a list of TextSegment objects.
* This is a convenience method that calls the split method for each Document in the list.
*
* @param documents The list of Documents to be split.
* @return A list of TextSegment objects derived from the input Documents.
*/
default List<TextSegment> splitAll(List<Document> documents) {
return documents.stream()
.flatMap(document -> split(document).stream())
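As a sketch only, a naive splitter that breaks a document on blank lines might look like this; document.text() and TextSegment.from(String) are assumed accessors/factories that do not appear in this diff.

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.segment.TextSegment;

import java.util.ArrayList;
import java.util.List;

// Hypothetical paragraph-based splitter; each blank-line-separated paragraph becomes one segment.
public class ParagraphSplitter implements DocumentSplitter {

    @Override
    public List<TextSegment> split(Document document) {
        List<TextSegment> segments = new ArrayList<>();
        for (String paragraph : document.text().split("\\R{2,}")) { // split on blank lines
            if (!paragraph.trim().isEmpty()) {
                segments.add(TextSegment.from(paragraph.trim())); // assumption: TextSegment.from(String) exists
            }
        }
        return segments;
    }
}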
@@ -4,6 +4,14 @@
import java.util.Map;
import java.util.Objects;

/**
* Represents metadata of a Document or a TextSegment.
* The metadata is stored in a key-value map, where both keys and values are strings.
* For a Document, the metadata could include information such as the source, creation date,
* owner, or any other relevant details.
* For a TextSegment, in addition to the metadata copied from the document, it can also include segment-specific information,
* such as the page number, the position of the segment within the document, chapter, etc.
*/
public class Metadata {

private final Map<String, String> metadata;
@@ -23,8 +31,8 @@ public String get(String key) {
return metadata.get(key);
}

public Metadata add(String key, String value) {
this.metadata.put(key, value);
public Metadata add(String key, Object value) {
this.metadata.put(key, value.toString());
return this;
}

@@ -55,4 +63,12 @@ public String toString() {
" metadata = " + metadata +
" }";
}

public static Metadata from(String key, Object value) {
return new Metadata().add(key, value);
}

public static Metadata metadata(String key, Object value) {
return from(key, value);
}
}
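A brief usage sketch of the new add(String, Object) overload and the from/metadata factories added here (the key names are illustrative):

import dev.langchain4j.data.document.Metadata;

public class MetadataExample {

    public static void main(String[] args) {
        Metadata metadata = Metadata.from("source", "/docs/report.pdf") // key names are illustrative
                .add("owner", "alice")
                .add("page", 3); // the new add(String, Object) overload stores value.toString()

        System.out.println(metadata.get("owner")); // prints "alice"
    }
}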
@@ -4,6 +4,13 @@
import java.util.Arrays;
import java.util.List;

/**
* Represents a dense vector embedding of a text.
* This class encapsulates a float array that captures the "meaning" or semantic information of the text.
* Texts with similar meanings will have their vectors located close to each other in the embedding space.
* The embeddings are typically created by embedding models.
* @see dev.langchain4j.model.embedding.EmbeddingModel
*/
public class Embedding {

private final float[] vector;
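Closeness in the embedding space is usually measured with cosine similarity. A small, self-contained illustration on raw float vectors (the Embedding accessor for the underlying array is not shown in this diff, so the sketch works on float[] directly):

// Cosine similarity between two embedding vectors; values close to 1.0 indicate similar meaning.
public class CosineSimilarity {

    public static double between(float[] a, float[] b) {
        double dot = 0, normA = 0, normB = 0;
        for (int i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB));
    }
}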
@@ -4,6 +4,11 @@

import java.util.Objects;

/**
* Represents a response message from an AI (LLM).
* The message can contain either a textual response or a request to execute a tool.
* In the case of tool execution, the response to this message should be a {@link ToolExecutionResultMessage}.
*/
public class AiMessage extends ChatMessage {

private final ToolExecutionRequest toolExecutionRequest;
@@ -2,6 +2,10 @@

import java.util.Objects;

/**
* Represents a system message, typically defined by a developer.
* This type of message usually provides instructions regarding the AI's actions, such as its behavior or response style.
*/
public class SystemMessage extends ChatMessage {

public SystemMessage(String text) {
@@ -2,6 +2,9 @@

import java.util.Objects;

/**
* Represents the result of a tool execution. Tool execution requests come from a previous AiMessage.
*/
public class ToolExecutionResultMessage extends ChatMessage {

private final String toolName;
@@ -2,6 +2,9 @@

import java.util.Objects;

/**
* Represents a message from a user, typically an end user of the application.
*/
public class UserMessage extends ChatMessage {

private final String name;
@@ -4,11 +4,31 @@

import java.util.List;

/**
* Represents the memory of a chat (chat history).
* As LLMs are inherently stateless, this interface is useful for tracking the conversation.
*/
public interface ChatMemory {

/**
* Adds a message to the chat memory.
*
* @param message The ChatMessage to add.
*/
void add(ChatMessage message);

/**
* Retrieves messages from the chat memory.
* Depending on the implementation, it may not return all previously added messages,
* but rather a subset, a summary, or a combination thereof, etc.
*
* @return A list of ChatMessage objects representing the portion of the chat memory that is currently retained.
*/
List<ChatMessage> messages();


/**
* Clears the chat memory.
*/
void clear();
}
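A minimal in-memory implementation of this contract could look like the sketch below; real implementations typically also evict or summarize older messages to stay within the LLM's context window.

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.memory.ChatMemory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Hypothetical unbounded chat memory; keeps every message that was added.
public class SimpleChatMemory implements ChatMemory {

    private final List<ChatMessage> messages = new ArrayList<>();

    @Override
    public void add(ChatMessage message) {
        messages.add(message);
    }

    @Override
    public List<ChatMessage> messages() {
        return Collections.unmodifiableList(messages);
    }

    @Override
    public void clear() {
        messages.clear();
    }
}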
@@ -17,35 +17,66 @@
public interface ChatLanguageModel {

/**
* Sends a message from a user to the LLM and returns response.
* Sends a message from a user to the LLM and returns a response.
*
* @param userMessage User message as a String. Will be wrapped into {@link dev.langchain4j.data.message.UserMessage UserMessage} under the hood.
* @return {@link dev.langchain4j.data.message.AiMessage AiMessage}
* @param userMessage A user message as a String. Will be wrapped into {@link dev.langchain4j.data.message.UserMessage UserMessage} under the hood.
* @return Response from the LLM.
*/
default AiMessage sendUserMessage(String userMessage) {
return sendUserMessage(UserMessage.from(userMessage));
}

/**
* Sends a message from a user to the LLM and returns a response.
*
* @param userMessage A user message.
* @return Response from the LLM.
*/
default AiMessage sendUserMessage(UserMessage userMessage) {
return sendMessages(userMessage);
}

/**
* Sends a structured prompt as a user message to the LLM and returns response.
* Sends a structured prompt as a user message to the LLM and returns a response.
*
* @param structuredPrompt object annotated with {@link dev.langchain4j.model.input.structured.StructuredPrompt @StructuredPrompt}
* @return {@link dev.langchain4j.data.message.AiMessage AiMessage}
* @param structuredPrompt A user message as an object annotated with {@link dev.langchain4j.model.input.structured.StructuredPrompt @StructuredPrompt}. Will be converted into {@link dev.langchain4j.data.message.UserMessage UserMessage} under the hood.
* @return Response from the LLM.
*/
default AiMessage sendUserMessage(Object structuredPrompt) {
Prompt prompt = toPrompt(structuredPrompt);
return sendUserMessage(prompt.toUserMessage());
}

/**
* Sends a sequence of messages to the LLM and returns a response.
* Typically, the sequence contains messages in the following order:
* System (optional) -> User -> AI -> User -> AI -> User ...
*
* @param messages An array of messages to be sent.
* @return Response from the LLM.
*/
default AiMessage sendMessages(ChatMessage... messages) {
return sendMessages(asList(messages));
}

/**
* Sends a sequence of messages to the LLM and returns a response.
* Typically, the sequence contains messages in the following order:
* System (optional) -> User -> AI -> User -> AI -> User ...
*
* @param messages A list of messages to be sent.
* @return Response from the LLM.
*/
AiMessage sendMessages(List<ChatMessage> messages);

/**
* Sends a sequence of messages to the LLM and returns a response.
* Typically, the sequence contains messages in the following order:
* System (optional) -> User -> AI -> User -> AI -> User ...
*
* @param messages A list of messages to be sent.
* @param toolSpecifications A list of tools that the LLM is allowed to execute.
* @return Response from the LLM. AiMessage can contain either a textual response or a request to execute a tool.
*/
AiMessage sendMessages(List<ChatMessage> messages, List<ToolSpecification> toolSpecifications);
}
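A usage sketch of the methods documented above; the model parameter stands for any ChatLanguageModel implementation, and the message contents are illustrative.

import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;

import static dev.langchain4j.data.message.SystemMessage.systemMessage;
import static dev.langchain4j.data.message.UserMessage.userMessage;

// Illustrative helper; "model" can be any ChatLanguageModel implementation.
public class ChatExample {

    static AiMessage ask(ChatLanguageModel model) {

        // A single user message is wrapped into a UserMessage under the hood:
        AiMessage answer = model.sendUserMessage("What is an embedding?");

        // A full conversation: optional system message, then alternating user/AI messages:
        return model.sendMessages(
                systemMessage("You are a concise assistant."),
                userMessage("What is an embedding?"),
                answer,
                userMessage("Give me a concrete example.")
        );
    }
}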
@@ -12,6 +12,9 @@
import static dev.langchain4j.model.input.structured.StructuredPromptProcessor.toPrompt;
import static java.util.Collections.singletonList;

/**
* Represents an LLM that has a chat interface and can stream responses one token at a time.
*/
public interface StreamingChatLanguageModel {

default void sendUserMessage(String userMessage, StreamingResponseHandler handler) {
@@ -1,9 +1,8 @@
package dev.langchain4j.model.chat;

import dev.langchain4j.MightChangeInTheFuture;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.input.Prompt;

import java.util.List;
@@ -12,6 +11,10 @@
import static dev.langchain4j.model.input.structured.StructuredPromptProcessor.toPrompt;
import static java.util.Collections.singletonList;

/**
* Represents an interface for estimating the token count of various inputs, such as a text, a message, a prompt, a text segment, etc.
* This can be useful when it is necessary to know in advance the cost of processing a given text with the LLM.
*/
public interface TokenCountEstimator {

default int estimateTokenCount(String text) {
@@ -22,19 +25,17 @@ default int estimateTokenCount(UserMessage userMessage) {
return estimateTokenCount(singletonList(userMessage));
}

@MightChangeInTheFuture("not sure this method is useful/needed")
default int estimateTokenCount(Prompt prompt) {
return estimateTokenCount(prompt.text());
}

@MightChangeInTheFuture("not sure this method is useful/needed")
default int estimateTokenCount(Object structuredPrompt) {
return estimateTokenCount(toPrompt(structuredPrompt));
}

int estimateTokenCount(List<ChatMessage> messages);

default int estimateTokenCount(TextSegment textSegment) {
return estimateTokenCount(textSegment.text());
}

int estimateTokenCount(List<ChatMessage> messages);
}
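For example, an estimator could be used to check a conversation's size before sending it to the LLM; the helper class below is an illustrative sketch, not part of this commit.

import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.TokenCountEstimator;

import java.util.List;

// Illustrative guard that rejects a conversation exceeding a given token budget.
public class TokenBudgetGuard {

    static void ensureFits(TokenCountEstimator estimator, List<ChatMessage> messages, int maxTokens) {
        int tokens = estimator.estimateTokenCount(messages);
        if (tokens > maxTokens) {
            throw new IllegalArgumentException(
                    "Conversation is " + tokens + " tokens, exceeding the budget of " + maxTokens);
        }
    }
}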
@@ -7,6 +7,9 @@

import static java.util.Collections.singletonList;

/**
* Represents an LLM that generates an embedding for a given text.
*/
public interface EmbeddingModel {

default Embedding embed(String text) {
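A brief usage sketch of the embed(String) convenience method shown above; the model parameter stands for any EmbeddingModel implementation.

import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;

// Illustrative helper; "model" can be any EmbeddingModel implementation.
public class EmbeddingExample {

    static Embedding embedGreeting(EmbeddingModel model) {
        // The default embed(String) method wraps the text and delegates to the model.
        return model.embed("Hello, world!");
    }
}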
@@ -4,6 +4,10 @@

import java.util.List;

/**
* Represents an interface for estimating the token count of various inputs, such as texts, text segments, etc.
* This can be useful when it is necessary to know in advance the cost of processing a given text with the LLM.
*/
public interface TokenCountEstimator {

int estimateTokenCount(String text);
@@ -10,6 +10,11 @@
import static dev.langchain4j.data.message.SystemMessage.systemMessage;
import static dev.langchain4j.data.message.UserMessage.userMessage;

/**
* Represents a prompt (an input text sent to the LLM).
* A prompt usually contains instructions, contextual information, end-user input, etc.
* A Prompt is typically created by applying one or more values to a PromptTemplate.
*/
public class Prompt {

private final String text;
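As the Javadoc notes, a Prompt is typically produced from a template. The sketch below assumes a PromptTemplate with from(...) and apply(...) methods and a {{variable}} syntax, none of which appear in this diff; only Prompt and prompt.toUserMessage() are shown elsewhere in this commit.

import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.model.input.Prompt;
import dev.langchain4j.model.input.PromptTemplate;

import java.util.HashMap;
import java.util.Map;

// PromptTemplate, its from(...) factory and apply(...) method are assumptions here.
public class PromptExample {

    public static void main(String[] args) {
        PromptTemplate template = PromptTemplate.from("Translate the following text to {{language}}: {{text}}");

        Map<String, Object> variables = new HashMap<>();
        variables.put("language", "German");
        variables.put("text", "Good morning");

        Prompt prompt = template.apply(variables);
        UserMessage userMessage = prompt.toUserMessage(); // toUserMessage() is used elsewhere in this diff
        System.out.println(userMessage);
    }
}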