How to use script nodes
You can add a special node known as a Script Node to add a Python script to modify the output of the incoming nodes. It is a useful feature when you need to perform an action programmatically.
Script Nodes will require being whitelisted for internet access. Here is a list of libraries pre-installed in Script Nodes.
Pillow==10.3.0
pydub==0.25.1
pypdf==4.2.0
moviepy==1.0.3
nltk==3.8.1
pandas==2.2.2
librosa==0.10.1
spacy==3.7.4
shap==0.45.0
img2pdf==0.5.1
plotly==5.20.0
opencv-python==4.9.0.80
statsmodels==0.14.2
lxml==5.2.1
python-dateutil==2.9.0.post0
scikit-learn==1.4.2
matplotlib==3.8.4
graphviz==0.20.3
Here is an example to help you get started.
# Authenticate with the aixplain platform: the SDK reads the team API key
# from the TEAM_API_KEY environment variable.
import os

os.environ.update({"TEAM_API_KEY": "YOUR-TEAM-API-KEY"})
Intermediate Representations
Here is an example of how the input/output to a node looks in the aixplain pipeline to help you understand how the script node should look.
{
"index": 0, # index in the list
"success": True, # whether the segment was processed successfully or not
"input_type": "audio", # input type
"is_url": True, # is the data stored in a URL?
"details": {}, # detail information
"input_segment_info": [], # Information on how the input to the segment looks like
"attributes": {
"data": "NODE_IO_DATA_HERE",
} # output parameters of the node
}
Here is how you can format the output of the Google Search Node into that structure.
# Run the Google Search (SERP) model on its own first, to inspect the raw
# output we will later feed into the script node.
from aixplain.factories import ModelFactory

google_search = ModelFactory.get("65c51c556eb563350f6e1bb1")
response = google_search.run("Who wrote Sargento Getúlio?")
response
You can feed script nodes with this information in the following format:
# Example payload for the script node's `context_data` input: the Google
# Search node's output wrapped in the pipeline's segment structure shown
# above. attributes["data"] carries the top-ranked hit's text.
context_data = [{
"index": 0,
"success": True,
"input_type": "text",
"is_url": False,
# raw search hits ordered by score; score 0 is an empty placeholder row
"details": [
{'score': 0, 'document': ''},
{
'score': 1,
'data': 'Sergeant Getulio. Theatrical release poster. Directed by, Hermanno Penna. Written by, Flávio Porto Hermanno Penna. Based on, Sargento Getúlio by João Ubaldo ...',
'document': 'https://en.wikipedia.org/wiki/Sergeant_Getulio'
},
{
'score': 2,
'data': 'Amazon.com: Sargento Getulio: 9783518394359: ribeiro-jo-o-ubaldo: Books. ... just a fantastic read, an Author worth following...next time in Portuguise ...',
'document': 'https://www.amazon.com/Sargento-Getulio-ribeiro-jo-ubaldo/dp/3518394355'
},
{
'score': 3,
'data': 'Amazon.com: Sargento Getúlio: 9783803127068: Ribeiro, Joao Ubaldo: Books.',
'document': 'https://www.amazon.com/Sargento-Get%C3%BAlio/dp/3803127068'
},
{
'score': 4,
'data': 'Top cast13 · Director. Hermanno Penna · Writers · Hermanno Penna · Flávio Porto · João Ubaldo Ribeiro · All cast & crew · Production, box office & more at IMDbPro ...',
'document': 'https://www.imdb.com/title/tt0130995/'
},
{
'score': 5,
'data': 'João Ubaldo Ribeiro (January 23, 1941 – July 18, 2014) was a Brazilian writer, journalist, screenwriter and professor. Several of his books and short ...',
'document': 'https://en.wikipedia.org/wiki/Jo%C3%A3o_Ubaldo_Ribeiro'
},
{
'score': 6,
'data': 'Sargento Getúlio – Edição especial de 50 anos João Ubaldo Ribeiro in Portuguese ; Author. João Ubaldo Ribeiro ; Book Title. Sargento Getúlio – Edição especial de ...',
'document': 'https://www.ebay.com/itm/394707990573'
},
{
'score': 7,
'data': 'Sargento Getúlio by Ribeiro, João Ubaldo and a great selection of related books, art and collectibles available now at AbeBooks.com.',
'document': 'https://www.abebooks.com/book-search/title/sargento-getulio/first-edition/'
},
{
'score': 8,
'data': 'Sergeant Getúlio by João Ubaldo Ribeiro. Sergeant Getúlio. Published January 1st 1984 by Avon Books ; Sargento Getúlio by João Ubaldo Ribeiro. Sargento Getúlio.',
'document': 'https://www.goodreads.com/work/editions/300244-sargento-get-lio'
},
{
'score': 9,
'data': "Details. Bookseller: Turtle Creek Books CA (CA); Bookseller's Inventory #: 095188; Title: Sargento Getulio; Author: Ribeiro, Joao Ubaldo; Format/Binding ...",
'document': 'https://www.biblio.com/book/sargento-getulio-ribeiro-joao-ubaldo/d/781088222?srsltid=AfmBOoo_aOde6ULsSIW0SF7PnNYssTZVRLP431mX1nmnIHqeRxP8f6OB'
}
],
"input_segment_info": [],
"attributes": {"data": "Sergeant Getulio. Theatrical release poster. Directed by, Hermanno Penna. Written by, Flávio Porto Hermanno Penna. Based on, Sargento Getúlio by João Ubaldo ..."},
}]
The script node would also receive the query as input in the following format:
# Example payload for the script node's `question_data` input: the pipeline
# input node's segment, with the user query under attributes["input"].
question_data = [{
"index": 0,
"success": True,
"input_type": "text",
"is_url": False,
"details": {},
"input_segment_info": [],
"attributes": {"input": "Who is the author of 'Sargento Getúlio'"},
}]
Developing the Script Node
The script takes JSON input fields (e.g., question_data and context_data) and requires an additional output_file argument to save the output as JSON. If the details field in the model output is a list, it will appear inside a field called rows.
Now, let's look at the script structure.
# The script node's body: it reads the question and the search context from
# the JSON files the platform passes in, builds a RAG prompt, and writes the
# result back to --output_file in the pipeline's segment format.
script = """import argparse
import json

import requests

PROMPT = \"\"\"Based on the context, answer the question.
Context:
<<CONTEXT>>
Question:
<<QUESTION>>
Answer:\"\"\"


def main(question_data, context_data, output_file):
    # load the input JSON
    with open(question_data) as f:
        question_data = json.load(f)
    with open(context_data) as f:
        context_data = json.load(f)

    # prepare question: depending on the upstream node, the query may be
    # exposed under "data" or under "input"
    try:
        question = question_data[0]["attributes"]["data"]
    except Exception:
        question = question_data[0]["attributes"]["input"]
    if question_data[0]["is_url"] is True:
        question = requests.get(question).text

    # prepare context (the first row is an empty placeholder, so skip it)
    context = "\\n".join([f"Document {i+1}: {d['data']}" for i, d in enumerate(context_data[0]["details"]["rows"][1:])])

    # prepare prompt
    prompt = PROMPT.replace("<<QUESTION>>", question).replace("<<CONTEXT>>", context)

    # prepare response in the pipeline's segment format
    output_response = [{
        "index": 0,
        "success": True,
        "input_type": "text",
        "is_url": False,
        "details": {},
        "input_segment_info": [],
        "attributes": { "data": prompt },
    }]

    # save response in output_file
    with open(output_file, "w") as f:
        json.dump(output_response, f)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--question_data", type=str, required=True)
    parser.add_argument("--context_data", type=str, required=True)
    parser.add_argument("--output_file", type=str, required=True)
    args = parser.parse_args()
    main(args.question_data, args.context_data, args.output_file)"""

# Persist the script so the pipeline's script node (and the local test below)
# can load it from disk.
with open("script.py", "w") as f:
    f.write(script)
Let's test our script now.
import subprocess
import json

# Serialize the sample question so the script can read it from disk.
with open("question_data.json", "w") as f:
    json.dump(question_data, f, indent=4)

# The platform wraps a list-valued `details` field inside "rows" before the
# script node sees it; mirror that structure here for a faithful local test.
# (Iterate the segments directly — no index bookkeeping needed.)
for segment in context_data:
    segment["details"] = {
        "rows": segment["details"]
    }

with open("context_data.json", "w") as f:
    json.dump(context_data, f, indent=4)

# Invoke the script exactly as the platform would, via its CLI arguments.
cmd = ["python3", "script.py", "--question_data", "question_data.json", "--context_data", "context_data.json", "--output_file", "output.json"]
result = subprocess.run(cmd, capture_output=True)
Here is what the output file looks like.
# Load and display the JSON the script wrote to --output_file.
with open("output.json") as f:
    output = json.load(f)
output
Pipeline
For more information on how to build a pipeline, please refer to this guide.
from aixplain.enums import DataType
from aixplain.factories import PipelineFactory
from aixplain.modules import Pipeline
# Initializing Pipeline
pipeline = PipelineFactory.init(name="RAG Pipeline For Google")
# Creating Input Node (receives the user's question)
question_input = pipeline.input()
question_input.label = "QuestionInput"
# Creating Search Node (same Google SERP asset used standalone above)
GOOGLE_SERP_ASSET = "65c51c556eb563350f6e1bb1"
search_node = pipeline.search(asset_id=GOOGLE_SERP_ASSET)
# Creating Script Node from the script.py file written earlier.
# The input param codes must match the script's argparse flags
# (--question_data / --context_data); the output code "data" matches the
# key the script writes under "attributes".
script = pipeline.script(script_path="script.py")
script.label = "ContextGeneratorScript"
script.inputs.create_param(code="question_data", data_type=DataType.TEXT)
script.inputs.create_param(code="context_data", data_type=DataType.TEXT)
script.outputs.create_param(code="data", data_type=DataType.TEXT)
# Creating LLM Node
OPENAI_GPT4O_MINI_ASSET = "669a63646eb56306647e1091"
llm_node = pipeline.text_generation(asset_id=OPENAI_GPT4O_MINI_ASSET)
# Wire the graph: the question fans out to both the search node and the
# script node; the search results feed the script; the script's prompt
# feeds the LLM.
# Question Input -> Search
question_input.outputs.input.link(search_node.inputs.text)
# Question Input -> Script
question_input.outputs.input.link(script.inputs.question_data)
# Search -> Script
search_node.outputs.data.link(script.inputs.context_data)
# Script -> LLM
script.outputs.data.link(llm_node.inputs.text)
# LLM -> Output
llm_node.use_output("data")
pipeline.save(save_as_asset=True)
Let's run the pipeline now.
# Run the pipeline end to end with a plain-text question.
# NOTE(review): "version": "2.0" appears to select the pipeline runtime
# version — confirm against the aixplain docs.
response = pipeline.run("Who is the author of 'Sargento Getulio'?", **{ "version": "2.0" })
response
import requests
# The segment's "response" field holds a URL to the generated text, so
# fetch and print its contents.
data = response["data"][0]["segments"][0]["response"]
print(requests.get(data).text)
Optionally, you can then delete the pipeline.
# Remove the saved pipeline asset from the platform when no longer needed.
pipeline.delete()