Spaces:
Paused
Paused
#! /usr/bin/env python
"""Convert a JSON dump of tool-use episodes into chat-style JSONL.

The input file (``--path``) must hold a JSON list of "settings". Each
setting provides ``"NLDocumentation"`` (the tool description) and
``"Instances"``; each instance provides ``'input'``,
``'intermediate_steps'``, ``'Final Thought'`` and ``'output'``.

Every well-formed instance becomes one line of
``formatted_data/tool_alpaca.jsonl`` of the form
``{"tools": [...], "conversations": [...]}``. Malformed instances are
skipped and counted rather than aborting the run.
"""
import json
from collections import Counter
from argparse import ArgumentParser
import os

# Exceptions a malformed instance can raise while being converted:
# missing keys, wrong container shapes, non-string fields, or tool
# parameters that are not valid JSON (JSONDecodeError is a ValueError).
# Listed explicitly so KeyboardInterrupt / SystemExit are never swallowed.
_MALFORMED_RECORD_ERRORS = (
    KeyError,
    IndexError,
    TypeError,
    ValueError,
    AttributeError,
)


def _build_conversation(instance):
    """Return the message list for one instance.

    The list starts with the user input, then has an assistant "thought"
    message and a tool message for every intermediate step, and ends with
    an assistant message joining the final thought and the output.

    Raises one of ``_MALFORMED_RECORD_ERRORS`` when the instance does not
    have the expected structure.
    """
    conv = [{
        "role": "user",
        "content": instance["input"],
    }]
    for step in instance["intermediate_steps"]:
        # Each step is a pair: (tool_name, params, react) and the observation.
        tool_name, params, react = step[0]
        observation = step[1]
        conv.append({
            "role": "assistant",
            # Keep only the text that precedes the first "Action:" marker.
            "content": react.split("Action:")[0].strip(),
        })
        conv.append({
            "role": "tool",
            "name": tool_name,
            # params is a JSON-encoded string in the source data.
            "parameters": json.loads(params),
            "observation": observation,
        })
    conv.append({
        "role": "assistant",
        "content": instance["Final Thought"] + "\n" + instance["output"],
    })
    return conv


def _convert(data):
    """Convert all settings in *data*.

    Returns a ``(train_examples, err_count)`` tuple, where ``err_count``
    is the number of instances skipped because they were malformed.
    """
    train_examples = []
    err_count = 0
    for setting in data:
        api_desc = [setting["NLDocumentation"]]
        for instance in setting["Instances"]:
            try:
                conv = _build_conversation(instance)
            except _MALFORMED_RECORD_ERRORS:
                # Best effort: skip the bad record but keep a tally.
                err_count += 1
            else:
                train_examples.append({
                    "tools": api_desc,
                    "conversations": conv,
                })
    return train_examples, err_count


def main(argv=None):
    """Run the conversion from the command line.

    *argv* is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    Prints summary statistics to stdout and writes
    ``formatted_data/tool_alpaca.jsonl`` relative to the current directory.
    """
    parser = ArgumentParser()
    parser.add_argument("--path", type=str, required=True)
    args = parser.parse_args(argv)

    with open(args.path, encoding="utf-8") as f:
        data = json.load(f)

    train_examples, err_count = _convert(data)

    print("err_count:", err_count)
    print("train_examples:", len(train_examples))
    print(
        "conversation distribution:",
        Counter(len(e["conversations"]) for e in train_examples),
    )

    os.makedirs("formatted_data", exist_ok=True)
    # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII characters,
    # which a non-UTF-8 locale default encoding could fail to write.
    with open("formatted_data/tool_alpaca.jsonl", "w", encoding="utf-8") as f:
        for e in train_examples:
            f.write(json.dumps(e, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    main()