import argparse import json import tqdm def format_example(example: dict) -> dict: context = f"Instruction: {example['instruction']}\n" if example.get("input"): context += f"Input: {example['input']}\n" context += "Answer: " target = example["output"] return {"context": context, "target": target} def main(): parser = argparse.ArgumentParser() parser.add_argument("--data_path", type=str, default="data/alpaca_data.json") parser.add_argument("--save_path", type=str, default="data/alpaca_data.jsonl") args = parser.parse_args() print("args:", args) with open(args.data_path) as f: examples = json.load(f) with open(args.save_path, 'w') as f: for example in tqdm.tqdm(examples, desc="formatting.."): f.write(json.dumps(format_example(example), ensure_ascii=False) + '\n') if __name__ == "__main__": main()