# vectorize.py
"""
Vectorize your local project.
"""
import argparse

from utils.data import traverse
from utils.vector import vectorize


  7. def parse_arguments():
  8. parser = argparse.ArgumentParser()
  9. parser.add_argument('--workspace', type=str, help="directory of the workspace to be vectorized", default='.')
  10. parser.add_argument('--chunk_size', type=int, help="chunk size when splitting", default=512)
  11. parser.add_argument('--overlap_size', type=int, help="chunk overlap when splitting", default=32)
  12. parser.add_argument('--batch_size', type=int, help="embedding batch size", default=16)
  13. parser.add_argument('--output_path', type=str, help="path to save the vectors", default='vectors')
  14. return parser.parse_args()
  15. if __name__ == '__main__':
  16. args = parse_arguments()
  17. files = traverse(args.workspace)
  18. vectorize(files, args)